# [o3]- Proyecto Ozono - Prediccion a imagen_v0

# [0] - Inicialización

In [335]:
import findspark
findspark.init('/home/rulicering/BigData/spark-2.4.5-bin-hadoop2.7')
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import pandas as pd
from pyspark.sql.types import StructField,StringType,IntegerType,StructType,FloatType
import re as reg
import numpy as np
import datetime

#MlLib
from pyspark.ml.regression import LinearRegression

#Aux
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler

In [336]:
# Create (or reuse) the Spark session used by every Spark read in this notebook.
spark = (
    SparkSession.builder
    .appName('prediccion_a_imagen')
    .getOrCreate()
)

# [1] Datos

## [1.0] - Carga de ficheros (Datos, Predicción clima,  Calendario)

In [337]:
# Load today's station catalogue and today's prediction table.
# Both CSVs carry a header row; column types are inferred by Spark.
# NOTE(review): absolute, user-specific paths -- consider a configurable data directory.
df_estaciones = spark.read.csv(
    '/home/rulicering/Datos_Proyecto_Ozono/Procesado/Estaciones/Estaciones-hoy.csv',
    header=True,
    inferSchema=True,
)
df_prediccion = spark.read.csv(
    '/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/Prediccion-hoy.csv',
    header=True,
    inferSchema=True,
)

In [338]:
# Remove the "_c0" column from both frames.
# Presumably this is the unnamed index column left behind when the CSVs were
# written with pandas' to_csv -- TODO confirm for the stations file.
df_estaciones, df_prediccion = (
    df.drop("_c0") for df in (df_estaciones, df_prediccion)
)

## [1.1] - Añadir nombre de las estaciones a los datos de predicción

In [339]:
# Keep only the first two columns of the stations table (selected by position;
# presumably the short code and the station name -- TODO confirm) for the
# stations whose MIDE_AIRE flag is greater than 0.
cols = df_estaciones.columns[:2]
df_nombre_estaciones = (
    df_estaciones
    .where(F.col("MIDE_AIRE") > 0)
    .select(*cols)
)

In [340]:
# Attach the station columns to the predictions via an inner join on the
# station short code.
# NOTE: this rebinds df_prediccion, so re-running the cell joins the already
# joined frame again.
df_prediccion = df_nombre_estaciones.join(
    df_prediccion,
    on=["CODIGO_CORTO"],
    how="inner",
)

## [1.2] - Renombrar magnitudes numéricas a literal + Redondear a 2 decimales

In [341]:
# Collect the joined Spark DataFrame into a pandas DataFrame; the renaming,
# rounding and styling below are all done with pandas.
pd_prediccion = df_prediccion.toPandas()

In [342]:
# Lookup table: magnitude code (as a string) -> chemical formula and unit.
# Built from (code, formula, unit) rows so each pollutant reads as one record.
dic_magnitudes = {
    codigo: {"formula": formula, "unidad": unidad}
    for codigo, formula, unidad in (
        ("1", "SO2", "μg/m³"),
        ("6", "CO", "mg/m³"),
        ("7", "NO", "μg/m³"),
        ("8", "NO2", "μg/m³"),
        ("9", "PM2.5", "μg/m³"),
        ("10", "PM10", "μg/m³"),
        ("12", "NOx", "μg/m³"),
        ("14", "O3", "μg/m³"),
        ("20", "TOL", "μg/m³"),
        ("30", "BEN", "μg/m³"),
        ("35", "EBE", "μg/m³"),
        ("37", "MXY", "μg/m³"),
        ("38", "PXY", "μg/m³"),
        ("39", "OXY", "μg/m³"),
        ("42", "TCH", "mg/m³"),
        ("43", "CH4", "mg/m³"),
        ("44", "NMHC", "mg/m³"),
    )
}

In [343]:
# Every column after the first two is treated as a magnitude (prediction) column.
cols_magnitudes = list(pd_prediccion.columns[2:])

In [344]:
# Build the full "column -> formula" mapping before touching the frame, so an
# unknown magnitude code raises KeyError while pd_prediccion is still intact
# (the per-column loop could leave it half renamed).
# The lookup key is the column name without its first two characters
# (presumably a fixed prefix in front of the magnitude code -- TODO confirm).
renombres_magnitudes = {
    columna: dic_magnitudes[columna[2:]]["formula"]
    for columna in cols_magnitudes
}

# One round() and one rename() over all magnitude columns instead of copying
# the whole frame twice per column.
pd_prediccion = (
    pd_prediccion
    .round({columna: 2 for columna in renombres_magnitudes})
    .rename(columns=renombres_magnitudes)
)

In [345]:
# Shorten the station-code header to "COD" for the rendered table.
pd_prediccion = pd_prediccion.rename({"CODIGO_CORTO": "COD"}, axis="columns")

# [2] - Formato

# 16-step palette as RGB tuples with components in the 0-1 range, running from
# a brown tone to a green/teal tone.
# It is bound to `paleta_marron_a_verde` (singular) because that is the name
# the reverse() call and the ListedColormap construction in this section use;
# the list was previously only bound to the plural name, which nothing reads.
paleta_marron_a_verde = [(0.7498021866978873, 0.5194306094533307, 0.25981270201814527),
 (0.7799111510563024, 0.5772626362834098, 0.348887313340094),
 (0.8100201154147174, 0.6350946631134888, 0.4379619246620427),
 (0.8401290797731323, 0.6929266899435679, 0.5270365359839915),
 (0.8711201426967352, 0.7524530144346844, 0.6187207550595129),
 (0.9012291070551502, 0.8102850412647635, 0.7077953663814616),
 (0.9313380714135653, 0.8681170680948426, 0.7968699777034104),
 (0.9614470357719803, 0.9259490949249216, 0.885944589025359),
 (0.892399800802445, 0.9474323476878915, 0.9488407549129974),
 (0.805224341979654, 0.9004601726033415, 0.9028974727017461),
 (0.7180488831568632, 0.8534879975187916, 0.8569541904904949),
 (0.6308734243340722, 0.8065158224342415, 0.8110109082792437),
 (0.5411439969910824, 0.7581675094077615, 0.7637216314719599),
 (0.4539685381682915, 0.7111953343232115, 0.7177783492607087),
 (0.36679307934550054, 0.6642231592386615, 0.6718350670494575),
 (0.27961762052270966, 0.6172509841541115, 0.6258917848382063)]
# Independent copy under the original plural name, so any existing reference
# keeps working and is not affected by in-place changes to the list above.
paletas_marron_a_verde = list(paleta_marron_a_verde)

In [393]:
# Alias the prediction DataFrame as `pd`.
# NOTE(review): `pd` is the pandas alias from the import cell at the top of the
# notebook; after this line the name refers to the DataFrame instead, so any
# later call that expects the pandas module through `pd` will fail. Consider a
# different alias.
pd = pd_prediccion

In [347]:
import seaborn as sns

In [416]:
from matplotlib.colors import ListedColormap

In [459]:
# 9-colour "BrBG" seaborn palette.
# Preview call kept for reference:
#sns.palplot(sns.color_palette("BrBG",9))
paleta = sns.color_palette("BrBG", n_colors=9)

In [460]:
# Hex-string version of the seaborn palette.
paleta_hex = paleta.as_hex()

In [461]:
# Reverse the hex palette in place.
# NOTE: not idempotent -- running this cell a second time restores the original order.
paleta_hex.reverse()

In [467]:
# Reverse the custom palette in place before it is turned into a colormap.
# NOTE(review): this relies on `paleta_marron_a_verde` already being bound by an
# earlier cell -- verify with Restart & Run All. The reversal is also not
# idempotent: running this cell twice restores the original order.
paleta_marron_a_verde.reverse()

In [468]:
# Key step: colormap used for the table background gradient.
# Alternatives tried earlier, kept for reference:
#my_cmap = ListedColormap(sns.color_palette("BrBG",9).as_hex())
#my_cmap = ListedColormap(paleta_hex)
#cm = sns.light_palette("seagreen", as_cmap = True)
my_cmap = ListedColormap(colors=paleta_marron_a_verde)

In [469]:
# Build the styled table:
#   1. colour cell backgrounds with the custom colormap,
#   2. paint missing values white,
#   3. format every column after the first two with "{:.4}" and show missing
#      values as empty strings.
# The formatted columns are read from pd_prediccion itself rather than through
# the name `pd`, which is the pandas alias from the import cell; this cell
# therefore does not depend on `pd` having been rebound to the DataFrame.
# NOTE(review): background_gradient is applied without a subset, so it also
# covers the "COD" column if that column is numeric -- confirm this is intended.
styled_table = (
    pd_prediccion.style
    .background_gradient(cmap=my_cmap)
    .highlight_null(null_color='white')
    .format("{:.4}", subset=pd_prediccion.columns.tolist()[2:], na_rep="")
)

In [470]:
# Cell-level CSS applied to the whole table: font stack, size, colour, padding.
# NOTE(review): "font-size" and "padding" are given without a unit -- confirm
# the renderer interprets them as intended.
# Earlier variant kept for reference:
#styled_table.set_properties(**{"font-family":"Sawasdee","font-weight":"bold","font-size":"15","color":"black","padding":"5"})
styled_table2 = styled_table.set_properties(
    **{
        "font-family": "Avantgarde, TeX Gyre Adventor, URW Gothic L, sans-serif",
        "font-size": "15",
        "color": "black",
        "padding": "5",
    }
)

In [471]:
# Hide the DataFrame index so it does not appear in the rendered table.
styled_table3 = styled_table2.hide_index()

In [472]:
# Show the final styled table as the cell output.
styled_table3

COD,ESTACION,SO2,PM10,NOx,O3,TOL,BEN,EBE,TCH,CH4,NMHC,CO,NO,NO2,PM2.5
27,Barajas Pueblo,,,8.9,67.37,,,,,,,,0.97,12.06,
47,Mendez Alvaro,,,7.36,,,,,,,,,0.37,10.32,
16,Arturo Soria,,,7.61,74.27,,,,,,,0.12,0.61,3.91,
40,Vallecas,4.75,7.99,7.36,,,,,,,,,0.37,10.32,
57,Sanchinarro,4.75,5.03,6.07,,,,,,,,0.19,0.37,2.91,
54,Ensanche de Vallecas,,,19.42,75.13,,,,,,,,0.37,11.23,
48,Castellana,,5.19,7.36,,,,,,,,,0.37,3.47,3.93
17,Villaverde,7.68,,17.89,66.11,,,,,,,,6.83,10.79,
35,Pza. del Carmen,4.75,,8.57,73.53,,,,,,,0.13,0.37,4.11,
55,Urb. Embajada,,8.78,9.89,,0.57,0.12,0.09,1.1,0.73,0.35,,5.35,5.62,


In [473]:
# Render the styled table to an HTML string; this is what gets converted to an image below.
html = styled_table3.render()

In [474]:
import imgkit

# Convert the HTML table to a PNG in the current working directory.
# The earlier config pointed the `wkhtmltoimage` setting at the wkhtmltopdf
# binary and was never passed to from_string, so it had no effect on the
# conversion. Build the config without an explicit path (imgkit then locates
# the wkhtmltoimage executable itself) and pass it explicitly, so the binary
# actually used is the one described by `config`.
config = imgkit.config()
imgkit.from_string(html, 'styled_table.png', config=config)

Loading page (1/2)


True

# [5] - EXPORTAR

In [50]:
#Versiones
hoy = datetime.date.today().strftime("%Y-%m-%d")
pd_prediccion.to_csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/BackUp/Prediccion-" + hoy + ".csv")

In [49]:
# Write the prediction table to Prediccion-hoy.csv.
# NOTE(review): this is the same path the notebook reads its predictions from
# in the loading cell, and pd_prediccion now has renamed columns ("COD" and
# formula names) plus the station-name column. Overwriting the input this way
# looks like it would break the join on "CODIGO_CORTO" and the magnitude lookup
# on a subsequent run -- confirm this cell is meant to run in this notebook.
pd_prediccion.to_csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/Prediccion-hoy.csv")

# [EXTRA] - CHECKEO