# [o3] - Proyecto Ozono - Predicción a imagen_v0

# [0] - Inicialización

In [1]:
import findspark
findspark.init('/home/rulicering/BigData/spark-2.4.5-bin-hadoop2.7')
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import pandas as pandas
from pyspark.sql.types import StructField,StringType,IntegerType,StructType,FloatType
import re as reg
import numpy as np
import datetime

In [2]:
import seaborn as sns
import matplotlib
from matplotlib.colors import ListedColormap
import imgkit

In [3]:
# Create (or reuse, via getOrCreate) the Spark session used by this notebook.
spark = SparkSession.builder.appName('prediccion_a_imagen').getOrCreate()

# [1] Datos

## [1.0] - Carga de ficheros (Datos, Predicción clima,  Calendario)

In [4]:
# Date stamps ("YYYY-MM-DD") used to build the input file names.
# The current date is read once so that `hoy` and `ayer` are always exactly
# one day apart, even if this cell happens to run across midnight.
_fecha_base = datetime.date.today()
hoy = _fecha_base.strftime("%Y-%m-%d")
ayer = (_fecha_base - datetime.timedelta(days=1)).strftime("%Y-%m-%d")

In [5]:
# Load the processed station catalogue (file stamped with yesterday's date)
# and the prediction file stamped with today's date. Both CSVs are read with
# a header row and with Spark-inferred column types.
df_estaciones = spark.read.csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Estaciones/Estaciones-" + ayer +".csv",inferSchema= True,header=True)
df_prediccion = spark.read.csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/Prediccion-" + hoy + ".csv",inferSchema= True,header=True)

In [6]:
# Remove the "_c0" column from both frames.
# NOTE(review): "_c0" is presumably the unnamed index column that pandas
# `to_csv` writes by default in the producer notebooks - confirm.
df_estaciones = df_estaciones.drop("_c0")
df_prediccion = df_prediccion.drop("_c0")

## [1.1] - Añadir nombre de las estaciones a los datos de predicción

In [7]:
# Station code and station name, selected by name instead of by position so
# that a change in the CSV column order cannot silently pick the wrong columns.
# These are the two columns the original positional slice `columns[0:2]`
# resolved to in the recorded output of the next cell.
cols = ["CODIGO_CORTO", "ESTACION"]
# Keep only the stations whose MIDE_AIRE flag is greater than zero.
df_nombre_estaciones = df_estaciones.filter(df_estaciones["MIDE_AIRE"] > 0).select(cols)

In [8]:
# Display the selected column names as a quick sanity check.
cols

['CODIGO_CORTO', 'ESTACION']

In [9]:
# Attach the station name to every prediction row by joining on CODIGO_CORTO.
# No `how` is given, so PySpark's default join type applies and rows without a
# matching station in df_nombre_estaciones are not kept.
# The station frame is on the left, so CODIGO_CORTO and ESTACION end up as the
# first two columns; the positional `[2:]` slices further down rely on that.
df_prediccion = df_nombre_estaciones.join(df_prediccion,on = "CODIGO_CORTO")

## [1.2] - Renombrar magnitudes numéricas a literal + Redondear a 2 decimales

In [10]:
# Collect the joined Spark DataFrame into a pandas DataFrame for styling.
pd_prediccion = df_prediccion.toPandas() 

In [11]:
# Lookup table keyed by magnitude code (as a string).
#   "formula": label used as the column header (may contain HTML <sub> tags).
#   "unidad":  unit string (not read by any code visible in this notebook).
#   "limite":  reference value used to scale the cell background colour;
#              None means the column is left uncoloured.
dic_magnitudes = { "1": {"formula":"SO<sub>2</sub>", "unidad": "μg/m³","limite": 125},
                  "6": {"formula":"CO", "unidad": "mg/m³","limite": 10},
                  "7": {"formula":"NO", "unidad": "μg/m³","limite": 200},
                  "8": {"formula":"NO<sub>2</sub>", "unidad": "μg/m³","limite": 200},
                  "9": {"formula":"PM2.5", "unidad": "μg/m³","limite": 50},
                  "10": {"formula":"PM10", "unidad": "μg/m³","limite": 50},
                  "12": {"formula":"NOx", "unidad": "μg/m³","limite": 200},
                  "14": {"formula":"O<sub>3</sub>", "unidad": "μg/m³","limite": 180},
                  "20": {"formula":"TOL", "unidad": "μg/m³","limite": None},
                  "30": {"formula":"BEN", "unidad": "μg/m³","limite": 5},
                  "35": {"formula":"EBE", "unidad": "μg/m³","limite": None},
                  "37": {"formula":"MXY", "unidad": "μg/m³","limite": None},
                  "38": {"formula":"PXY", "unidad": "μg/m³","limite": None},
                  "39": {"formula":"OXY", "unidad": "μg/m³","limite": None},
                  "42": {"formula":"TCH", "unidad": "mg/m³","limite": None},
                  "43": {"formula":"CH4", "unidad": "mg/m³","limite": None},
                  "44": {"formula":"NMHC", "unidad": "mg/m³","limite": None}  
}

In [12]:
# Every column after the first two (station code and station name) is treated
# as a magnitude column.
cols_magnitudes = pd_prediccion.columns.tolist()[2:]

In [13]:
# Round every magnitude column to 2 decimals, then relabel it with its
# formula. The magnitude code is the column name without its first two
# characters, and it must be a key of dic_magnitudes.
pd_prediccion = pd_prediccion.round(
    {nombre: 2 for nombre in cols_magnitudes}
).rename(
    columns={
        nombre: dic_magnitudes[nombre[2:]]["formula"]
        for nombre in cols_magnitudes
    }
)

In [14]:
# Shorten the station-code header for the rendered table.
pd_prediccion = pd_prediccion.rename(columns={"CODIGO_CORTO":"COD"})

In [15]:
# Cast the station code to int so the sort below is numeric.
pd_prediccion["COD"] = pd_prediccion["COD"].astype(int)

In [16]:
# Order the rows by station code.
pd_prediccion = pd_prediccion.sort_values(by=["COD"])

# [2] - Formato

## Crear tu propia paleta

In [17]:
#paleta_aux = sns.choose_colorbrewer_palette('divergin')

In [18]:
#paleta = sns.choose_diverging_palette()

In [19]:
#sns.choose_colorbrewer_palette("diverging")

In [20]:
#sns.palplot(sns.color_palette("BrBG", 10))

## Paletas

In [21]:
#Ver las paletas
#sns.palplot(paleta_1_x10)

In [22]:
# 10-colour palette stored as RGB tuples with components in the 0-1 range.
# Only referenced from commented-out lines in the visible cells.
paleta_1_x10 = [(0.22938245351686504, 0.6232646673152314, 0.6159902502071596),
 (0.38316401151758134, 0.7003920039819671, 0.6945332760305609),
 (0.5369455695182975, 0.7775193406487028, 0.7730763018539624),
 (0.6989546983188569, 0.8587731189465341, 0.8558215110483562),
 (0.8527362563195732, 0.9359004556132697, 0.9343645368717577),
 (0.9558678192494904, 0.9056297164235482, 0.8462488437448462),
 (0.909305106623319, 0.8087307499736233, 0.6898529912403856),
 (0.8602512109206776, 0.7066475265993755, 0.5250896988035225),
 (0.8136884982945063, 0.6097485601494504, 0.3686938462990619),
 (0.7671257856683349, 0.5128495936995257, 0.21229799379460118)]

In [23]:
# 16-colour palette stored as RGB tuples with components in the 0-1 range.
# Not referenced by any other visible cell.
paleta_2_x16 = [(0.7498021866978873, 0.5194306094533307, 0.25981270201814527),
 (0.7799111510563024, 0.5772626362834098, 0.348887313340094),
 (0.8100201154147174, 0.6350946631134888, 0.4379619246620427),
 (0.8401290797731323, 0.6929266899435679, 0.5270365359839915),
 (0.8711201426967352, 0.7524530144346844, 0.6187207550595129),
 (0.9012291070551502, 0.8102850412647635, 0.7077953663814616),
 (0.9313380714135653, 0.8681170680948426, 0.7968699777034104),
 (0.9614470357719803, 0.9259490949249216, 0.885944589025359),
 (0.892399800802445, 0.9474323476878915, 0.9488407549129974),
 (0.805224341979654, 0.9004601726033415, 0.9028974727017461),
 (0.7180488831568632, 0.8534879975187916, 0.8569541904904949),
 (0.6308734243340722, 0.8065158224342415, 0.8110109082792437),
 (0.5411439969910824, 0.7581675094077615, 0.7637216314719599),
 (0.4539685381682915, 0.7111953343232115, 0.7177783492607087),
 (0.36679307934550054, 0.6642231592386615, 0.6718350670494575),
 (0.27961762052270966, 0.6172509841541115, 0.6258917848382063)]

In [24]:
# 10-colour "BrBG" palette generated by seaborn.
# Only referenced from commented-out lines in the visible cells.
paleta_3_x10 = sns.color_palette("BrBG", 10)

In [25]:
# 10-colour palette stored as RGB tuples with components in the 0-1 range.
# Not referenced by any other visible cell.
paleta_4_x10 = [(0.9975265542807706, 0.9550787554401984, 0.9024734457192293),
 (0.9546431966971258, 0.8878478016723288, 0.8050686578959644),
 (0.9117598391134811, 0.8206168479044592, 0.7076638700726995),
 (0.8673449330447063, 0.75098478864488, 0.6067803398271752),
 (0.8244615754610615, 0.6837538348770105, 0.5093755520039103),
 (0.7800466693922865, 0.6141217756174313, 0.40849202175838584),
 (0.7371633118086418, 0.5468908218495617, 0.31108723393512105),
 (0.6927484057398668, 0.4772587625899825, 0.21020370368959662),
 (0.649865048156222, 0.410027808822113, 0.11279891586633173),
 (0.6069816905725773, 0.3427968550542434, 0.0153941280430668)]

In [26]:
# 10-colour palette stored as RGB tuples with components in the 0-1 range,
# running from green (index 0) through yellow to red (index 9).
# This is the palette selected as `paleta` further down.
paleta_5_x10 = [(0.0919646289888505, 0.5776239907727797, 0.30411380238369873),
 (0.3415609381007306, 0.712725874663591, 0.37362552864282983),
 (0.5771626297577854, 0.8186851211072667, 0.40761245674740476),
 (0.7803921568627452, 0.906805074971165, 0.49942329873125735),
 (0.9327950788158401, 0.9717031910803539, 0.6570549788542867),
 (0.9982314494425222, 0.9451749327181854, 0.6570549788542868),
 (0.9946943483275664, 0.8092272202998847, 0.48696655132641264),
 (0.981776239907728, 0.6073817762399077, 0.3457900807381774),
 (0.9345636293733178, 0.38054594386774326, 0.24121491733948497),
 (0.8239138792772011, 0.16978085351787778, 0.15255670895809303)]

## Formato paletas
    Necesitamos los valores hexadecimales, que son los colores en sí.

In [27]:
#paleta_hex = paleta_3_x10.as_hex()
#paleta_hex.reverse()

In [28]:
#paleta = paleta_1_x10
# Active palette: dar_color_fondo reads this module-level name and indexes
# into it to pick the background colour of each cell.
paleta = paleta_5_x10

In [29]:
#key - CMAPS
#my_cmap = ListedColormap(sns.color_palette("BrBG",9).as_hex())
#my_cmap = ListedColormap(paleta_hex)
#cm = sns.light_palette("seagreen", as_cmap = True)
#my_cmap = ListedColormap(paleta)

## Damos estilo a la tabla

In [30]:
# NOTE(review): `pd` is an alias for the prediction DataFrame here, NOT the
# pandas module (this notebook imports pandas under the name `pandas`).
# The name is easy to misread; consider removing the alias once nothing
# else in the notebook depends on it.
pd = pd_prediccion

In [31]:
# Nulls and numeric format.
# Null cells get a white background and are rendered as an empty string; the
# magnitude columns (everything after the first two) are printed with 4
# significant digits. The column subset is taken from the same DataFrame
# that is being styled, rather than through the separate `pd` alias, so the
# two can never drift apart.
styled_table = (
    pd_prediccion.style
    .highlight_null(null_color='white')
    .format("{:.4}", subset=pd_prediccion.columns.tolist()[2:], na_rep="")
)

In [32]:
# CSS declarations applied to the data cells of the table.
styled_table2 = styled_table.set_properties(
    **{
        "font-family": "Sawasdee",
        "font-weight": "900",
        "font-size": "15",
        "color": "black",
        "padding": "0px",
        "margin": "0px",
        "border": " 0px solid black",
        "column-gap": "0px",
    }
)

In [33]:
# Hide the DataFrame index so it does not appear in the rendered table.
styled_table3 = styled_table2.hide_index()

In [34]:
def dar_color_fondo(val,limite):
    """Return the CSS ``background-color`` declaration for one table cell.

    The range ``[0, limite)`` is split into ``len(paleta) - 1`` equal bands,
    one palette colour per band; any value at or above ``limite`` gets the
    last palette colour. With the 10-colour palette used in this notebook
    that is 9 bands plus one "over the limit" colour.

    Parameters
    ----------
    val : number or missing value
        Cell value to colour.
    limite : number
        Reference value at which the last palette colour is reached.
        Must be non-zero.

    Returns
    -------
    str
        ``'background-color: <hex>'``, or ``'background-color: white'`` when
        ``val`` is missing.
    """
    if pandas.isna(val):
        return 'background-color: white'
    ultimo = len(paleta) - 1
    long_tramo = limite / ultimo
    # Clamp on both sides: a negative value must map to the first colour
    # instead of producing a negative index, which would wrap around to the
    # far end of the palette.
    tramo = min(max(int(val / long_tramo), 0), ultimo)
    return 'background-color: %s' % matplotlib.colors.to_hex(paleta[tramo])

In [35]:
# Register the background-colour rule for every magnitude that has a limit.
# Magnitudes whose column is not present in the prediction table are skipped:
# a `subset` naming a missing column would otherwise raise a KeyError when
# the table is rendered.
for elem in dic_magnitudes:
    limite = dic_magnitudes[elem]["limite"]
    formula = dic_magnitudes[elem]["formula"]
    if limite and formula in pd_prediccion.columns:
        styled_table3.applymap(dar_color_fondo,
                               limite = limite,
                               subset=[formula])

In [51]:
# Table-level, header and cell formatting.
styled_table4 = styled_table3.set_table_styles([
    # Rule for the table element itself. The empty selector is spelled out
    # because pandas versions that validate table styles require the
    # 'selector' key on every entry.
    {'selector': '',
     'props': [("font-family", "Sawasdee"),
               ("color", "black"),
               ("background-color", 'white'),
               ("border-collapse", "collapse"),
               ("text-align", "center")
               ]},
    # Header cells.
    {'selector': 'th',
     'props': [('background-color', 'white'),
               ("font-size", "13px"),
               ("font-weight", "900"),
               ("border", " 0px solid black"),
               ("border-bottom-width", "1px"),
               ("column-width", "50px")
               ]},
    # Data cells.
    {'selector': 'td',
     'props': [("font-size", "12px"),
               ("font-weight", "600"),
               ("border", " 2px solid white")
               ]},
])

# Declare the charset explicitly so the non-ASCII station names survive the
# HTML -> image conversion.
html = '<meta charset="UTF-8">' + styled_table4.render()

# Let imgkit locate the wkhtmltoimage binary by itself and actually hand the
# configuration to the renderer. The previous configuration pointed
# `wkhtmltoimage` at the wkhtmltopdf executable and was never passed on.
config = imgkit.config()
imgkit.from_string(html, 'styled_table.png', config=config)
styled_table4

Loading page (1/2)


COD,ESTACION,SO2,PM10,NOx,O3,TOL,BEN,EBE,TCH,CH4,NMHC,CO,NO,NO2,PM2.5
4,Pza. de España,3.74,,52.49,,,,,,,,0.13,12.97,27.87,
8,Escuelas Aguirre,7.74,24.54,31.75,68.43,3.53,0.38,1.42,1.4,1.29,0.15,0.21,2.79,31.03,8.87
11,Avda. Ramón y Cajal,,,28.73,,1.12,0.14,0.12,,,,,6.08,25.55,
16,Arturo Soria,,,14.99,73.66,,,,,,,0.19,1.37,20.75,
17,Villaverde,8.64,,62.12,62.99,,,,,,,,17.67,27.21,
18,Farolillo,6.61,15.12,27.22,72.85,1.13,0.11,0.11,,,,0.34,5.7,22.06,
24,Casa de Campo,4.8,20.03,14.31,73.31,1.13,0.11,0.11,1.23,1.09,0.12,0.12,1.46,12.35,8.48
27,Barajas Pueblo,,,37.34,77.25,,,,,,,,7.47,25.5,
35,Pza. del Carmen,4.85,,29.13,67.95,,,,,,,0.13,0.99,23.12,
36,Moratalaz,4.85,20.84,29.13,,,,,,,,0.12,0.99,23.12,


# [5] - EXPORTAR

# Versions
# Write a dated backup copy and a fixed-name "hoy" copy of the prediction table.
# NOTE(review): `hoy` is recomputed here although it is already defined near
# the top of the notebook; if the run crosses midnight the backup name will
# not match the date of the prediction file that was loaded.
# NOTE(review): at this point the columns carry the display labels (including
# HTML <sub> tags), and `to_csv` is called without `index=False`, so the
# index is written as an extra leading column - confirm consumers expect both.
hoy = datetime.date.today().strftime("%Y-%m-%d")
pd_prediccion.to_csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/BackUp/Prediccion-" + hoy + ".csv")

pd_prediccion.to_csv("/home/rulicering/Datos_Proyecto_Ozono/Procesado/Predicciones/Prediccion-hoy.csv")

# [EXTRA] - CHECKEO