# Visualization of Predictions
Export the predictions as color-coded tables in HTML format.

In [31]:
%matplotlib widget
import os, geopandas, folium, requests, matplotlib, folium.plugins, branca, plotly
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import utils


In [32]:
PREDICTION_DIR = "predictions"  # directory that holds the prediction CSV files

# Read the clock once so both bounds are derived from the same instant,
# even if this cell happens to run across midnight.
_TODAY = datetime.today()
START_DATE = _TODAY.strftime('%Y-%m-%d')
END_DATE = (_TODAY + timedelta(days=7)).strftime('%Y-%m-%d')  # 7 days after today

# Pollutant names recognised in the prediction column names
POLLUTANTS = ["SO2", "NOy", "CO", "C6H6", "PM10", "PM2.5", "Pb", "O3"]

In [33]:
filename = "predictions_station_Kallio 2.csv"  # single station file used to try out the helpers

# Build the path with os.path.join, the same way the batch-export cell does
df = pd.read_csv(os.path.join(PREDICTION_DIR, filename))
df.head()

Unnamed: 0,Date,Predicted_PM2.5,Predicted_PM10,Predicted_SO2,Predicted_O3
0,2025-10-02,3.987999,7.508876,0.806964,55.983604
1,2025-10-03,4.409436,6.970961,0.591071,53.631402
2,2025-10-04,4.716873,7.002096,0.804539,52.971061
3,2025-10-05,4.953396,7.182804,1.274145,53.463397
4,2025-10-06,5.148458,7.399596,1.725513,54.246363


In [35]:
def preprocess(df, start_date=None, end_date=None, pollutants=None):
    """Restrict a prediction table to a date window and tidy its pollutant columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Predictions with a ``Date`` column parseable by ``pd.to_datetime``.
        The input frame is not modified.
    start_date, end_date : str or datetime-like, optional
        Inclusive bounds of the window. Default to the notebook-level
        ``START_DATE`` / ``END_DATE``.
    pollutants : collection of str, optional
        Column names (after the ``"Predicted_"`` prefix is removed) that are
        converted to integers. Defaults to the notebook-level ``POLLUTANTS``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows inside the window, with ``Date`` as
        datetime, the ``"Predicted_"`` prefix removed from the column names and
        every pollutant column rounded to the nearest integer.

    Raises
    ------
    ValueError
        If a pollutant column contains NaN/inf, which cannot be cast to int.
    """
    # Fall back to the notebook configuration only when no override is given
    window_start = pd.Timestamp(START_DATE if start_date is None else start_date)
    window_end = pd.Timestamp(END_DATE if end_date is None else end_date)
    known_pollutants = POLLUTANTS if pollutants is None else pollutants

    dates = pd.to_datetime(df["Date"])
    in_window = (dates >= window_start) & (dates <= window_end)

    # An explicit copy keeps the caller's frame untouched and avoids
    # SettingWithCopyWarning on the column assignments below.
    out = df.loc[in_window].copy()
    out["Date"] = dates[in_window]
    out.columns = out.columns.str.removeprefix("Predicted_")

    for column in out.columns:
        if column in known_pollutants:  # exact match on the prefix-free name, e.g. "PM2.5"
            # Round first: a bare astype(int) would truncate 3.99 down to 3
            out[column] = out[column].round().astype(int)

    return out

# Swap `df` for its preprocessed version (rows inside the START_DATE..END_DATE window,
# "Predicted_" prefix removed, pollutant columns as integers); later cells use this frame.
df = preprocess(df)

df

Unnamed: 0,Date,PM2.5,PM10,SO2,O3
12,2025-10-14,5,8,1,55
13,2025-10-15,5,8,1,55
14,2025-10-16,5,8,1,55
15,2025-10-17,5,8,1,55
16,2025-10-18,5,8,1,55
17,2025-10-19,5,8,1,55
18,2025-10-20,5,8,1,55
19,2025-10-21,5,8,1,55


In [37]:
def _strip_alpha_channel(color):
    """Return '#rrggbb' for a '#rrggbbaa' color string; pass anything else through unchanged."""
    if isinstance(color, str) and color.startswith("#") and len(color) == 9:
        return color[:7]
    return color


def make_plotly_prediction_table(df, filename, dir_predictions, show_fig=False):
    """Render the predictions as a color-coded Plotly table and save it as HTML.

    The table has a white ``Date`` column followed by one column per pollutant
    found in ``df``. Each pollutant cell is filled with the color that
    ``utils.cmaps[pollutant]`` returns for its value (alpha channel removed).

    Parameters
    ----------
    df : pandas.DataFrame
        Predictions with a datetime ``Date`` column. Pollutant columns may be
        named with or without the ``"Predicted_"`` prefix; columns that are not
        listed in ``POLLUTANTS`` are left out of the table.
    filename : str
        Name of the source file; its extension is replaced by ``.html``.
    dir_predictions : str
        Directory the HTML file is written to.
    show_fig : bool, default False
        Also display the figure.

    Returns
    -------
    None
        The figure is written to ``<dir_predictions>/<filename stem>.html``.
    """
    # Header, values and colors are built side by side so they always line up,
    # whatever other columns the frame carries.
    header_values = ["Date"]
    cell_values = [df["Date"].dt.strftime("%Y-%m-%d").to_list()]
    cell_colors = [["white"] * len(df)]

    for column in df.columns:
        # Exact match on the prefix-free name, as in the other helpers
        pollutant = str(column).removeprefix("Predicted_")
        if pollutant not in POLLUTANTS:
            continue

        values = df[column].to_list()
        cmap = utils.cmaps[pollutant]

        header_values.append(column)
        cell_values.append(values)
        # A plain comprehension also works for an empty frame, where the
        # Series `.str` accessor would raise.
        cell_colors.append([_strip_alpha_channel(cmap(value)) for value in values])

    fig = go.Figure(data=[go.Table(
        header=dict(values=header_values,
                    fill_color="paleturquoise"
                    ),
        cells=dict(values=cell_values,
                   fill_color=cell_colors
                   ),
        )
    ])

    if show_fig:
        fig.show()

    # Same base name as the source file, with ".html" instead of its extension
    html_name = f"{os.path.splitext(filename)[0]}.html"
    fig.write_html(os.path.join(dir_predictions, html_name))

# Try the helper on the example station and display the figure inline
make_plotly_prediction_table(df, filename, PREDICTION_DIR, show_fig=True)

Define a function that generates the HTML table as a string, for use in the TimestampedGeoJson popup

In [38]:
def make_html_prediction_table(df, dir_predictions, filename, return_string: bool):
    """Build a color-coded HTML table of the predictions with the pandas Styler.

    ``Date`` becomes the table index. Every remaining column that has an entry
    in ``utils.cmaps`` (looked up by its name without the ``"Predicted_"``
    prefix) gets a per-cell ``background-color`` taken from that colormap.
    Columns without a colormap are rendered unstyled.

    Note that the argument order (``dir_predictions`` before ``filename``)
    differs from ``make_plotly_prediction_table``.

    Parameters
    ----------
    df : pandas.DataFrame
        Predictions with a datetime ``Date`` column. The input is not modified.
    dir_predictions : str
        Directory for the HTML file (only used when ``return_string`` is False).
    filename : str
        Name of the source file; its extension is replaced by ``.html``
        (only used when ``return_string`` is False).
    return_string : bool
        True: return the HTML source. False: write it to
        ``<dir_predictions>/<filename stem>.html``.

    Returns
    -------
    str or None
        The HTML source when ``return_string`` is True, otherwise None.
    """

    def background_css(val, cmap):
        return f"background-color: {cmap(val)}"

    table = df.copy()
    table["Date"] = table["Date"].dt.strftime("%Y-%m-%d")
    table = table.set_index("Date")

    styled = table.style

    for column in table.columns:
        pollutant = str(column).removeprefix("Predicted_")
        try:
            # Look the colormap up once per column rather than once per cell
            cmap = utils.cmaps[pollutant]
        except KeyError:
            # No color scale for this column: leave it unstyled instead of failing
            continue
        styled = styled.map(background_css, subset=[column], cmap=cmap)

    if return_string:
        # Return the HTML source as a string
        return styled.to_html()

    # Write the HTML file; Styler.to_html returns None when given a target
    html_name = f"{os.path.splitext(filename)[0]}.html"
    return styled.to_html(os.path.join(dir_predictions, html_name))


# Independent copy of the preprocessed frame for trying out the helper
df_test = df.copy()

# return_string=True -> the helper returns the table's HTML source instead of writing a file;
# the bare `styled` below echoes that string as the cell output
styled = make_html_prediction_table(df_test, PREDICTION_DIR, filename, return_string=True)
styled

'<style type="text/css">\n#T_bd7d7_row0_col0, #T_bd7d7_row0_col1, #T_bd7d7_row0_col2, #T_bd7d7_row0_col3, #T_bd7d7_row1_col0, #T_bd7d7_row1_col1, #T_bd7d7_row1_col2, #T_bd7d7_row1_col3, #T_bd7d7_row2_col0, #T_bd7d7_row2_col1, #T_bd7d7_row2_col2, #T_bd7d7_row2_col3, #T_bd7d7_row3_col0, #T_bd7d7_row3_col1, #T_bd7d7_row3_col2, #T_bd7d7_row3_col3, #T_bd7d7_row4_col0, #T_bd7d7_row4_col1, #T_bd7d7_row4_col2, #T_bd7d7_row4_col3, #T_bd7d7_row5_col0, #T_bd7d7_row5_col1, #T_bd7d7_row5_col2, #T_bd7d7_row5_col3, #T_bd7d7_row6_col0, #T_bd7d7_row6_col1, #T_bd7d7_row6_col2, #T_bd7d7_row6_col3, #T_bd7d7_row7_col0, #T_bd7d7_row7_col1, #T_bd7d7_row7_col2, #T_bd7d7_row7_col3 {\n  background-color: #008000ff;\n}\n</style>\n<table id="T_bd7d7">\n  <thead>\n    <tr>\n      <th class="blank level0" >&nbsp;</th>\n      <th id="T_bd7d7_level0_col0" class="col_heading level0 col0" >PM2.5</th>\n      <th id="T_bd7d7_level0_col1" class="col_heading level0 col1" >PM10</th>\n      <th id="T_bd7d7_level0_col2" class

Iterate through the CSV prediction files and generate an HTML table string for each one

In [39]:
dir_predictions = os.path.join(os.getcwd(), PREDICTION_DIR)

station_tables = {}  # mapping of {air quality station name: table HTML string}

# Loop-local names are used on purpose so the notebook-level `df` and `filename`
# from the cells above are not overwritten by this batch run.
for root, dirs, files in os.walk(dir_predictions):  # scans the prediction directory recursively
    dirs.sort()  # os.walk order is unspecified; sorting keeps the output reproducible
    for csv_name in sorted(files):
        if not csv_name.endswith(".csv"):
            continue
        station_df = preprocess(pd.read_csv(os.path.join(root, csv_name)))

        # Plotly tables (make_plotly_prediction_table) are interactive but too large,
        # so only the HTML string is generated here.
        station = os.path.splitext(csv_name)[0].removeprefix("predictions_station_")
        station_tables[station] = make_html_prediction_table(
            station_df, dir_predictions, csv_name, return_string=True
        )

# One row, one column per station; written next to the prediction CSVs
df_html_tables = pd.DataFrame([station_tables])
df_html_tables.to_csv(os.path.join(dir_predictions, "predictions_html.txt"), index=False)