# Visualization of Predictions
Export the predictions as color-coded tables in HTML format.

In [80]:
%matplotlib widget
import os
import pandas as pd
from datetime import datetime, timedelta
import utils

In [81]:
# Directory (relative to the working directory) that holds the prediction CSV files.
PREDICTION_DIR = r"predictions"

# Read the clock once so both bounds are derived from the same "today",
# even if the cell happens to run across midnight.
_TODAY = datetime.today()
START_DATE = _TODAY.strftime('%Y-%m-%d')
END_DATE = (_TODAY + timedelta(days=6)).strftime('%Y-%m-%d') # today + 6 days -> 7-day window, both ends inclusive

# Column names treated as pollutant concentrations by preprocess().
POLLUTANTS = ["SO2", "NOy", "CO", "C6H6", "PM10", "PM2.5", "Pb", "O3"]

In [82]:
# Sample station file used to try out the processing steps below.
filename = "predictions_station_Kallio 2.csv"

# Read it from the predictions directory and preview the first rows.
df = pd.read_csv(f"{PREDICTION_DIR}/{filename}")
df.head()

Unnamed: 0,Date,Predicted_PM2.5,Predicted_PM10,Predicted_SO2,Predicted_O3
0,2025-10-02,3.987999,7.508876,0.806964,55.983604
1,2025-10-03,4.409436,6.970961,0.591071,53.631402
2,2025-10-04,4.716873,7.002096,0.804539,52.971061
3,2025-10-05,4.953396,7.182804,1.274145,53.463397
4,2025-10-06,5.148458,7.399596,1.725513,54.246363


In [83]:
def preprocess(df, start_date=None, end_date=None, pollutants=None):
    """Prepare a raw prediction table for display.

    Works on a copy of ``df``: parses the ``Date`` column, keeps only the rows
    whose date lies in the inclusive range ``[start_date, end_date]``, strips
    the ``Predicted_`` prefix from the column names and casts the pollutant
    columns to ``int``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Date`` column that ``pd.to_datetime`` can parse.
    start_date, end_date : str or datetime-like, optional
        Inclusive bounds of the date window. Default to the notebook-level
        ``START_DATE`` / ``END_DATE``.
    pollutants : iterable of str, optional
        Column names (after prefix removal) to cast to ``int``. Defaults to
        the notebook-level ``POLLUTANTS``.

    Returns
    -------
    pandas.DataFrame
        The filtered, renamed and cast copy; the input frame is not modified.

    Notes
    -----
    ``astype(int)`` truncates toward zero rather than rounding, and raises if
    a pollutant column contains missing values.
    """
    # Fall back to the notebook configuration only for arguments not supplied.
    start_date = START_DATE if start_date is None else start_date
    end_date = END_DATE if end_date is None else end_date
    pollutants = POLLUTANTS if pollutants is None else pollutants

    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"])

    # Filter to a smaller range of dates (both ends inclusive); copy so the
    # column edits below act on an independent frame.
    in_window = (df["Date"] >= start_date) & (df["Date"] <= end_date)
    df = df[in_window].copy()

    df.columns = df.columns.str.removeprefix("Predicted_")

    # Only columns whose name exactly matches a pollutant name (e.g. "PM2.5") are cast.
    pollutant_columns = [column for column in df.columns if column in pollutants]
    return df.astype({column: int for column in pollutant_columns})

# Replace the sample frame with its preprocessed version; later cells reuse `df`.
df = preprocess(df)

df

Unnamed: 0,Date,PM2.5,PM10,SO2,O3
12,2025-10-14,5,8,1,55
13,2025-10-15,5,8,1,55
14,2025-10-16,5,8,1,55
15,2025-10-17,5,8,1,55
16,2025-10-18,5,8,1,55
17,2025-10-19,5,8,1,55
18,2025-10-20,5,8,1,55


Define a function that generates an HTML table (as a string) for the `TimestampedGeoJson` popup.

In [84]:
def make_html_prediction_table(df, dir_predictions, filename, return_string:bool):
    """Build a color-coded table of the predictions in ``df``.

    Every column after the first is colored cell by cell with the colormap
    stored under its pollutant name in ``utils.cmaps``. A constant ``Health``
    column ("Good :)") with a green background is appended, and the integer
    index is hidden.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed predictions. ``Date`` must be a datetime column; the code
        skips the first column when coloring, so it presumably expects
        ``Date`` to come first -- confirm against the caller.
    dir_predictions, filename
        Currently unused; kept so existing callers keep working.
    return_string : bool
        If true, return the table as an HTML string; otherwise return the
        ``Styler`` object so a notebook can render it.

    Returns
    -------
    str or pandas.io.formats.style.Styler
    """

    def map_html_color(val, pollutant):
        # The colormap is called with the cell value and its result is
        # inserted verbatim as the CSS background color.
        cmap = utils.cmaps[pollutant]
        return f"background-color: {cmap(val)}"

    def map_green(val):
        return "background-color: #16ab16"

    df = df.copy()
    df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")

    # Remember which columns get colormap styling before "Health" is appended,
    # so the health column is not looked up in utils.cmaps.
    pollutant_columns = list(df.columns[1:])

    # Add the health column *before* creating the Styler, so the Styler is
    # built from the final frame rather than from one that is mutated afterwards.
    df["Health"] = "Good :)"

    styled = df.style

    # Color cells for pollutant concentrations
    for column in pollutant_columns:
        pollutant = column.removeprefix("Predicted_")
        styled = styled.map(map_html_color, subset=[column], pollutant=pollutant)

    # Color cells for health index
    styled = styled.map(map_green, subset=["Health"])

    styled = styled.hide(axis="index")  # hide integer indices

    if return_string:
        return styled.to_html()  # return html as string
    return styled  # return styled object for viewing on ipynb output

# Try the table builder on a copy of the preprocessed sample frame.
df_test = df.copy()

# return_string=False returns the Styler itself, which the notebook renders as the cell output.
styled = make_html_prediction_table(df_test, PREDICTION_DIR, filename, return_string=False)
print(filename)
styled

predictions_station_Kallio 2.csv


Date,PM2.5,PM10,SO2,O3,Health
2025-10-14,5,8,1,55,Good :)
2025-10-15,5,8,1,55,Good :)
2025-10-16,5,8,1,55,Good :)
2025-10-17,5,8,1,55,Good :)
2025-10-18,5,8,1,55,Good :)
2025-10-19,5,8,1,55,Good :)
2025-10-20,5,8,1,55,Good :)


Iterate through the CSV prediction files, generate an HTML table string for each station, and export the strings to a .txt file.

In [85]:
# Absolute path of the predictions directory (resolved against the current working directory).
dir_predictions = os.path.join(os.getcwd(), PREDICTION_DIR)

html_by_station = {} # mapping of {Air quality station name: table html string}

for root, dirs, files in os.walk(dir_predictions): # scans through data directory recursively
    # os.walk yields entries in arbitrary order; sorting the sub-directories
    # in place and the file names makes the exported column order reproducible.
    dirs.sort()
    for filename in sorted(files):
        if not filename.endswith(".csv"):
            continue
        df = pd.read_csv(os.path.join(root, filename))
        df = preprocess(df)

        html_string = make_html_prediction_table(df, dir_predictions, filename, return_string=True)
        # The station name is the file name without its extension and "predictions_station_" prefix.
        station = os.path.splitext(filename)[0].removeprefix("predictions_station_")
        html_by_station[station] = html_string

# One row with one column per station, written in CSV format to a .txt file.
df_html_tables = pd.DataFrame([html_by_station])
df_html_tables.to_csv(os.path.join(dir_predictions, 'predictions_html.txt'), index=False)