# 1. Import Libraries

In [103]:
import gradio as gr 

import random
import os
import joblib
from dotenv import load_dotenv, find_dotenv

import pandas as pd
import numpy as np
import plotly.graph_objects as go

from PIL import Image
import base64
from io import BytesIO

from sktime.forecasting.base import ForecastingHorizon
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

Load the API key and initialize the LLM chat model

In [104]:
# Load variables from a .env file, if one is found, into the process environment.
_ = load_dotenv(find_dotenv())

# SECURITY: never commit API keys. The key is read from the GROQ_API_KEY
# environment variable (typically supplied through .env). Any key that has ever
# been committed to version control must be revoked and replaced.
groq_api_key = os.environ.get("GROQ_API_KEY")

# Pass the key explicitly so the client is guaranteed to use the value read above.
# NOTE(review): temperature=1.5 is high for calls that rely on structured
# output - confirm it is intentional.
llm_model = ChatGroq(
    temperature=1.5,
    model="llama-3.1-70b-versatile",
    groq_api_key=groq_api_key,
)

# 2. Load all needed files

### Plot Image files

In [119]:
# Directory that holds the pre-rendered plot images (relative to this notebook).
plot_folder = '../img'

# Plot key -> image file name. Questions 1-3 have two plots each (suffixes
# x1 / x2); questions 4 and 5 have a single plot.
file_names = {
    f"question{suffix}": f"question{suffix}.png"
    for suffix in ("11", "12", "21", "22", "31", "32", "4", "5")
}

# Plot key -> path of the image inside plot_folder.
file_paths = dict(
    (plot_key, os.path.join(plot_folder, image_name))
    for plot_key, image_name in file_names.items()
)

### Model files

In [120]:
# Directory that holds the serialized model artifacts (relative to this notebook).
model_folder = '../model'

# Resolve the path of every artifact inside model_folder.
model_path, scaler_path, label_encoder_path, forecast_model_path = (
    os.path.join(model_folder, artifact_name)
    for artifact_name in (
        'classification.pkl',
        'scaler.pkl',
        'label_encoder.pkl',
        'ts_forecasting_model.pkl',
    )
)

# NOTE: joblib.load unpickles arbitrary Python objects - only load artifacts
# that come from a trusted source.
loaded_model, loaded_scaler, label_encoder, forecast_model = map(
    joblib.load,
    (model_path, scaler_path, label_encoder_path, forecast_model_path),
)

# 3. Tabs for Questions and Models

### Question 1

In [121]:
def generate_page1():
    """Build the content of the "Question 1" page (wind speed/direction vs. PM10).

    Both plot images are looked up in ``file_paths``, re-encoded as PNG in
    memory and embedded into ``<img>`` tags as base64 data URIs.

    Returns
    -------
    tuple of str
        ``(question1, reason, chart1, comment1, chart2, comment2)``: the
        question text, the motivation (HTML), the heatmap ``<img>`` tag, its
        commentary (HTML), the bar-chart ``<img>`` tag and its commentary (HTML).
    """

    question1 = "Is there a correlation between wind speed/direction and PM10 levels? Does wind from certain directions bring higher pollution levels?"

    # The context managers release the image and the in-memory buffer even if
    # saving raises.
    with Image.open(file_paths["question11"]) as image11, BytesIO() as buf11:
        image11.save(buf11, format='PNG')
        image11_base64 = base64.b64encode(buf11.getvalue()).decode('utf-8')
    chart1 = f'<img src="data:image/png;base64,{image11_base64}" alt="heatmap" style="max-width:100%; height:auto;">'

    with Image.open(file_paths["question12"]) as image12, BytesIO() as buf12:
        image12.save(buf12, format='PNG')
        image12_base64 = base64.b64encode(buf12.getvalue()).decode('utf-8')
    chart2 = f'<img src="data:image/png;base64,{image12_base64}" alt="bar_chart" style="max-width:100%; height:auto;">'

    reason = """
    <h4 style="font-size:20px; color:#004d99;">Answering this question will</h4>
    <p style="font-size:18px; line-height:1.6;">
    This will help us understand how wind patterns impact air quality, aiding in pollution prediction and control.
    This insight can assist urban planners and environmental agencies in identifying areas at risk of higher pollution due to prevailing winds,
    enabling more targeted interventions.
    </p>
    """

    comment1 = """
    <h4 style="font-size:20px; color:#004d99;">From the heatmap, the following insights can be drawn:</h4>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Wind Speed and PM10 Levels:</b> There is an observable trend where higher wind speeds (on the right side of the x-axis) generally correspond to lower average PM10 concentrations. This suggests that as wind speed increases, it helps disperse particulate matter, leading to a reduction in pollution levels in the area.<br>
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Wind Direction and PM10 Levels:</b> Certain wind directions are associated with higher PM10 levels. For instance:<br>
    &nbsp;&nbsp;&nbsp;&nbsp;- Wind directions in the range of <b>0°-120°</b> (towards the top of the y-axis), combined with lower wind speeds, are linked to elevated PM10 concentrations.<br>
    &nbsp;&nbsp;&nbsp;&nbsp;- Wind directions between <b>270°-300°</b> also show slightly increased PM10 levels, though less prominent than the 0°-120° range.<br>
    </p>

    <h4 style="font-size:20px; color:#004d99;">Summary:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Winds coming from specific directions (<b>0°-120°</b> and to a lesser extent, <b>270°-300°</b>) are more likely to bring higher levels of particulate pollution, particularly when wind speeds are low. Both wind speed and direction influence PM10 levels, with lower wind speeds and certain wind directions being correlated with higher pollution.
    </p>
    """

    comment2 = """
    <h4 style="font-size:20px; color:#004d99;">The wind direction is categorized into bins corresponding to compass directions (e.g., N, NE, etc.).</h4>

    <h5 style="font-size:18px; color:#333366;">From the bar chart, we can observe the following:</h5>
    <p style="font-size:18px; line-height:1.6;">
    - <b>High PM10 Levels from West and Northwest:</b> The highest average PM10 concentrations are observed when the wind comes from the <b>West (W)</b> and <b>Northwest (NW)</b> directions, with averages of <b>41.5 µg/m³</b> and <b>45.9 µg/m³</b>, respectively. This suggests that winds from these directions are associated with increased PM10 levels.<br>
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Moderate PM10 Levels from North and South:</b> Winds from the <b>North (N)</b> and <b>South (S)</b> also show relatively high PM10 levels, around <b>39.7 µg/m³</b> and <b>36.3 µg/m³</b>, respectively. These directions may contribute to moderate pollution levels.<br>
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Lower PM10 Levels from East and Southeast:</b> Winds from the <b>East (E)</b> and <b>Southeast (SE)</b> bring in lower PM10 levels, averaging <b>28.6 µg/m³</b> and <b>33.1 µg/m³</b>. This could indicate fewer pollution sources or better pollutant dispersion.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Summary:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Winds from the <b>West</b> and <b>Northwest</b> bring the highest PM10 pollution levels. In contrast, winds from the <b>East</b> and <b>Southeast</b> are associated with cleaner air, possibly due to fewer pollution sources or more effective pollutant dispersion in those directions.
    </p>
    """

    return question1, reason, chart1, comment1, chart2, comment2

def web_question1():
    """Create the Gradio components of the "Question 1" page.

    Components are instantiated in display order: title, motivation, "Answer"
    heading, then each chart followed by its commentary.
    Presumably called inside an active Gradio layout context - verify at the call site.
    """
    question, reason, chart1, comment1, chart2, comment2 = generate_page1()

    title_html = f"""<h2 style="font-size:28px; color:#333366;">Question 1: {question}</h2>"""
    answer_html = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    page_layout = (
        (gr.Markdown, title_html),
        (gr.Markdown, reason),
        (gr.Markdown, answer_html),
        (gr.HTML, chart1),
        (gr.Markdown, comment1),
        (gr.HTML, chart2),
        (gr.Markdown, comment2),
    )
    for component, content in page_layout:
        component(content)

### Question 2

In [122]:
def generate_page2():
    """Build the content of the "Question 2" page (seasonal/monthly air-quality patterns).

    Both plot images are looked up in ``file_paths``, re-encoded as PNG in
    memory and embedded into ``<img>`` tags as base64 data URIs.

    Returns
    -------
    tuple of str
        ``(question2, reason, chart1, chart2, comment)``: the question text,
        the motivation (HTML), the two line-chart ``<img>`` tags and the
        commentary (HTML).
    """

    question2 = "Are there distinct seasonal or monthly patterns in air quality metrics?"

    # The context managers release the image and the in-memory buffer even if
    # saving raises.
    with Image.open(file_paths["question21"]) as image21, BytesIO() as buf21:
        image21.save(buf21, format='PNG')
        image21_base64 = base64.b64encode(buf21.getvalue()).decode('utf-8')
    # alt text must read "line_chart" (it used to be misspelled).
    chart1 = f'<img src="data:image/png;base64,{image21_base64}" alt="line_chart" style="max-width:100%; height:auto;">'

    with Image.open(file_paths["question22"]) as image22, BytesIO() as buf22:
        image22.save(buf22, format='PNG')
        image22_base64 = base64.b64encode(buf22.getvalue()).decode('utf-8')
    chart2 = f'<img src="data:image/png;base64,{image22_base64}" alt="line_chart" style="max-width:100%; height:auto;">'

    reason = """
    <h4 style="font-size:20px; color:#004d99;">Answering this question will:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Help us understand how air quality varies throughout the year and the influence of seasonal weather changes. This insight is valuable for forecasting pollution trends, informing public health advisories, and developing seasonal strategies to manage air quality.
    </p>
    """
    comment = """
    <h4 style="font-size:20px; color:#004d99;">US AQI Trends:</h4>
    <p style="font-size:18px; line-height:1.6;">
    - The <b>US AQI</b> shows noticeable fluctuations throughout the year, with peaks occurring during certain months (e.g., October) and troughs around January and mid-year.<br>
    - This suggests a possible seasonal influence, such as weather conditions or human activities impacting air quality.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Carbon Monoxide Trends:</h4>
    <p style="font-size:18px; line-height:1.6;">
    - <b>Carbon monoxide (CO)</b> levels exhibit a decline around January, likely due to cooler, wetter conditions reducing vehicle emissions or enhancing atmospheric dispersion.<br>
    - The mid-year recovery of CO levels might relate to increased urban activities or reduced rainfall during those months.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Ozone Trends:</h4>
    <p style="font-size:18px; line-height:1.6;">
    - <b>Ozone (O₃)</b> concentration follows a distinct seasonal pattern, with higher values during warmer months (e.g., July and August) and lower levels during colder months like January.<br>
    - This aligns with ozone formation, which relies on sunlight and heat, typically more abundant in summer.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Other Pollutants (PM10, PM2.5, NO₂, SO₂):</h4>
    <p style="font-size:18px; line-height:1.6;">
    - These pollutants generally show a decrease in colder months (e.g., January) and an increase around mid-year and into October.<br>
    - Factors like increased rainfall during the rainy season can wash out particles, while drier months may contribute to pollutant accumulation.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Conclusion:</h4>
    <p style="font-size:18px; line-height:1.6;">
    - <b>Air quality metrics</b> reveal a clear seasonal pattern:<br>
    &nbsp;&nbsp;&nbsp;&nbsp;- <b>Dry Season (October-April):</b> Higher concentrations of particulate matter and pollutants, possibly due to reduced atmospheric cleansing (e.g., less rainfall) and increased human activities.<br>
    &nbsp;&nbsp;&nbsp;&nbsp;- <b>Rainy Season (May-September):</b> Improved air quality due to the washing effect of rain and atmospheric dispersion.<br>
    - Correlations between weather metrics (e.g., precipitation, temperature) and air quality are evident. Warm, dry periods tend to elevate ozone and particulate levels, while wet, cold periods help reduce them.
    </p>
    """

    return question2, reason,  chart1, chart2, comment

def web_question2():
    """Create the Gradio components of the "Question 2" page.

    Components are instantiated in display order: title, motivation, "Answer"
    heading, both charts, then the commentary.
    Presumably called inside an active Gradio layout context - verify at the call site.
    """
    question, reason, chart1, chart2, comment = generate_page2()

    title_html = f"""<h2 style="font-size:28px; color:#333366;">Question 2: {question}</h2>"""
    answer_html = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    # Text blocks shown above the charts.
    for text_block in (title_html, reason, answer_html):
        gr.Markdown(text_block)

    # Charts are raw <img> tags, so they go through gr.HTML.
    for chart_html in (chart1, chart2):
        gr.HTML(chart_html)

    gr.Markdown(comment)



### Question 3

In [123]:
def generate_page3():
    """Build the content of the "Question 3" page (rainfall vs. pollutant levels).

    Both plot images are looked up in ``file_paths``, re-encoded as PNG in
    memory and embedded into ``<img>`` tags as base64 data URIs.

    Returns
    -------
    tuple of str
        ``(question, reason, table1, table2, chart31, chart32, comment)``: the
        question text, the motivation (HTML), two Markdown tables (per-rain-type
        statistics and reduction/ANOVA results), the box-plot and bar-chart
        ``<img>`` tags and the commentary (HTML).
    """
    question = "What is the relationship between weather status and air quality? Does rainfall help reduce pollutant concentrations, and if so, to what extent?"

    # The context managers release the image and the in-memory buffer even if
    # saving raises.
    with Image.open(file_paths["question31"]) as image31, BytesIO() as buf31:
        image31.save(buf31, format='PNG')
        image31_base64 = base64.b64encode(buf31.getvalue()).decode('utf-8')
    # The alt attribute needs both quotes; a missing opening quote yields malformed HTML.
    chart31 = f'<img src="data:image/png;base64,{image31_base64}" alt="box_plot" style="max-width:100%; height:auto;">'

    with Image.open(file_paths["question32"]) as image32, BytesIO() as buf32:
        image32.save(buf32, format='PNG')
        image32_base64 = base64.b64encode(buf32.getvalue()).decode('utf-8')
    chart32 = f'<img src="data:image/png;base64,{image32_base64}" alt="bar_chart" style="max-width:100%; height:auto;">'

    reason = """
    <h4 style="font-size:20px; color:#004d99;">Answering this question will:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Clarify whether and how rainfall affects pollutant levels (e.g., PM2.5, PM10, and US AQI), offering insights into natural pollutant reduction mechanisms. This knowledge can support air quality management strategies, particularly in regions with varying rainfall patterns, and help inform public health recommendations during different weather conditions.
    </p>
    """

    table1 = """
    | Rain Type | PM10 Mean | PM10 Median | PM10 Std | PM2.5 Mean | PM2.5 Median | PM2.5 Std | US AQI Mean | US AQI Median | US AQI Std |
    |-----------|-----------|-------------|----------|------------|--------------|-----------|-------------|---------------|------------|
    | Drizzle   | 32.987159 | 30.6        | 13.662533| 22.281315  | 20.7         | 9.392538  | 74.340142   | 71.515960     | 17.165444  |
    | No Rain   | 35.671043 | 31.9        | 17.996838| 23.826444  | 21.1         | 12.521056 | 74.127290   | 71.648933     | 20.155288  |
    | Rain      | 33.372395 | 31.5        | 12.728185| 22.641353  | 21.3         | 8.744586  | 74.364449   | 72.943260     | 17.072013  |
    """

    table2 = """
    | Metric                   | Drizzle Reduction (%) | Rain Reduction (%) | ANOVA p-value           |
    |--------------------------|-----------------------|---------------------|-------------------------|
    | PM10 Reduction           | 7.52                 | 6.44                | 6.977636250745284e-18   |
    | PM2.5 Reduction          | 6.48                 | 4.97                | 3.551982339308116e-12   |
    | US AQI Reduction         | -0.29                | -0.32               | 0.8100562102478184      |

    """

    # The PM2.5 means quoted below are the table1 values rounded to two
    # decimals, so the narrative agrees with the table.
    comment = """
    <h4 style="font-size:20px; color:#004d99;">Based on the statistical analysis and boxplot charts:</h4>

    <p style="font-size:18px; line-height:1.6;">
    - <b>PM10:</b> Decreased by approximately <b>6–7%</b> during "Rain" and "Drizzle" compared to "No Rain", indicating that rainfall helps reduce larger particulate matter.<br>
    - <b>PM2.5:</b> Lower during "Rain" (<b>22.64 µg/m³</b>) compared to "No Rain" (<b>23.83 µg/m³</b>), suggesting that rainfall might help reduce smaller particulates as well.<br>
    - <b>US AQI:</b> Remains relatively stable across different weather conditions (Rain, Drizzle, No Rain), with no significant fluctuations. This implies that rainfall does not have a major effect on the overall air quality as represented by AQI.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Summary:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Rainfall has a minor but statistically significant effect on reducing particulate pollutants (<b>PM10</b> and <b>PM2.5</b>), with reductions of <b>5–7%</b>. However, it does not significantly impact the overall air quality, as represented by the <b>US AQI</b>. This suggests that while rain can help lower particulate matter levels, other factors such as temperature, wind speed, and pollution sources may have a more substantial influence on AQI. Further research into these factors is needed to better understand their impact on air quality.
    </p>
    """

    return question, reason, table1, table2, chart31, chart32, comment

def web_question3():
    """Create the Gradio components of the "Question 3" page.

    Components are instantiated in display order: title, motivation, "Answer"
    heading, then each table followed by its chart, and finally the commentary.
    Presumably called inside an active Gradio layout context - verify at the call site.
    """
    question, reason, table1, table2, chart1, chart2, comment = generate_page3()

    title_html = f"""<h2 style="font-size:28px; color:#333366;">Question 3: {question}</h2>"""
    answer_html = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    for text_block in (title_html, reason, answer_html):
        gr.Markdown(text_block)

    # Each statistics table is immediately followed by the chart it belongs to.
    for table_md, chart_html in ((table1, chart1), (table2, chart2)):
        gr.Markdown(table_md)
        gr.HTML(chart_html)

    gr.Markdown(comment)


### Question 4

In [124]:
def generate_page4():
    """Build the content of the "Question 4" page (pollution by time of day).

    The plot image is looked up in ``file_paths``, re-encoded as PNG in memory
    and embedded into an ``<img>`` tag as a base64 data URI.

    Returns
    -------
    tuple of str
        ``(question4, reason, chart, table, comment)``: the question text, the
        motivation (HTML), the bar-chart ``<img>`` tag, a Markdown table of mean
        pollutant levels per time of day and the commentary (HTML).
    """

    question4 = "Are there specific times of day (morning, afternoon, evening) when pollution levels tend to be higher?"

    # The context managers release the image and the in-memory buffer even if
    # saving raises.
    with Image.open(file_paths["question4"]) as image4, BytesIO() as buf4:
        image4.save(buf4, format='PNG')
        image4_base64 = base64.b64encode(buf4.getvalue()).decode('utf-8')
    chart = f'<img src="data:image/png;base64,{image4_base64}" alt="bar_chart">'

    reason = """
    <h4 style="font-size:20px; color:#004d99;">Answering this question will:</h4>
    <p style="font-size:18px; line-height:1.6;">
    Help us identify peak times for pollution during the day, which can be crucial for public health advisories and urban planning. Understanding daily pollution patterns enables environmental agencies to provide timely warnings and helps the public avoid outdoor activities during high-pollution periods.
    </p>
    """


    table = """
    | Time of Day | PM10       | PM2.5      | Carbon Monoxide | Nitrogen Dioxide | Sulphur Dioxide | Ozone      |
    |-------------|------------|------------|-----------------|------------------|-----------------|------------|
    | Morning     | 31.806175  | 21.311003  | 410.046121      | 23.269318        | 21.521233       | 46.114716  |
    | Afternoon   | 33.790926  | 22.350479  | 326.167579      | 17.503272        | 18.989216       | 102.154127 |
    | Evening     | 37.701977  | 25.411155  | 538.672180      | 37.215440        | 21.674070       | 30.552046  |
    """

    # The ozone and particulate statements below follow the figures in `table`
    # (ozone peaks in the afternoon, PM is highest in the evening) so that the
    # bullet points agree with both the table and the summary paragraph.
    comment = """
    <h4 style="font-size:20px; color:#004d99;">Based on the chart, we can observe the following:</h4>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Ozone:</b> Ozone concentrations peak in the afternoon (around <b>102.2 µg/m³</b>) and are much lower in the morning and evening (<b>46.1 µg/m³</b> and <b>30.6 µg/m³</b>, respectively). Ozone formation is typically driven by sunlight, so the afternoon maximum reflects the strongest sunlight of the day, while the lower morning and evening values reflect weaker or absent sunlight.
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>PM10 and PM2.5:</b> PM10 and PM2.5 levels are somewhat consistent across different times of day, with slightly higher concentrations in the evening. This pattern suggests that these particulate pollutants might be less sensitive to sunlight or cloud cover and more affected by traffic or industrial activities, which remain steady throughout the day.
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Carbon Monoxide (CO):</b> CO levels show dramatic peaks in the evening (<b>530 μg/m³</b>) and morning (<b>410 μg/m³</b>), with lower concentrations in the afternoon (<b>330 μg/m³</b>). This pattern strongly correlates with rush hour traffic patterns, reflecting vehicle emissions during peak commuting times.
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Nitrogen and Sulfur Oxides:</b> NO₂ shows minor increases in evening hours, while SO₂ maintains stable, low concentrations throughout the day. These patterns likely reflect a combination of industrial emissions and traffic patterns, with NO₂ more responsive to vehicle emissions during peak travel times.
    </p>

    <h4 style="font-size:20px; color:#004d99;">Summary:</h4>
    <p style="font-size:18px; line-height:1.6;">
    In summary, the pollution patterns show distinct daily cycles where <b>carbon monoxide</b> peaks dramatically during morning and evening rush hours (<b>410-530 μg/m³</b>), while <b>ozone</b> shows a characteristic afternoon peak (~<b>100 µg/m³</b>) driven by sunlight intensity. <b>Particulate matter (PM10 and PM2.5)</b> maintains relatively stable levels throughout the day with minor fluctuations, and <b>nitrogen/sulfur oxides</b> show modest variations, with NO₂ slightly elevated during peak traffic periods. These patterns strongly suggest that pollution levels are primarily influenced by a combination of human activity cycles (especially traffic) and natural environmental factors like sunlight.
    </p>
    """
    return question4, reason, chart, table, comment

def web_question4():
    """Create the Gradio components of the "Question 4" page.

    Components are instantiated in display order: title, motivation, "Answer"
    heading, statistics table, chart and commentary.
    Presumably called inside an active Gradio layout context - verify at the call site.
    """
    question, reason, chart, table, comment = generate_page4()

    title_html = f"""<h2 style="font-size:28px; color:#333366;">Question 4: {question}</h2>"""
    answer_html = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    page_layout = (
        (gr.Markdown, title_html),
        (gr.Markdown, reason),
        (gr.Markdown, answer_html),
        (gr.Markdown, table),
        (gr.HTML, chart),
        (gr.Markdown, comment),
    )
    for component, content in page_layout:
        component(content)


### Question 5

In [125]:
def generate_page5():
    """Build the content of the "Question 5" page (temperature vs. NO2 and ozone).

    The plot image is looked up in ``file_paths``, re-encoded as PNG in memory
    and embedded into an ``<img>`` tag as a base64 data URI.

    Returns
    -------
    tuple of str
        ``(question5, reason, chart1, table1, table2, comment)``: the question
        text, the motivation (HTML), the scatter-plot ``<img>`` tag, a Markdown
        table of descriptive statistics per temperature category, a Markdown
        table of t-test results and the commentary (HTML).
    """
    question5 = "Does a significant increase or decrease in temperature impact pollutant levels such as NO2 and ozone?"

    # The context managers release the image and the in-memory buffer even if
    # saving raises.
    with Image.open(file_paths["question5"]) as image5, BytesIO() as buf5:
        image5.save(buf5, format='PNG')
        image5_base64 = base64.b64encode(buf5.getvalue()).decode('utf-8')
    chart1 = f'<img src="data:image/png;base64,{image5_base64}" alt="scatter_plot">'

    reason = """
    <h4 style="font-size:20px; color:#004d99; font-family: 'Arial', sans-serif;">Answering this question will:</h4>
    <p style="font-size:18px; line-height:1.6; font-family: 'Arial', sans-serif;">
    This will provide insights into how temperature fluctuations impact air quality, helping to identify critical temperature thresholds that either worsen or improve pollution levels. This knowledge is crucial for environmental monitoring and the development of adaptive air quality management strategies.
    </p>
    """

    table1 = """
    | Temp Category | Nitrogen Dioxide Count | Nitrogen Dioxide Mean | Nitrogen Dioxide Std | Nitrogen Dioxide Min | Nitrogen Dioxide 25% | Nitrogen Dioxide 50% | Nitrogen Dioxide 75% | Nitrogen Dioxide Max | Ozone Count | Ozone Mean | Ozone Std | Ozone Min | Ozone 25% | Ozone 50% | Ozone 75% | Ozone Max |
    |---------------|------------------------|-----------------------|----------------------|----------------------|----------------------|----------------------|----------------------|----------------------|-------------|------------|-----------|-----------|-----------|-----------|-----------|-----------|
    | Low           | 890                    | 35.191573             | 21.824851            | 0.0                  | 19.2125              | 30.45                | 48.6375              | 132.60               | 890         | 28.465169 | 20.334033 | 0.0       | 13.0      | 24.0      | 41.0      | 105.0     |
    | Normal        | 15782                  | 28.835794             | 17.508611            | 0.0                  | 15.8000              | 24.80                | 38.5000              | 170.35               | 15782       | 51.460525 | 40.360431 | 0.0       | 19.0      | 42.0      | 75.0      | 272.0     |
    | High          | 872                    | 9.955046              | 5.385596             | 3.3                  | 6.1000               | 8.20                 | 12.7000              | 38.60               | 872         | 105.736239 | 24.017343 | 44.0      | 91.0      | 104.0     | 119.0     | 195.0     |
    """

    table2 = """
    | Pollutant          | t-statistic | p-value         |
    |--------------------|-------------|-----------------|
    | Nitrogen Dioxide   | 33.170409   | 7.912829e-188   |
    | Ozone              | -72.939745  | 0.000000e+00    |
    """

    comment = """
    <h4 style="font-size:20px; color:#004d99;">Based on the statistics and chart above, we can observe the following:</h4>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Relationship between Temperature and NO2:</b>
        <ul>
            <li>The scatter plot shows a clear <b>negative trend</b> between temperature and nitrogen dioxide (NO2) levels. As temperature increases, NO2 levels tend to decrease.</li>
            <li>The data points are more <b>dispersed</b> at lower temperatures, indicating a wider range of NO2 concentrations.</li>
            <li>At higher temperatures, the data points <b>cluster tightly</b>, suggesting a more consistent inverse relationship between temperature and NO2.</li>
        </ul>
    </p>

    <p style="font-size:18px; line-height:1.6;">
    - <b>Relationship between Temperature and Ozone:</b>
        <ul>
            <li>The scatter plot for ozone exhibits a <b>positive trend</b>, where higher temperatures correspond to higher ozone levels.</li>
            <li>The data points are more <b>scattered</b> compared to the NO2 plot, but the overall positive correlation is still evident.</li>
            <li>At the highest temperature range, the ozone levels appear to increase more sharply, indicating a potentially <b>nonlinear relationship</b>.</li>
        </ul>
    </p>

    <h4 style="font-size:20px; color:#004d99;">Summary:</h4>
    <p style="font-size:18px; line-height:1.6;">
    These visual observations highlight the contrasting relationships between temperature and the two pollutants, NO2 and ozone. Understanding these dynamics is crucial for developing targeted air quality management strategies that account for the influence of meteorological conditions on different air pollutants.
    </p>
    """

    return question5, reason, chart1, table1, table2, comment

def web_question5():
    """Create the Gradio components of the "Question 5" page.

    Components are instantiated in display order: title, motivation, "Answer"
    heading, both statistics tables, the chart and the commentary.
    Presumably called inside an active Gradio layout context - verify at the call site.
    """
    question, reason, chart1, table1, table2, comment = generate_page5()

    title_html = f"""<h2 style="font-size:28px; color:#333366;">Question 5: {question}</h2>"""
    answer_html = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    # Everything before the chart is Markdown.
    for text_block in (title_html, reason, answer_html, table1, table2):
        gr.Markdown(text_block)

    gr.HTML(chart1)
    gr.Markdown(comment)


### AQI Prediction Model

Classify an AQI value into one of four categories (Good, Moderate, Unhealthy, Hazardous)

In [126]:
def classify_aqi(aqi):
    """Return the category label for an AQI value.

    Upper bounds are inclusive: up to 50 is "Good", up to 100 "Moderate",
    up to 200 "Unhealthy"; any value that matches none of these bounds is
    "Hazardous".
    """
    # Ordered (inclusive upper bound, label) pairs; the first match wins.
    bands = ((50, "Good"), (100, "Moderate"), (200, "Unhealthy"))
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return "Hazardous"

Pydantic class for AQI Advice

In [106]:
# Structured-output schema passed to llm_model.with_structured_output() in
# get_aqi_advice. Documented with comments rather than a class docstring on
# purpose: pydantic exposes a model docstring as the schema description, so
# adding one would change what is sent to the LLM.
class AQIAdvice(BaseModel):
    # AQI value the response refers to (per the field description).
    aqi: float = Field(description="The air quality index")
    # Advice text; this is the only field get_aqi_advice returns to its caller.
    advice: str = Field(description="The advice for the AQI level")

Function to get advice from llm

In [114]:
# Prompt used by get_aqi_advice. It is bound to a dedicated name because the
# function looks the template up at call time: with a generic global name, any
# other cell that rebinds that name would silently swap in a different prompt
# and the invoke() below would fail on missing template variables.
aqi_prompt_template = ChatPromptTemplate.from_template(
    """
    Based on the average AQI of {aqi} over the next {number} days, which is classified as {aqi_classify}, 
    provide a detailed and friendly commentary on the air quality. 
    In addition, suggest appropriate activities to do or what to wear to stay safe and comfortable in this air quality condition.
    (Make sure your response is in English, friendly, and engaging, as if you're speaking to viewers on a TV weather forecast show.)
    """
)

def get_aqi_advice(number, aqi):
    """Ask the LLM for friendly advice about a forecast AQI level.

    Parameters
    ----------
    number : int
        Number of forecast days the average covers (inserted into the prompt).
    aqi : float
        Average AQI over that period; also classified with ``classify_aqi``
        and inserted into the prompt as its category label.

    Returns
    -------
    str
        The ``advice`` field of the structured ``AQIAdvice`` response.
    """
    # Constrain the model output to the AQIAdvice schema.
    llm = llm_model.with_structured_output(schema=AQIAdvice)

    chain = aqi_prompt_template | llm

    res = chain.invoke({"number": number,
                        "aqi": aqi,
                        "aqi_classify": classify_aqi(aqi)})
    return res.advice

Function that takes the number of days to forecast and returns the predictions together with their average

In [108]:
def predict_aqi(model=forecast_model, pred_days=3):
    """Forecast AQI for the next ``pred_days`` days and average the result.

    Parameters
    ----------
    model : object, optional
        Fitted forecaster exposing ``predict(fh)``; defaults to the
        module-level ``forecast_model``.
    pred_days : int, optional
        Number of days to forecast; the horizon has 24 steps per day.
        Numeric values coming from UI widgets (e.g. ``3.0``) are converted
        with ``int()``, so fractional days are truncated.

    Returns
    -------
    dict
        ``"y_pred"``: the value returned by ``model.predict`` for the horizon;
        ``"avg"``: the mean of the predictions, rounded to 2 decimals.

    Raises
    ------
    ValueError
        If ``pred_days`` is smaller than 1 (the horizon would be empty).
    """
    # A float here would turn the horizon into a float array, which is not a
    # valid set of relative integer steps.
    pred_days = int(pred_days)
    if pred_days < 1:
        raise ValueError(f"pred_days must be at least 1, got {pred_days}")

    pred_range = pred_days * 24
    # Relative horizon: steps 1..pred_range after the end of the training data.
    fh = ForecastingHorizon(np.arange(1, pred_range + 1), is_relative=True)

    y_pred = model.predict(fh)
    overall_avg = np.mean(y_pred)

    return {
        "y_pred": y_pred,
        "avg": overall_avg.round(2)
    }

Load past AQI observations

In [109]:
def load_past_aqi(num_days, data_path='../data/clean_hcmc_waq.csv', test_size=7 * 24):
    """Load the AQI history that immediately precedes the held-out test window.

    Parameters
    ----------
    num_days : int
        Length of the history window, in days.
    data_path : str, optional
        CSV file with at least the columns ``date_time`` and ``us_aqi``.
        Defaults to the project's cleaned dataset.
    test_size : int, optional
        Number of trailing rows treated as the test window (must be >= 1).
        The history window ends at the timestamp of the first of those rows.
        Defaults to ``7 * 24`` rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``date_time`` and ``us_aqi`` for rows with
        ``end - num_days <= date_time < end``.

    Raises
    ------
    IndexError
        If the file has fewer than ``test_size`` rows.
    """
    data = pd.read_csv(data_path)
    data['date_time'] = pd.to_datetime(data['date_time'])
    data = data[['date_time', 'us_aqi']]

    # Timestamp of the first row of the test window = exclusive end of the history.
    end_date = data['date_time'].iloc[-test_size]
    start_date = end_date - pd.DateOffset(days=num_days)

    in_window = (data['date_time'] >= start_date) & (data['date_time'] < end_date)
    return data[in_window]


Function to plot aqi chart

In [118]:
def plot_aqi(model=forecast_model, pred_days=3, past_days=7, show=True):
    """Plot past AQI observations together with the forecast.

    Parameters
    ----------
    model : object, optional
        Forecaster passed on to ``predict_aqi``; defaults to the module-level
        ``forecast_model``.
    pred_days : int, optional
        Number of days to forecast.
    past_days : int, optional
        Number of days of history to show before the forecast.
    show : bool, optional
        When true (the default), also call ``fig.show()``. Pass ``False`` when
        the returned figure is rendered by the caller, to avoid displaying it
        twice.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with a "Past AQI" trace, a "Predicted AQI" trace and a dashed
        vertical line at the last historical timestamp.
    """
    past_df = load_past_aqi(past_days)

    # result holds the raw predictions ("y_pred") and their average ("avg").
    result = predict_aqi(model, pred_days)
    forecast_df = pd.DataFrame({
        'time': pd.to_datetime(result['y_pred'].index),  # Ensure the index is datetime
        'AQI': result['y_pred'].values
    })

    # Same hover layout for both traces; %{text} carries the AQI category.
    hover_template = (
        'Time: %{x|%Y-%m-%d %H:%M}<br>' +
        'AQI: %{y}<br>' +
        'Level: %{text}<extra></extra>'
    )

    fig = go.Figure()
    series = (
        (past_df['date_time'], past_df['us_aqi'], 'Past AQI', 'blue'),
        (forecast_df['time'], forecast_df['AQI'], 'Predicted AQI', 'red'),
    )
    for x_values, y_values, trace_name, color in series:
        fig.add_trace(go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=color),
            hovertemplate=hover_template,
            text=[classify_aqi(aqi) for aqi in y_values]
        ))

    # Dashed line where the history ends; skipped when there is no history,
    # because the maximum of an empty column is NaT.
    if not past_df.empty:
        separation_date = past_df['date_time'].max()
        fig.add_vline(x=separation_date, line=dict(color='black', dash='dash'), name='Prediction Start')

    fig.update_layout(
        title=f'AQI Prediction and Past Data for Ho Chi Minh City ({pred_days} Days)',
        xaxis_title='Time',
        yaxis_title='AQI',
        template='plotly',
        xaxis=dict(tickformat="%Y-%m-%d"),
        height=600,
        width=1600
    )

    if show:
        fig.show()
    return fig


### Classification model

Pydantic class for Weather Status Advice

In [115]:
# Structured-output schema passed to llm_model.with_structured_output() in
# get_weather_advice. Documented with comments rather than a class docstring on
# purpose: pydantic exposes a model docstring as the schema description, so
# adding one would change what is sent to the LLM.
class WeatherAdvice(BaseModel):
    # Weather status the advice refers to (per the field description).
    weather_status: str = Field(description="The weather status given")
    # Advice text; this is the only field get_weather_advice returns to its caller.
    advice: str = Field(description="The advice activity for the weather status")

Function to get advice from llm

In [129]:
# Prompt used by get_weather_advice. It is bound to a dedicated name so that
# defining it does not rebind a generic global that other functions in this
# notebook look up at call time for their own, different prompt.
weather_prompt_template = ChatPromptTemplate.from_template(
    """
    In the context of the weather status: {weather_status}, suggest some good activities to do or provide advice on what to wear when facing this weather condition.
    (Write your response in a friendly tone like on TV weather forecast show.)
    """
)

def get_weather_advice(weather_status):
    """Ask the LLM for friendly advice about a predicted weather status.

    Parameters
    ----------
    weather_status : str
        Weather status label inserted into the prompt.

    Returns
    -------
    str
        The ``advice`` field of the structured ``WeatherAdvice`` response.
    """
    # Constrain the model output to the WeatherAdvice schema.
    llm = llm_model.with_structured_output(schema=WeatherAdvice)

    chain = weather_prompt_template | llm

    res = chain.invoke({"weather_status": weather_status})
    return res.advice

Predicting pipeline

In [130]:
def predict_pipeline(precipitation, cloud_cover, wind_direction_10m, pm10, pm2_5,
                     carbon_monoxide, nitrogen_dioxide, sulphur_dioxide, ozone, us_aqi):
    """Predict the weather-status label for one observation.

    The ten inputs are assembled into a one-row DataFrame (columns named after
    the parameters, in signature order), transformed with ``loaded_scaler``,
    classified with ``loaded_model`` and decoded with ``label_encoder``.

    Returns
    -------
    The first element of ``label_encoder.inverse_transform`` applied to the
    model's prediction.
    """
    feature_names = (
        'precipitation', 'cloud_cover', 'wind_direction_10m', 'pm10', 'pm2_5',
        'carbon_monoxide', 'nitrogen_dioxide', 'sulphur_dioxide', 'ozone', 'us_aqi',
    )
    feature_values = (
        precipitation, cloud_cover, wind_direction_10m, pm10, pm2_5,
        carbon_monoxide, nitrogen_dioxide, sulphur_dioxide, ozone, us_aqi,
    )

    # One single-element list per column -> a DataFrame with exactly one row.
    single_row = pd.DataFrame(
        {name: [value] for name, value in zip(feature_names, feature_values)}
    )

    scaled_row = loaded_scaler.transform(single_row)
    encoded_prediction = loaded_model.predict(scaled_row)

    return label_encoder.inverse_transform(encoded_prediction)[0]

Create example data to demo

In [131]:
def set_example_values():
    """Draw one random example value for each classifier input field.

    Returns:
        list[float]: Ten values in the order of the classifier tab's inputs:
        precipitation, cloud cover, wind direction, PM10, PM2.5, carbon
        monoxide, nitrogen dioxide, sulphur dioxide, ozone, US AQI.
    """
    # (upper bound of the uniform draw starting at 0, decimals kept), in input order.
    sampling_spec = (
        (30, 3),    # Precipitation (mm)
        (100, 0),   # Cloud Cover (%)
        (50, 3),    # Wind Direction (°) -- NOTE(review): only 0-50° is sampled; confirm intended
        (200, 3),   # PM10 (µg/m³)
        (200, 3),   # PM2.5 (µg/m³)
        (1000, 0),  # Carbon Monoxide (ppm)
        (200, 3),   # Nitrogen Dioxide (ppm)
        (60, 3),    # Sulphur Dioxide (ppm)
        (300, 3),   # Ozone (ppm)
        (200, 0),   # US AQI
    )
    return [round(random.uniform(0, upper), digits) for upper, digits in sampling_spec]

Tab for the classification model

In [132]:
def classifier_tab():
    """Render the "Weather Status Prediction" tab and wire up its buttons.

    Intended to be called inside an open Gradio layout context (the demo cell
    calls it under `with gr.Tab(...)`). It creates ten numeric inputs, two
    output text boxes and two buttons:

    * "Predict" runs the module-level `predict_pipeline` on the entered values
      and then asks `get_weather_advice` for a recommendation.
    * "Try Example" fills all inputs with values from `set_example_values`.
    """
    gr.Markdown("""<h2 style="font-size:28px; color:#333366;">Weather Status Prediction</h2>""")
    gr.Markdown("Enter air quality and weather parameters to predict the weather status.")

    # One random sample, used only to build the "(e.g., ...)" hints below.
    example_values = set_example_values()

    # Order matters: it must match the positional parameters of `predict_pipeline`
    # and the order of the list returned by `set_example_values`.
    inputs = [
        gr.Number(label="Precipitation (mm)", info=f"Rainfall amount in millimeters (e.g., {example_values[0]})"),
        gr.Number(label="Cloud Cover (%)", info=f"Percentage of cloud cover (e.g., {example_values[1]})"),
        gr.Number(label="Wind Direction (°)", info=f"Direction of wind in degrees (e.g., {example_values[2]})"),
        gr.Number(label="PM10 (µg/m³)", info=f"Particulate matter (10 microns) (e.g., {example_values[3]})"),
        gr.Number(label="PM2.5 (µg/m³)", info=f"Particulate matter (2.5 microns) (e.g., {example_values[4]})"),
        gr.Number(label="Carbon Monoxide (ppm)", info=f"Concentration of CO (e.g., {example_values[5]})"),
        gr.Number(label="Nitrogen Dioxide (ppm)", info=f"Concentration of NO2 (e.g., {example_values[6]})"),
        gr.Number(label="Sulphur Dioxide (ppm)", info=f"Concentration of SO2 (e.g., {example_values[7]})"),
        gr.Number(label="Ozone (ppm)", info=f"Concentration of O3 (e.g., {example_values[8]})"),
        gr.Number(label="US AQI", info=f"Air Quality Index (e.g., {example_values[9]})")
    ]

    output_box = gr.Textbox(label="Predicted Weather Status", lines=2, placeholder="Prediction result will appear here...")
    advice_box = gr.Textbox(label="Activities Recommendation", lines=2, placeholder="Advice based on the predicted weather status will appear here...")
    example_button = gr.Button("Try Example")
    predict_button = gr.Button("Predict")

    def predict_with_advice(*input_values):
        """Classify the submitted readings and fetch advice for the result.

        Deliberately NOT named `predict_pipeline`: a local function with that
        name would shadow the real module-level pipeline, which is what the
        previous placeholder did while returning a random label.
        """
        # An empty gr.Number field is expected to arrive as None; the scaler
        # cannot handle that, so report it to the user instead of crashing.
        if any(value is None for value in input_values):
            raise gr.Error("Please fill in every input field before predicting.")

        # Run the real scaler -> classifier -> label-decoder pipeline.
        predicted_label = str(predict_pipeline(*input_values))
        weather_advice = get_weather_advice(predicted_label)
        return predicted_label, weather_advice

    predict_button.click(fn=predict_with_advice, inputs=inputs, outputs=[output_box, advice_box])
    example_button.click(fn=set_example_values, inputs=[], outputs=inputs)

# DEMO

In [127]:
def _refresh_aqi_plot(past_days, pred_days):
    """Build the AQI forecast figure for the currently selected dropdown values.

    Shared by the page-load event and the "Update Prediction" button so both
    always render the plot the same way.
    """
    # Same int(...) conversion the original inline lambdas applied.
    return plot_aqi(past_days=int(past_days), pred_days=int(pred_days))


# Top-level app layout: one tab per analysis question plus the AQI forecast tab.
with gr.Blocks() as demo:

    gr.Markdown("""
    <h1 style="font-size:36px; text-align:center; color:#004d99; margin-bottom:20px;">
        Capydata's Data Science Website: Weather and Air Quality Analysis
    </h1>
    """)

    with gr.Tab("Question 1"):
        web_question1()

    with gr.Tab("Question 2"):
        web_question2()

    with gr.Tab("Question 3"):
        web_question3()

    with gr.Tab("Question 4"):
        web_question4()

    with gr.Tab("Question 5"):
        web_question5()

    with gr.Tab("AQI Prediction"):
        with gr.Column():
            with gr.Row():
                past_days = gr.Dropdown(
                    choices=[7, 14, 30],
                    value=7,
                    label="Past Days"
                )

                pred_days = gr.Dropdown(
                    choices=[3, 5, 7],
                    value=7,
                    label="Days to Predict"
                )

            btn = gr.Button(value="Update Prediction")
            # Named `aqi_plot` rather than `map` so the builtin `map` is not
            # shadowed for every cell executed after this one.
            aqi_plot = gr.Plot(label="Predicted AQI Visualization")

        # Draw the plot once when the page loads, then again on every button click.
        demo.load(_refresh_aqi_plot, [past_days, pred_days], aqi_plot)
        btn.click(_refresh_aqi_plot, [past_days, pred_days], aqi_plot)

    # with gr.Tab("Weather Classifier"):
    #     classifier_tab()


In [None]:
# Close the app first so re-running this cell does not collide with a server
# still running from a previous execution (assumed intent -- TODO confirm).
demo.close()
# share=True requests a public Gradio share link in addition to the local URL,
# so anyone holding that link can reach the app while it is running.
demo.launch(share=True)