In [59]:
import gradio as gr
from PIL import Image
import base64
from io import BytesIO
import os
import joblib
import pandas as pd

In [106]:
# Directory that holds the pre-rendered chart images, relative to this notebook.
plot_folder = '../img'

# One "<stem>_path" variable per chart, each pointing at "<plot_folder>/<stem>.png".
# The targets on the left are matched positionally with the stems on the right,
# so both sequences must stay in the same order.
(
    question11_path,
    question12_path,
    question21_path,
    question22_path,
    question3_path,
    question41_path,
    question42_path,
    question43_path,
    question5_path,
    question61_path,
    question62_path,
) = (
    os.path.join(plot_folder, f"question{stem}.png")
    for stem in ("11", "12", "21", "22", "3", "41", "42", "43", "5", "61", "62")
)


# Question 1

In [98]:
def generate_page1():
    """Build the content shown on the "Question 1" tab.

    Loads the charts at ``question11_path`` and ``question12_path``, re-encodes
    each one as PNG and embeds it in an ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question1, chart1, comment1, chart2, comment2)`` where
        ``question1`` is the question text, ``chart1``/``chart2`` are HTML
        ``<img>`` strings and ``comment1``/``comment2`` are the HTML commentary
        for the respective chart.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when a chart file is missing).
    """

    def _embed_png(path, alt):
        # Context managers release the image file and the buffer even when
        # decoding or encoding fails part-way through.
        with Image.open(path) as image, BytesIO() as buffer:
            image.save(buffer, format='PNG')
            # getvalue() returns the full buffer regardless of stream position,
            # so no seek(0) is needed before encoding.
            encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        return f'<img src="data:image/png;base64,{encoded}" alt="{alt}" style="max-width:100%; height:auto;">'

    question1 = "Is there a correlation between wind speed/direction and PM10 levels? Does wind from certain directions bring higher pollution levels?"

    chart1 = _embed_png(question11_path, "heatmap")
    chart2 = _embed_png(question12_path, "bar_chart")

    comment1 = """
<h4 style="font-size:20px; color:#004d99;">From the heatmap, the following insights can be drawn:</h4>

<p style="font-size:18px; line-height:1.6;">
- <b>Wind Speed and PM10 Levels:</b> There is an observable trend where higher wind speeds (on the right side of the x-axis) generally correspond to lower average PM10 concentrations. This suggests that as wind speed increases, it helps disperse particulate matter, leading to a reduction in pollution levels in the area.<br>
</p>

<p style="font-size:18px; line-height:1.6;">
- <b>Wind Direction and PM10 Levels:</b> Certain wind directions are associated with higher PM10 levels. For instance:<br>
&nbsp;&nbsp;&nbsp;&nbsp;- Wind directions in the range of <b>0°-120°</b> (towards the top of the y-axis), combined with lower wind speeds, are linked to elevated PM10 concentrations.<br>
&nbsp;&nbsp;&nbsp;&nbsp;- Wind directions between <b>270°-300°</b> also show slightly increased PM10 levels, though less prominent than the 0°-120° range.<br>
</p>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Winds coming from specific directions (<b>0°-120°</b> and to a lesser extent, <b>270°-300°</b>) are more likely to bring higher levels of particulate pollution, particularly when wind speeds are low. Both wind speed and direction influence PM10 levels, with lower wind speeds and certain wind directions being correlated with higher pollution.
</p>
"""

    comment2 = """
<h4 style="font-size:20px; color:#004d99;">The wind direction is categorized into bins corresponding to compass directions (e.g., N, NE, etc.).</h4>

<h5 style="font-size:18px; color:#333366;">From the bar chart, we can observe the following:</h5>
<p style="font-size:18px; line-height:1.6;">
- <b>High PM10 Levels from West and Northwest:</b> The highest average PM10 concentrations are observed when the wind comes from the <b>West (W)</b> and <b>Northwest (NW)</b> directions, with averages of <b>41.5 µg/m³</b> and <b>45.9 µg/m³</b>, respectively. This suggests that winds from these directions are associated with increased PM10 levels.<br>
</p>

<p style="font-size:18px; line-height:1.6;">
- <b>Moderate PM10 Levels from North and South:</b> Winds from the <b>North (N)</b> and <b>South (S)</b> also show relatively high PM10 levels, around <b>39.7 µg/m³</b> and <b>36.3 µg/m³</b>, respectively. These directions may contribute to moderate pollution levels.<br>
</p>

<p style="font-size:18px; line-height:1.6;">
- <b>Lower PM10 Levels from East and Southeast:</b> Winds from the <b>East (E)</b> and <b>Southeast (SE)</b> bring in lower PM10 levels, averaging <b>28.6 µg/m³</b> and <b>33.1 µg/m³</b>. This could indicate fewer pollution sources or better pollutant dispersion.
</p>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Winds from the <b>West</b> and <b>Northwest</b> bring the highest PM10 pollution levels. In contrast, winds from the <b>East</b> and <b>Southeast</b> are associated with cleaner air, possibly due to fewer pollution sources or more effective pollutant dispersion in those directions.
</p>
"""

    return question1, chart1, comment1, chart2, comment2

def web_question1():
    """Create the Gradio components for the "Question 1" page.

    Takes its content from ``generate_page1()`` and emits, in order: the
    question heading, the "Answer:" heading, then each chart followed by its
    commentary. Intended to be called while a Gradio layout context is open.
    """
    title, heatmap_html, heatmap_notes, bar_html, bar_notes = generate_page1()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 1: {title}</h2>"""
    answer_label = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""
    gr.Markdown(heading)
    gr.Markdown(answer_label)

    # Each chart is immediately followed by the text that interprets it.
    for chart_html, notes in ((heatmap_html, heatmap_notes), (bar_html, bar_notes)):
        gr.HTML(chart_html)
        gr.Markdown(notes)



# Question 2

In [97]:
def generate_page2():
    """Build the content shown on the "Question 2" tab.

    Loads the charts at ``question21_path`` and ``question22_path``, re-encodes
    each one as PNG and embeds it in an ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question2, chart1, chart2, comment)`` where ``question2`` is
        the question text, ``chart1``/``chart2`` are HTML ``<img>`` strings and
        ``comment`` is the HTML commentary covering both charts.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when a chart file is missing).
    """

    def _embed_png(path, alt):
        # Context managers release the image file and the buffer even when
        # decoding or encoding fails part-way through.
        with Image.open(path) as image, BytesIO() as buffer:
            image.save(buffer, format='PNG')
            # getvalue() returns the full buffer regardless of stream position,
            # so no seek(0) is needed before encoding.
            encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        return f'<img src="data:image/png;base64,{encoded}" alt="{alt}" style="max-width:100%; height:auto;">'

    question2 = "How do extreme weather events (identified by weather_status) affect air quality parameters, and what are the lag effects on pollutant concentrations?"

    chart1 = _embed_png(question21_path, "heatmap")
    chart2 = _embed_png(question22_path, "bar_chart")

    comment = """
<h4 style="font-size:20px; color:#004d99;">Based on the analysis of extreme weather effects on air quality parameters, we observe:</h4>

<h5 style="font-size:18px; color:#333366;">1. Pollutant Concentration Changes:</h5>
<p style="font-size:18px; line-height:1.6;">
- <b>SO₂ and O₃:</b> These pollutants show the most significant increases during extreme weather (+16.69% and +17.25% respectively, <i>p</i>&lt;0.001).<br>
- <b>Carbon Monoxide (CO):</b> Uniquely, CO shows a decrease (-4.40%), suggesting possible washout effects.<br>
- <b>PM10 and PM2.5:</b> These show slight increases (3.14% and 4.95%) but are not statistically significant.<br>
- Most pollutants show lower variability during extreme weather, indicating more stable concentrations.
</p>

<h5 style="font-size:18px; color:#333366;">2. Lag Effects After Weather Events:</h5>
<p style="font-size:18px; line-height:1.6;">
- Peak concentrations typically occur within the first <b>1000 hours</b> post-extreme weather.<br>
- Different pollutants show varying recovery patterns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;- <b>CO:</b> Stabilizes quickly after events.<br>
&nbsp;&nbsp;&nbsp;&nbsp;- <b>PM10 and PM2.5:</b> Gradually stabilize over <b>4000 hours</b>.<br>
&nbsp;&nbsp;&nbsp;&nbsp;- <b>O₃:</b> Demonstrates the most pronounced and extended fluctuations.<br>
- 24-hour rolling averages show more stable trends compared to shorter timeframes.
</p>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Extreme weather events have varying impacts on different pollutants, with <b>SO₂ and O₃</b> being most significantly affected. The lag effects persist for considerable periods (<b>1000-4000 hours</b>), suggesting the need for <b>extended monitoring and management strategies</b> post-extreme weather events. This information is crucial for public health response planning and air quality management during and after extreme weather conditions.
</p>
"""

    return question2, chart1, chart2, comment

def web_question2():
    """Create the Gradio components for the "Question 2" page.

    Takes its content from ``generate_page2()`` and emits, in order: the
    question heading, the "Answer:" heading, both charts, then the shared
    commentary. Intended to be called while a Gradio layout context is open.
    """
    title, first_chart, second_chart, notes = generate_page2()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 2: {title}</h2>"""
    gr.Markdown(heading)
    gr.Markdown("""<h3 style="font-size:24px; color:#333333;">Answer:</h3>""")

    # Both charts come first; a single commentary block discusses them together.
    for chart_html in (first_chart, second_chart):
        gr.HTML(chart_html)

    gr.Markdown(notes)



# Question 3

In [90]:
def generate_page3():
    """Build the content shown on the "Question 3" tab.

    Loads the chart at ``question3_path``, re-encodes it as PNG and embeds it
    in an ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question3, chart1, comment)`` where ``question3`` is the
        question text, ``chart1`` is an HTML ``<img>`` string and ``comment``
        is the HTML commentary for the chart.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when the chart file is missing).
    """
    question3 = "Are there distinct seasonal or monthly patterns in air quality metrics?"

    # Context managers release the image file and the buffer even when
    # decoding or encoding fails part-way through.
    with Image.open(question3_path) as image3, BytesIO() as buf3:
        image3.save(buf3, format='PNG')
        # getvalue() returns the full buffer regardless of stream position.
        image3_base64 = base64.b64encode(buf3.getvalue()).decode('utf-8')

    # The commentary below describes this figure as a line chart, so the alt
    # text says so too (it previously read "heatmap").
    chart1 = f'<img src="data:image/png;base64,{image3_base64}" alt="line_chart" style="max-width:100%; height:auto;">'

    comment = """
<h4 style="font-size:20px; color:#004d99;">Based on the line chart, we can observe the following:</h4>

<p style="font-size:18px; line-height:1.6;">
- <b>Carbon Monoxide (CO):</b> This pollutant stands out with significantly higher concentrations than the others, fluctuating around <b>300–700 µg/m³</b>. It shows a noticeable seasonal trend, with peaks around the last months of the year and lower concentrations in early summer. This suggests that CO levels might be influenced by weather conditions or emissions patterns, possibly from traffic or industrial activities.
</p>

<p style="font-size:18px; line-height:1.6;">
- <b>Other Pollutants:</b> The rest of the pollutants (PM10, PM2.5, NO₂, SO₂, O₃, and US AQI) have relatively low concentrations compared to CO, all staying below <b>100 µg/m³</b>. These pollutants exhibit smaller fluctuations and are generally stable throughout the year.
</p>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Carbon Monoxide levels appear to be the primary concern due to their high concentration and seasonal spikes, while other pollutants remain relatively low and stable across the months. This could indicate targeted issues related to CO pollution sources, potentially suggesting an area for further investigation or intervention.
</p>
"""

    return question3, chart1, comment

def web_question3():
    """Create the Gradio components for the "Question 3" page.

    Takes its content from ``generate_page3()`` and emits, in order: the
    question heading, the "Answer:" heading, the chart and its commentary.
    Intended to be called while a Gradio layout context is open.
    """
    title, chart_html, notes = generate_page3()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 3: {title}</h2>"""
    answer_label = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    gr.Markdown(heading)
    gr.Markdown(answer_label)
    gr.HTML(chart_html)
    gr.Markdown(notes)


# Question 4

In [96]:
def generate_page4():
    """Build the content shown on the "Question 4" tab.

    Loads the charts at ``question41_path``, ``question42_path`` and
    ``question43_path``, re-encodes each one as PNG and embeds it in an
    ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question4, chart1, chart2, chart3, comment1, comment2)``
        where ``question4`` is the question text, ``chart1``..``chart3`` are
        HTML ``<img>`` strings, and ``comment1``/``comment2`` are HTML
        commentary blocks.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when a chart file is missing).
    """

    def _embed_png(path, alt):
        # Context managers release the image file and the buffer even when
        # decoding or encoding fails part-way through.
        with Image.open(path) as image, BytesIO() as buffer:
            image.save(buffer, format='PNG')
            # getvalue() returns the full buffer regardless of stream position,
            # so no seek(0) is needed before encoding.
            encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        # Responsive sizing keeps wide figures inside the page, matching the
        # <img> tags built by generate_page1..generate_page3.
        return f'<img src="data:image/png;base64,{encoded}" alt="{alt}" style="max-width:100%; height:auto;">'

    question4 = "What is the relationship between precipitation and air quality? Does rainfall help reduce pollutant concentrations, and if so, to what extent?"

    # Alt texts describe the precipitation charts; the previous values referred
    # to "health" figures that have nothing to do with this question.
    chart1 = _embed_png(question41_path, "precipitation_pollutant_chart_1")
    chart2 = _embed_png(question42_path, "precipitation_pollutant_chart_2")
    chart3 = _embed_png(question43_path, "average_pollutants_rain_vs_no_rain")

    comment1 = """
<h4 style="font-size:20px; color:#004d99;">Based on the correlation values between precipitation and pollutant levels in the charts:</h4>
<ul style="font-size:18px; line-height:1.6;">
    <li><b>PM10 and PM2.5:</b> Weak negative correlation (-0.012 and -0.0056), suggesting minimal reduction with rainfall. There are noticeable peaks in PM10 and PM2.5 concentrations at lower precipitation values (around 0–5 mm). This might indicate that low or no rainfall correlates with higher pollutant concentrations, possibly due to stagnant air conditions allowing pollutants to accumulate.</li>
    <li><b>US AQI:</b> Slight positive correlation (0.011), suggesting rainfall may not lower AQI. The US AQI line shows significant variability, with sharp peaks and dips even at lower precipitation levels. This could indicate that air quality is influenced by other environmental factors beyond just precipitation, such as wind speed, temperature, or specific weather events.</li>
</ul>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Rainfall shows minimal impact on pollutant levels.
</p>
"""

    comment2 = """
<h4 style="font-size:20px; color:#004d99;">Based on the Average pollutant levels in the chart:</h4>
<ul style="font-size:18px; line-height:1.6;">
    <li><b>PM10:</b> lower during "Rain" (33.36 µg/m³) compared to "No Rain" (35.32 µg/m³), suggesting a slight reduction in particulate matter during rainfall.</li>
    <li><b>PM2.5:</b> lower during "Rain" (22.63 µg/m³) compared to "No Rain" (23.61 µg/m³), indicating that rainfall might help reduce smaller particulates as well.</li>
    <li><b>US AQI:</b> remains relatively stable across different rainfall events, with only minor fluctuations. This implies that rainfall does not significantly influence overall air quality (as represented by AQI).</li>
</ul>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
Rainfall seems to contribute slightly to the reduction of particulate pollutants (PM10 and PM2.5), but the effect is relatively minor and not strong enough to significantly impact the overall air quality (as reflected by AQI). This finding implies that while rain can help reduce pollutant levels, it may not be sufficient to substantially improve air quality on its own.
</p>
"""
    return question4, chart1, chart2, chart3, comment1, comment2

def web_question4():
    """Create the Gradio components for the "Question 4" page.

    Takes its content from ``generate_page4()`` and emits, in order: the
    question heading, the "Answer:" heading, the first two charts with their
    commentary, then the third chart with its commentary. Intended to be
    called while a Gradio layout context is open.
    """
    title, chart_a, chart_b, chart_c, notes_ab, notes_c = generate_page4()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 4: {title}</h2>"""
    gr.Markdown(heading)
    gr.Markdown("""<h3 style="font-size:24px; color:#333333;">Answer:</h3>""")

    # (component class, content) pairs in the exact order they appear on the page.
    page_body = (
        (gr.HTML, chart_a),
        (gr.HTML, chart_b),
        (gr.Markdown, notes_ab),
        (gr.HTML, chart_c),
        (gr.Markdown, notes_c),
    )
    for component, content in page_body:
        component(content)


# Question 5

In [101]:
def generate_page5():
    """Build the content shown on the "Question 5" tab.

    Loads the chart at ``question5_path``, re-encodes it as PNG and embeds it
    in an ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question5, chart1, comment)`` where ``question5`` is the
        question text, ``chart1`` is an HTML ``<img>`` string and ``comment``
        is the HTML commentary for the chart.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when the chart file is missing).
    """
    question5 = "Are there specific times of day (morning, afternoon, evening) when pollution levels tend to be higher?"

    # Context managers release the image file and the buffer even when
    # decoding or encoding fails part-way through.
    with Image.open(question5_path) as image5, BytesIO() as buf5:
        image5.save(buf5, format='PNG')
        # getvalue() returns the full buffer regardless of stream position.
        image5_base64 = base64.b64encode(buf5.getvalue()).decode('utf-8')

    # Responsive sizing keeps a wide figure inside the page, matching the
    # <img> tags built by generate_page1..generate_page3.
    chart1 = f'<img src="data:image/png;base64,{image5_base64}" alt="pollution_by_time_of_day" style="max-width:100%; height:auto;">'

    comment = """
<h4 style="font-size:20px; color:#004d99;">Based on the chart, we can observe the following:</h4>

<p style="font-size:18px; line-height:1.6;">
- <b>Ozone Levels:</b> Ozone concentrations are highest in the morning (<b>95.8 µg/m³</b>) and significantly decrease in the afternoon and evening (<b>31.2 µg/m³</b> and <b>37.6 µg/m³</b>, respectively). This trend could be influenced by sunlight and cloud cover variations throughout the day. Ozone formation is typically affected by sunlight, so lower afternoon and evening values may reflect cloud cover or reduced sunlight intensity as the day progresses.
</p>

<p style="font-size:18px; line-height:1.6;">
- <b>PM10 and PM2.5:</b> PM10 and PM2.5 levels are somewhat consistent across different times of day, with slightly higher concentrations in the afternoon. This pattern suggests that these particulate pollutants might be less sensitive to sunlight or cloud cover and more affected by traffic or industrial activities, which remain steady throughout the day.
</p>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
In summary, ozone levels peak in the morning, likely due to favorable conditions for ozone formation, such as sunlight availability and less cloud cover, while PM10 and PM2.5 are consistent with slightly higher concentrations in the afternoon.
</p>
"""

    return question5, chart1, comment

def web_question5():
    """Create the Gradio components for the "Question 5" page.

    Takes its content from ``generate_page5()`` and emits, in order: the
    question heading, the "Answer:" heading, the chart and its commentary.
    Intended to be called while a Gradio layout context is open.
    """
    title, chart_html, notes = generate_page5()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 5: {title}</h2>"""
    answer_label = """<h3 style="font-size:24px; color:#333333;">Answer:</h3>"""

    gr.Markdown(heading)
    gr.Markdown(answer_label)
    gr.HTML(chart_html)
    gr.Markdown(notes)


# Question 6

In [108]:
def generate_page6():
    """Build the content shown on the "Question 6" tab.

    Loads the charts at ``question61_path`` and ``question62_path``, re-encodes
    each one as PNG and embeds it in an ``<img>`` tag as a base64 data URI.

    Returns:
        tuple: ``(question6, chart1, chart2, comment)`` where ``question6`` is
        the question text, ``chart1``/``chart2`` are HTML ``<img>`` strings and
        ``comment`` is the HTML commentary covering both charts.

    Raises:
        Propagates any exception raised by ``Image.open`` or ``Image.save``
        (for example when a chart file is missing).
    """

    def _embed_png(path, alt):
        # Context managers release the image file and the buffer even when
        # decoding or encoding fails part-way through.
        with Image.open(path) as image, BytesIO() as buffer:
            image.save(buffer, format='PNG')
            # getvalue() returns the full buffer regardless of stream position,
            # so no seek(0) is needed before encoding.
            encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        # Responsive sizing keeps wide figures inside the page, matching the
        # <img> tags built by generate_page1..generate_page3.
        return f'<img src="data:image/png;base64,{encoded}" alt="{alt}" style="max-width:100%; height:auto;">'

    question6 = "How do temperature patterns (temperature_2m, apparent_temperature) vary across different time periods (daily, monthly)? Are there significant anomalies in temperature trends?"

    chart1 = _embed_png(question61_path, "temperature_daily_pattern")
    chart2 = _embed_png(question62_path, "temperature_monthly_pattern")

    # A <ul> may not be nested inside a <p>: browsers close the paragraph early,
    # so the list lost the paragraph styling and the trailing </p> produced an
    # empty paragraph. Each list is therefore a styled sibling of its label.
    comment = """
<h4 style="font-size:20px; color:#004d99;">Based on the temperature patterns across different time periods:</h4>

<p style="font-size:18px; line-height:1.6;">
- <b>Daily Pattern:</b>
</p>
<ul style="font-size:18px; line-height:1.6;">
    <li>Temperature fluctuates between <b>24°C-32°C</b>.</li>
    <li>Significant peak in <b>April-May 2024</b> (above <b>32°C</b>).</li>
    <li>Most notable anomalies in <b>early May 2024</b> (+<b>4.5°C</b> above normal).</li>
    <li>Shows clear cyclical pattern with <b>summer peaks</b> and <b>winter troughs</b>.</li>
</ul>

<p style="font-size:18px; line-height:1.6;">
- <b>Monthly Pattern:</b>
</p>
<ul style="font-size:18px; line-height:1.6;">
    <li><b>April</b> has the highest mean temperature (<b>30.36°C</b>).</li>
    <li><b>December-February</b> show the greatest temperature variability.</li>
    <li><b>Warmest period:</b> March-May (apparent temps up to <b>40.86°C</b>).</li>
    <li><b>Coolest period:</b> July-September (mins around <b>23.7°C</b>).</li>
</ul>

<h4 style="font-size:20px; color:#004d99;">Summary:</h4>
<p style="font-size:18px; line-height:1.6;">
The analysis reveals a climate with distinct monthly variations and significant heat stress potential. While actual temperatures show moderate fluctuations, apparent temperatures can exceed actual temperatures by up to <b>10°C</b> during peak 4-5-6 months, indicating significant heat stress conditions. The presence of strong daily cycles and monthly patterns, combined with notable anomalies in early 2024, suggests a changing temperature regime that could have important implications for urban planning and public health considerations.
</p>
"""

    return question6, chart1, chart2, comment

def web_question6():
    """Create the Gradio components for the "Question 6" page.

    Takes its content from ``generate_page6()`` and emits, in order: the
    question heading, the "Answer:" heading, both charts, then the shared
    commentary. Intended to be called while a Gradio layout context is open.
    """
    title, daily_chart, monthly_chart, notes = generate_page6()

    heading = f"""<h2 style="font-size:28px; color:#333366;">Question 6: {title}</h2>"""
    gr.Markdown(heading)
    gr.Markdown("""<h3 style="font-size:24px; color:#333333;">Answer:</h3>""")

    # Both charts come first; a single commentary block discusses them together.
    for chart_html in (daily_chart, monthly_chart):
        gr.HTML(chart_html)

    gr.Markdown(notes)


# Classification model

In [71]:
# Directory that holds the serialized classification artifacts, relative to this notebook.
model_folder = '../model'

model_path = os.path.join(model_folder, 'classification.pkl')
scaler_path = os.path.join(model_folder, 'scaler.pkl')
label_encoder_path = os.path.join(model_folder, 'label_encoder.pkl')

# Load the three artifacts in the order model -> scaler -> label encoder.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code; only point these paths at files from a trusted source.
loaded_model, loaded_scaler, label_encoder = map(
    joblib.load, (model_path, scaler_path, label_encoder_path)
)


def predict_pipeline(precipitation, cloud_cover, wind_direction_10m, pm10, pm2_5,
                     carbon_monoxide, nitrogen_dioxide, sulphur_dioxide, ozone, us_aqi):
    """Predict a label from one set of weather / air-quality readings.

    The ten arguments are assembled into a single-row DataFrame whose columns
    are named after the parameters (in signature order), passed through
    ``loaded_scaler.transform`` and then ``loaded_model.predict``. The
    prediction is decoded with ``label_encoder.inverse_transform``.

    Args:
        precipitation, cloud_cover, wind_direction_10m, pm10, pm2_5,
        carbon_monoxide, nitrogen_dioxide, sulphur_dioxide, ozone, us_aqi:
            One numeric reading each. ``None`` (which is what an empty
            ``gr.Number`` field submits) is rejected.

    Returns:
        The first (and only) decoded label for the input row.

    Raises:
        gr.Error: If any reading is ``None``; the message names the missing
            fields so the Gradio UI can show it to the user.
    """
    features = {
        'precipitation': precipitation,
        'cloud_cover': cloud_cover,
        'wind_direction_10m': wind_direction_10m,
        'pm10': pm10,
        'pm2_5': pm2_5,
        'carbon_monoxide': carbon_monoxide,
        'nitrogen_dioxide': nitrogen_dioxide,
        'sulphur_dioxide': sulphur_dioxide,
        'ozone': ozone,
        'us_aqi': us_aqi
    }

    # Fail early with a readable message instead of letting a None propagate
    # into the scaler/model, where it surfaces as an opaque error.
    missing = [name for name, value in features.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    # A list with one dict yields a one-row frame; column order follows the
    # dict's insertion order, i.e. the order listed above.
    input_df = pd.DataFrame([features])
    scaled_input = loaded_scaler.transform(input_df)

    # Make prediction
    prediction = loaded_model.predict(scaled_input)

    # Return the decoded label
    return label_encoder.inverse_transform(prediction)[0]

# demo.launch()

In [109]:
def set_example_values():
    """Return ten sample numbers used to pre-fill the prediction form.

    Returns:
        list: One value per input field, in the order the fields are declared.
    """
    # Replace with realistic example data
    sample_readings = (10, 50, 180, 25, 15, 0.4, 0.02, 0.01, 0.03, 50)
    return list(sample_readings)
   
def classifier_tab():
    """Create the Gradio components for the weather-status prediction form.

    Emits a heading and a short instruction, ten numeric inputs, an output
    textbox and two buttons: "Predict" sends the inputs to
    ``predict_pipeline`` and writes the result to the textbox; "Try Example"
    fills the inputs with the values from ``set_example_values``. Intended to
    be called while a Gradio layout context is open.
    """
    gr.Markdown("""<h2 style="font-size:28px; color:#333366;">Weather Status Prediction</h2>""")
    gr.Markdown("Enter air quality and weather parameters to predict the weather status.")

    # (label, help text) per field; the order must match predict_pipeline's
    # positional parameters because Gradio passes the values positionally.
    field_specs = (
        ("Precipitation (mm)", "Rainfall amount in millimeters"),
        ("Cloud Cover (%)", "Percentage of cloud cover"),
        ("Wind Direction (°)", "Direction of wind in degrees"),
        ("PM10 (µg/m³)", "Particulate matter (10 microns)"),
        ("PM2.5 (µg/m³)", "Particulate matter (2.5 microns)"),
        ("Carbon Monoxide (ppm)", "Concentration of CO"),
        ("Nitrogen Dioxide (ppm)", "Concentration of NO2"),
        ("Sulphur Dioxide (ppm)", "Concentration of SO2"),
        ("Ozone (ppm)", "Concentration of O3"),
        ("US AQI", "Air Quality Index"),
    )
    inputs = [gr.Number(label=field_label, info=field_info) for field_label, field_info in field_specs]

    output_box = gr.Textbox(
        label="Predicted Weather Status",
        lines=2,
        placeholder="Prediction result will appear here...",
    )
    example_button = gr.Button("Try Example")
    predict_button = gr.Button("Predict")

    predict_button.click(fn=predict_pipeline, inputs=inputs, outputs=output_box)
    example_button.click(fn=set_example_values, inputs=[], outputs=inputs)

In [110]:
# (tab label, function that renders the tab's content), in display order.
tab_pages = (
    ("Question 1", web_question1),
    ("Question 2", web_question2),
    ("Question 3", web_question3),
    ("Question 4", web_question4),
    ("Question 5", web_question5),
    ("Question 6", web_question6),
    ("Weather Prediction", classifier_tab),
)

# Assemble the app: a page title followed by one tab per entry above.
with gr.Blocks() as demo:

    gr.Markdown("""
    <h1 style="font-size:36px; text-align:center; color:#004d99; margin-bottom:20px;">
        Capydata's Data Science Website: Weather and Air Quality Analysis
    </h1>
    """)

    # Components created by a renderer attach to whichever tab is open.
    for tab_label, render_tab in tab_pages:
        with gr.Tab(tab_label):
            render_tab()


In [111]:
# Close before launching — presumably so that re-running this cell does not
# leave a server from an earlier run behind (TODO confirm intent).
demo.close()
# NOTE(review): share=True also publishes a temporary public gradio.live URL
# (see the cell output below); confirm the app should be reachable publicly.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7883
* Running on public URL: https://9adb0d9ab3bb511913.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




