In [None]:
# Weather Data Visualizer — Summary

## Dataset
- Source: <where you downloaded the data from>
- Period: YYYY-MM-DD to YYYY-MM-DD
- Columns used: date, temp_avg, temp_min, temp_max, rain_mm, humidity

## Key Findings
- Average monthly temperature peaks in <Month> with mean ~X°C.
- Monthly rainfall highest in <Month> (total ~Y mm).
- Humidity correlates <negatively/positively> with temperature (r = ±0.XX).
- Notable anomalies: heavy rain on <date>, sudden temp drop on <date>.

## Visualizations
- `daily_temp.png` — daily temperature trend; shows seasonality.
- `monthly_rainfall.png` — monthly rainfall totals.
- `humidity_vs_temp.png` — scatter plot for humidity vs temperature.
- `combined_figure.png` — combined view that tells the overall story at a glance.

## Methods
- Missing values were forward- and back-filled; any numeric NaNs that remained were replaced with medians.
- Dates were converted to datetime, and the data was resampled monthly using pandas.

## How to run
1. Open `notebooks/weather_analysis.ipynb` and run all cells from top to bottom (ensure `data/raw_weather.csv` is present).
2. Alternatively, run the provided Python scripts from `src/` if you have exported them.

## Conclusions & Suggestions
- [Write two short actionable insights — e.g., plant trees around campus for hot months, schedule rainwater harvesting for monsoon months.]

## Citation
- Data source: <link or name>


In [None]:
# Aggregate by calendar month number (1-12), pooling all years together, then
# relabel the rows with month abbreviations.
# Temperature and humidity are averaged; rainfall is summed.
# NOTE(review): relies on df having a DatetimeIndex — presumably set by
# prepare_datetime(); confirm.
MONTH_ABBR = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

df['month'] = df.index.month
monthly = df.groupby(df.index.month).agg({'temp_avg':'mean','rain_mm':'sum','humidity':'mean'}).sort_index()
# Label each row from its actual month number. Slicing the name list by
# len(monthly) would mislabel every row whenever the data does not start in
# January or is missing a month (e.g. months 3..12 would be shown as Jan..Oct).
monthly.index = [MONTH_ABBR[int(m) - 1] for m in monthly.index]
display(monthly)

# Seasonal (DJF, MAM, JJA, SON) approach
def season(month):
    """Return the season label for a month number.

    12, 1, 2 -> 'Winter'; 3, 4, 5 -> 'Spring'; 6, 7, 8 -> 'Summer'.
    Any other value (including 9, 10, 11) -> 'Autumn'.
    """
    buckets = (
        ((12, 1, 2), 'Winter'),
        ((3, 4, 5), 'Spring'),
        ((6, 7, 8), 'Summer'),
    )
    for months, label in buckets:
        if month in months:
            return label
    # Nothing matched above: fall through to autumn.
    return 'Autumn'

# Tag every row with its season label, then aggregate per season:
# mean temperature, total rainfall, mean humidity.
df['season'] = df.index.month.map(season)
season_agg = {'temp_avg':'mean','rain_mm':'sum','humidity':'mean'}
seasonal = df.groupby('season').agg(season_agg)
display(seasonal)


In [None]:
import numpy as np

# Month-by-month summary statistics computed from the rows of df.
# Which statistics are computed for each column:
stats_spec = {
    'temp_avg': ['mean', 'min', 'max', 'std'],
    'rain_mm': ['sum', 'mean'],
    'humidity': ['mean', 'std'],
}

# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' — confirm against the pinned pandas version before changing it.
monthly_stats = df.resample('M').agg(stats_spec)

# Show the first few months, then persist the full table.
display(monthly_stats.head())
monthly_stats.to_csv('../outputs/monthly_stats.csv')


In [None]:
# Produce the four figures; each call is given an output path under OUT_FIG.

# Daily temperature trend.
daily_temperature_plot(
    df,
    temp_col='temp_avg',
    out_path=OUT_FIG/'daily_temp.png',
)

# Monthly rainfall bar chart.
monthly_rainfall_bar(
    df,
    rain_col='rain_mm',
    out_path=OUT_FIG/'monthly_rainfall.png',
)

# Humidity vs. temperature scatter plot.
humidity_vs_temp_scatter(
    df,
    temp_col='temp_avg',
    hum_col='humidity',
    out_path=OUT_FIG/'humidity_vs_temp.png',
)

# Combined temperature + rainfall figure.
combined_figure(
    df,
    temp_col='temp_avg',
    rain_col='rain_mm',
    out_path=OUT_FIG/'combined_figure.png',
)


In [None]:
# Preview the raw data. display() is required here: a bare expression is only
# rendered when it is the last statement of a cell, so a plain `df_raw.head()`
# at the top of the cell would be silently discarded.
display(df_raw.head())

# Build the working frame from the raw data (starting from df_raw each time
# keeps this cell safe to re-run).
# NOTE(review): prepare_datetime presumably parses `date_col` and sets it as
# the index — confirm in src.viz_tools.
df = prepare_datetime(df_raw, date_col='date')  # or whatever your date column name is

# Columns carried through the cleaning step.
cols = ['temp_avg','temp_min','temp_max','rain_mm','humidity']  # update to your dataset's columns
df = clean_df(df, cols_keep=cols)

# Persist the cleaned frame.
save_cleaned(df, '../outputs/cleaned_weather.csv')


In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.viz_tools import load_data, prepare_datetime, clean_df, daily_temperature_plot, monthly_rainfall_bar, humidity_vs_temp_scatter, combined_figure, save_cleaned

# Output location for figures; created up front (with parents) so later
# cells can write into it.
OUT_FIG = Path('../outputs/figures')
OUT_FIG.mkdir(parents=True, exist_ok=True)

# Input CSV; the path is relative to the notebook, so adjust it if the
# notebook is moved.
DATA = Path('../data/raw_weather.csv')
df_raw = load_data(DATA)
