# Weather Data Analysis
This notebook performs complete weather data analysis including loading, cleaning, processing, statistics, visualization, grouping, and exporting.

## Task 1: Load CSV and Inspect

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

plt.style.use('default')

df = pd.read_csv('weather.csv')
df.head()

Unnamed: 0,date,Tmax Normal,Tmax,Forecast Tmax,Tmin Normal,Tmin,Forecast Tmin
0,21 Nov,,28.6,,,15.8,
1,22 Nov,,27.9,,,16.3,
2,23 Nov,,27.1,,,14.5,
3,24 Nov,,27.1,,,,
4,25 Nov,,26.2,,,13.2,


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           9 non-null      object 
 1   Tmax Normal    0 non-null      float64
 2   Tmax           7 non-null      float64
 3   Forecast Tmax  2 non-null      float64
 4   Tmin Normal    0 non-null      float64
 5   Tmin           6 non-null      float64
 6   Forecast Tmin  1 non-null      float64
dtypes: float64(6), object(1)
memory usage: 636.0+ bytes


## Task 2: Clean & Process Data

In [3]:
# ---- Detect date column ----
date_candidates = [
    c for c in df.columns 
    if any(k in c.lower() for k in ['date', 'day', 'time', 'timestamp'])
]

if date_candidates:
    date_col = date_candidates[0]
else:
    # Try parsing each column to find a valid datetime
    date_col = None
    for col in df.columns:
        try:
            converted = pd.to_datetime(df[col], errors='raise')
            date_col = col
            break
        except:
            pass

if not date_col:
    raise ValueError("No date column found in the dataset.")

# ---- Convert and standardize date ----
df['date'] = pd.to_datetime(df[date_col], errors='coerce')
df = df.dropna(subset=['date'])

# ---- Normalize numeric columns ----
numeric_map = {}
for col in df.columns:
    if any(k in col.lower() for k in ['temp', 'humidity', 'rain', 'precip']):
        numeric_map[col] = pd.to_numeric(df[col], errors='coerce')

for col, series in numeric_map.items():
    df[col] = series.fillna(series.mean())

# ---- Final sorting ----
df = df.sort_values('date').reset_index(drop=True)
df.head()


  df['date'] = pd.to_datetime(df[date_col], errors='coerce')


Unnamed: 0,date,Tmax Normal,Tmax,Forecast Tmax,Tmin Normal,Tmin,Forecast Tmin


## Task 3: NumPy Statistical Analysis

In [4]:
stats = {}

for col in numeric_map.keys():
    arr = df[col].values
    stats[col] = {
        'mean': np.mean(arr),
        'min': np.min(arr),
        'max': np.max(arr),
        'std': np.std(arr)
    }

stats

{}

## Task 4: Visualization with Matplotlib

In [5]:
if 'temp' in ''.join(df.columns).lower():
    temp_col = [c for c in df.columns if 'temp' in c.lower()][0]

    plt.figure(figsize=(10,5))
    plt.plot(df['date'], df[temp_col])
    plt.title('Daily Temperature Trend')
    plt.xlabel('Date')
    plt.ylabel('Temperature')
    plt.savefig('temperature_trend.png')
    plt.show()

In [6]:
rain_candidates = [c for c in df.columns if 'rain' in c.lower() or 'precip' in c.lower()]
if rain_candidates:
    rain_col = rain_candidates[0]
    df['month'] = df['date'].dt.to_period('M')
    monthly_rain = df.groupby('month')[rain_col].sum()

    plt.figure(figsize=(10,5))
    monthly_rain.plot(kind='bar')
    plt.title('Monthly Rainfall Total')
    plt.xlabel('Month')
    plt.ylabel('Rainfall')
    plt.savefig('monthly_rainfall.png')
    plt.show()

In [7]:
hum_candidates = [c for c in df.columns if 'hum' in c.lower()]
if hum_candidates and 'temp_col' in locals():
    hum_col = hum_candidates[0]

    plt.figure(figsize=(7,5))
    plt.scatter(df[temp_col], df[hum_col])
    plt.title('Humidity vs Temperature')
    plt.xlabel('Temperature')
    plt.ylabel('Humidity')
    plt.savefig('humidity_vs_temperature.png')
    plt.show()

## Task 5: Grouping & Aggregation

In [8]:
df['year'] = df['date'].dt.year
yearly_stats = df.groupby('year')[list(numeric_map.keys())].mean()
yearly_stats

## Task 6: Export Clean Data & Report

In [9]:
# === Export Cleaned Data ===
df.to_csv("weather_cleaned.csv", index=False)
print("✔ Cleaned CSV saved as weather_cleaned.csv")


# === Save All Plots ===

# Temperature Trend
if 'temp' in ''.join(df.columns).lower():
    temp_col = [c for c in df.columns if 'temp' in c.lower()][0]
    
    plt.figure(figsize=(10,5))
    plt.plot(df['date'], df[temp_col])
    plt.title("Daily Temperature Trend")
    plt.xlabel("Date")
    plt.ylabel("Temperature")
    plt.tight_layout()
    plt.savefig("temperature_trend.png")
    plt.close()
    print("✔ temperature_trend.png saved")


# Monthly Rainfall Plot
rain_candidates = [c for c in df.columns if 'rain' in c.lower() or 'precip' in c.lower()]
if rain_candidates:
    rain_col = rain_candidates[0]
    df['month'] = df['date'].dt.to_period('M')
    monthly_rain = df.groupby('month')[rain_col].sum()

    plt.figure(figsize=(10,5))
    monthly_rain.plot(kind='bar')
    plt.title("Monthly Rainfall Total")
    plt.xlabel("Month")
    plt.ylabel("Rainfall")
    plt.tight_layout()
    plt.savefig("monthly_rainfall.png")
    plt.close()
    print("✔ monthly_rainfall.png saved")


# Humidity vs Temperature Scatter Plot
hum_candidates = [c for c in df.columns if 'hum' in c.lower()]
if hum_candidates and 'temp_col' in locals():
    hum_col = hum_candidates[0]

    plt.figure(figsize=(8,6))
    plt.scatter(df[temp_col], df[hum_col])
    plt.title("Humidity vs Temperature")
    plt.xlabel("Temperature")
    plt.ylabel("Humidity")
    plt.tight_layout()
    plt.savefig("humidity_vs_temp.png")
    plt.close()
    print("✔ humidity_vs_temp.png saved")


# === Write Markdown Report ===

report = "# Weather Data Analysis Report\n"

report += "\n## Overview\n"
report += "This report summarizes the cleaned weather dataset, including temperature, rainfall, and humidity patterns.\n"

report += "\n## Key Statistics\n"
for col in numeric_map.keys():
    arr = df[col].values
    report += f"\n### {col.capitalize()}\n"
    report += f"- Mean: {np.mean(arr):.2f}\n"
    report += f"- Min: {np.min(arr):.2f}\n"
    report += f"- Max: {np.max(arr):.2f}\n"
    report += f"- Std Dev: {np.std(arr):.2f}\n"

report += "\n## Visual Insights\n"
report += "- **Daily Temperature Trend:** Shows how temperature fluctuates over time.\n"
report += "- **Monthly Rainfall Total:** Indicates rainfall patterns across different months.\n"
report += "- **Humidity vs Temperature:** Helps reveal correlation between humidity and heat.\n"

report += "\n## Conclusion\n"
report += "The dataset provides meaningful insights into weather behavior, useful for forecasting, environmental analysis, and seasonal trend understanding.\n"

with open("weather_report.md", "w") as f:
    f.write(report)

print("✔ weather_report.md created successfully")

report

✔ Cleaned CSV saved as weather_cleaned.csv
✔ weather_report.md created successfully


'# Weather Data Analysis Report\n\n## Overview\nThis report summarizes the cleaned weather dataset, including temperature, rainfall, and humidity patterns.\n\n## Key Statistics\n\n## Visual Insights\n- **Daily Temperature Trend:** Shows how temperature fluctuates over time.\n- **Monthly Rainfall Total:** Indicates rainfall patterns across different months.\n- **Humidity vs Temperature:** Helps reveal correlation between humidity and heat.\n\n## Conclusion\nThe dataset provides meaningful insights into weather behavior, useful for forecasting, environmental analysis, and seasonal trend understanding.\n'