# Data Visualization Notebook

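This notebook loads the processed weather data from `data/processed_weather_data.csv` and writes three plots (temperature time series, correlation heatmap, temperature by day of week) plus a Markdown summary report to the `output/` directory.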

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Start-of-run status message, printed once the imports above have succeeded.
print("Starting data visualization...")

In [None]:
# Global plot appearance: seaborn "whitegrid" theme, 12x6 default figure size.
sns.set(style="whitegrid")
# NOTE(review): kept after sns.set(), presumably so the theme call cannot
# override the figure size -- confirm before reordering.
plt.rcParams.update({"figure.figsize": (12, 6)})

In [None]:
# Make sure the input and output folders are present; existing ones are left
# untouched (exist_ok=True).
for directory in ('data', 'output'):
    os.makedirs(directory, exist_ok=True)
print("Created output directory for visualizations")

In [None]:
# Read the processed weather CSV into `df` and parse its 'date' column.
input_file = 'data/processed_weather_data.csv'
print(f"Loading data from {input_file}")

try:
    df = pd.read_csv(input_file)
except FileNotFoundError:
    # Report the missing path, then re-raise so the notebook run stops here.
    print(f"Error: Could not find {input_file}")
    raise
else:
    # Success path: convert 'date' to datetimes and report the row count.
    df['date'] = pd.to_datetime(df['date'])
    print(f"Loaded {len(df)} rows of processed data")

In [None]:
# Create visualizations
def _write_stats_section(report, heading, series, unit):
    """Write a '### heading' section with the mean, min and max of *series*.

    *unit* is appended verbatim after each two-decimal value
    (e.g. '°C', '%', ' hPa').
    """
    report.write(f'### {heading}\n')
    report.write(f'- Average: {series.mean():.2f}{unit}\n')
    report.write(f'- Minimum: {series.min():.2f}{unit}\n')
    report.write(f'- Maximum: {series.max():.2f}{unit}\n\n')


def create_visualizations(df, output_dir='output'):
    """Render the weather plots and a Markdown summary report.

    Args:
        df: DataFrame providing a datetime 'date' column, the numeric
            columns 'temperature', 'humidity' and 'pressure', their
            '<name>_7day_avg' counterparts, and a 'day_of_week' column
            holding English day names ('Monday' ... 'Sunday').
        output_dir: Directory that receives the generated files. It is
            created if it does not exist. Defaults to 'output'.

    Side effects:
        Writes temperature_time_series.png, correlation_heatmap.png,
        temperature_by_day.png and weather_report.md into *output_dir*
        and prints a progress line after each artifact.

    Raises:
        KeyError: if one of the required columns is missing from *df*.
    """
    # Do not depend on another notebook cell having created the directory.
    os.makedirs(output_dir, exist_ok=True)

    def out_path(filename):
        return os.path.join(output_dir, filename)

    # Time series plot of temperature
    plt.figure(figsize=(12, 6))
    plt.plot(df['date'], df['temperature'],
             label='Daily Temperature', color='red', alpha=0.7)
    plt.plot(df['date'], df['temperature_7day_avg'],
             label='7-day Average', color='blue', linewidth=2)
    plt.title('Temperature Over Time')
    plt.xlabel('Date')
    plt.ylabel('Temperature (°C)')
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path('temperature_time_series.png'))
    plt.close()  # release the figure so repeated runs do not accumulate them
    print("Created temperature time series plot")

    # Correlation heatmap
    plt.figure(figsize=(10, 8))
    numeric_cols = ['temperature', 'humidity', 'pressure',
                    'temperature_7day_avg', 'humidity_7day_avg',
                    'pressure_7day_avg']
    corr = df[numeric_cols].corr()
    # Fix the colour scale to the full correlation range [-1, 1].
    sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title('Correlation Between Weather Metrics')
    plt.tight_layout()
    plt.savefig(out_path('correlation_heatmap.png'))
    plt.close()
    print("Created correlation heatmap")

    # Box plot by day of week
    plt.figure(figsize=(14, 7))
    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']
    sns.boxplot(x='day_of_week', y='temperature', data=df,
                order=weekday_order)
    plt.title('Temperature Distribution by Day of Week')
    plt.xlabel('Day of Week')
    plt.ylabel('Temperature (°C)')
    plt.tight_layout()
    plt.savefig(out_path('temperature_by_day.png'))
    plt.close()
    print("Created temperature by day of week plot")

    # Create a summary report
    date_format = '%Y-%m-%d'
    period_start = df['date'].min().strftime(date_format)
    period_end = df['date'].max().strftime(date_format)
    # Explicit UTF-8: the report contains '°', which the locale's default
    # encoding may be unable to represent.
    with open(out_path('weather_report.md'), 'w', encoding='utf-8') as f:
        f.write('# Weather Data Analysis Report\n\n')
        f.write(f'Analysis date: {pd.Timestamp.now().strftime(date_format)}\n\n')
        f.write(f'Data period: {period_start} to {period_end}\n\n')
        f.write('## Summary Statistics\n\n')
        _write_stats_section(f, 'Temperature', df['temperature'], '°C')
        _write_stats_section(f, 'Humidity', df['humidity'], '%')
        _write_stats_section(f, 'Pressure', df['pressure'], ' hPa')
        f.write('## Visualizations\n\n')
        f.write('1. Temperature Over Time (temperature_time_series.png)\n')
        f.write('2. Correlation Between Weather Metrics (correlation_heatmap.png)\n')
        f.write('3. Temperature Distribution by Day of Week (temperature_by_day.png)\n')
    print("Created summary report")

In [None]:
# Create the visualizations
# Runs the plotting/report function defined above on the loaded DataFrame
# `df`; the generated files are written to the 'output' directory.
create_visualizations(df)
print("Visualizations created and saved to the output directory")

In [None]:
# Final status message marking the end of the notebook run.
print("Data visualization complete!")