# üåç Climate Analysis

> **PM Accelerator Mission**: "By making industry-leading tools and education available to individuals from all backgrounds, we level the playing field for future PM leaders."

---

## Objectives
1. **Long-term Climate Patterns**: Analyze temperature trends over time
2. **Regional Variations**: Compare climate across different regions and zones
3. **Seasonal Analysis**: Study seasonal patterns and variations

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Libraries loaded!")

‚úÖ Libraries loaded!


In [2]:
# Load CLEANED data
df = pd.read_csv("../data/weather_cleaned.csv", parse_dates=['last_updated'])
df['date'] = df['last_updated'].dt.date
df['month'] = df['last_updated'].dt.month
df['year'] = df['last_updated'].dt.year

print(f"üìä Dataset: {len(df):,} records, {df['country'].nunique()} countries")

üìä Dataset: 114,203 records, 204 countries


## 1. Climate Zone Classification

In [3]:
# Define climate zones based on latitude
def get_climate_zone(lat):
    lat = abs(lat)
    if lat < 23.5:
        return 'Tropical'
    elif lat < 35:
        return 'Subtropical'
    elif lat < 55:
        return 'Temperate'
    elif lat < 66.5:
        return 'Subarctic'
    else:
        return 'Polar'

def get_hemisphere(lat):
    return 'Northern' if lat >= 0 else 'Southern'

df['climate_zone'] = df['latitude'].apply(get_climate_zone)
df['hemisphere'] = df['latitude'].apply(get_hemisphere)

# Climate zone distribution
zone_counts = df['climate_zone'].value_counts()
fig = px.pie(values=zone_counts.values, names=zone_counts.index,
             title='üåç Data Distribution by Climate Zone',
             color_discrete_sequence=px.colors.qualitative.Set2)
fig.update_layout(template='plotly_dark')
fig.show()

## 2. Long-term Temperature Trends

In [4]:
# Monthly average temperature by climate zone
monthly_by_zone = df.groupby(['climate_zone', 'month'])['temperature_celsius'].mean().reset_index()

fig = px.line(monthly_by_zone, x='month', y='temperature_celsius', color='climate_zone',
              title='üå°Ô∏è Monthly Temperature Patterns by Climate Zone',
              labels={'temperature_celsius': 'Temperature (¬∞C)', 'month': 'Month'})
fig.update_layout(template='plotly_dark', height=500)
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 13)),
                 ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
fig.show()

In [5]:
# Temperature statistics by climate zone
zone_stats = df.groupby('climate_zone')['temperature_celsius'].agg(['mean', 'std', 'min', 'max']).round(2)
zone_stats.columns = ['Mean (¬∞C)', 'Std Dev', 'Min (¬∞C)', 'Max (¬∞C)']
zone_stats

Unnamed: 0_level_0,Mean (¬∞C),Std Dev,Min (¬∞C),Max (¬∞C)
climate_zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Subarctic,10.78,8.29,-15.7,32.3
Subtropical,25.04,9.27,-3.3,49.2
Temperate,16.8,10.32,-24.9,43.2
Tropical,25.73,4.8,-0.9,47.1


## 3. Hemisphere Comparison

In [6]:
# Hemisphere seasonal comparison
hemi_monthly = df.groupby(['hemisphere', 'month'])['temperature_celsius'].mean().reset_index()

fig = px.line(hemi_monthly, x='month', y='temperature_celsius', color='hemisphere',
              title='üåç Northern vs Southern Hemisphere - Seasonal Patterns',
              labels={'temperature_celsius': 'Temperature (¬∞C)', 'month': 'Month'})
fig.update_layout(template='plotly_dark', height=500)
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 13)),
                 ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
fig.show()

In [7]:
# Temperature variability by hemisphere
fig = go.Figure()
for hemi in ['Northern', 'Southern']:
    data = df[df['hemisphere'] == hemi]['temperature_celsius']
    fig.add_trace(go.Box(y=data, name=hemi))

fig.update_layout(
    title='üìä Temperature Distribution by Hemisphere',
    yaxis_title='Temperature (¬∞C)',
    template='plotly_dark',
    height=500
)
fig.show()

## 4. Regional Temperature Heatmap

In [8]:
# Top 20 countries by data volume
top_countries = df['country'].value_counts().head(20).index.tolist()
df_top = df[df['country'].isin(top_countries)]

# Create heatmap data
heatmap_data = df_top.pivot_table(
    values='temperature_celsius', 
    index='country', 
    columns='month', 
    aggfunc='mean'
).round(1)

fig = px.imshow(heatmap_data, 
                labels=dict(x='Month', y='Country', color='Temp (¬∞C)'),
                title='üå°Ô∏è Monthly Temperature Heatmap - Top 20 Countries',
                color_continuous_scale='RdYlBu_r')
fig.update_layout(template='plotly_dark', height=600)
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 13)),
                 ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
fig.show()

## 5. Seasonal Amplitude Analysis

In [9]:
# Calculate seasonal amplitude (difference between max and min monthly avg)
country_seasonal = df.groupby(['country', 'month'])['temperature_celsius'].mean().reset_index()
amplitude = country_seasonal.groupby('country')['temperature_celsius'].agg(['max', 'min'])
amplitude['amplitude'] = amplitude['max'] - amplitude['min']
amplitude = amplitude.sort_values('amplitude', ascending=False)

# Top 20 countries with highest seasonal variation
top_amplitude = amplitude.head(20)

fig = go.Figure(data=[
    go.Bar(
        x=top_amplitude['amplitude'].values,
        y=top_amplitude.index,
        orientation='h',
        marker_color='#FF6B6B'
    )
])

fig.update_layout(
    title='üå°Ô∏è Countries with Highest Seasonal Temperature Variation',
    xaxis_title='Temperature Amplitude (¬∞C)',
    yaxis_title='Country',
    template='plotly_dark',
    height=600
)
fig.show()

## 6. Climate Insights Summary

In [10]:
print("="*60)
print("üåç CLIMATE ANALYSIS - KEY INSIGHTS")
print("="*60)

print(f"\nüìä Climate Zone Coverage:")
for zone in zone_stats.index:
    print(f"   ‚Ä¢ {zone}: Mean {zone_stats.loc[zone, 'Mean (¬∞C)']}¬∞C")

print(f"\nüå°Ô∏è Temperature Extremes:")
print(f"   ‚Ä¢ Hottest Zone Avg: {zone_stats['Mean (¬∞C)'].idxmax()} ({zone_stats['Mean (¬∞C)'].max()}¬∞C)")
print(f"   ‚Ä¢ Coldest Zone Avg: {zone_stats['Mean (¬∞C)'].idxmin()} ({zone_stats['Mean (¬∞C)'].min()}¬∞C)")

print(f"\nüîÑ Seasonal Variation:")
print(f"   ‚Ä¢ Most Variable: {amplitude['amplitude'].idxmax()} ({amplitude['amplitude'].max():.1f}¬∞C range)")
print(f"   ‚Ä¢ Most Stable: {amplitude['amplitude'].idxmin()} ({amplitude['amplitude'].min():.1f}¬∞C range)")

print("\n" + "="*60)

üåç CLIMATE ANALYSIS - KEY INSIGHTS

üìä Climate Zone Coverage:
   ‚Ä¢ Subarctic: Mean 10.78¬∞C
   ‚Ä¢ Subtropical: Mean 25.04¬∞C
   ‚Ä¢ Temperate: Mean 16.8¬∞C
   ‚Ä¢ Tropical: Mean 25.73¬∞C

üå°Ô∏è Temperature Extremes:
   ‚Ä¢ Hottest Zone Avg: Tropical (25.73¬∞C)
   ‚Ä¢ Coldest Zone Avg: Subarctic (10.78¬∞C)

üîÑ Seasonal Variation:
   ‚Ä¢ Most Variable: Mongolia (41.5¬∞C range)
   ‚Ä¢ Most Stable: Estonie (0.0¬∞C range)

