# Climate Change EDA
This notebook preprocesses the sample climate dataset and performs exploratory data analysis (EDA) on the global temperature anomaly.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from scipy.stats import zscore

# Load dataset
# The 'year' column is parsed with a four-digit-year format so that the
# datetime accessor (.dt) can be used on it below.
df = pd.read_csv('../data/sample_climate_data.csv')
df['year'] = pd.to_datetime(df['year'], format='%Y')

# Handle missing values (if any)
# Forward-fill in file order. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling. A gap at the very top of the file has
# nothing before it to copy from and therefore stays NaN.
df = df.ffill()

# Add new column: decade
# Integer-divide the calendar year by 10 and scale back up (e.g. 1987 -> 1980).
df['decade'] = df['year'].dt.year // 10 * 10

# Remove outliers using Z-score
# nan_policy='omit' keeps a leftover NaN from turning every z-score into NaN
# (the default 'propagate' would, and the filter below would then drop all
# rows). Rows whose temperature is still missing get a NaN z-score and are
# excluded by the comparison, as are rows 3 or more standard deviations from
# the mean.
df['z_temp'] = zscore(
    df['temperature_anomaly'].to_numpy(dtype=float),
    nan_policy='omit',
)
# .copy() makes the filtered frame independent of the original so that later
# column assignments do not trigger SettingWithCopyWarning.
df = df[df['z_temp'].abs() < 3].copy()

# Summary statistics
# describe() builds the per-column summary table; print it in full.
summary_stats = df.describe()
print(summary_stats)

# Line plot
# Draw temperature anomaly against year on a 10x5 inch figure, save it as a
# PNG under ../visuals/, then display it.
sns.set_theme(style='darkgrid')
plt.figure(figsize=(10, 5))
sns.lineplot(data=df, x='year', y='temperature_anomaly')
plt.title('Global Temperature Anomaly Over Time')
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly (°C)')
plt.xticks(rotation=45)
plt.tight_layout()

# savefig() does not create missing directories, so make sure the output
# folder exists before writing; otherwise a fresh checkout fails here.
output_path = Path('../visuals/temp_anomaly_over_time.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)

# Save before show(): the saved file must be written while the figure is
# still the current one.
plt.show()