
# Climate Analysis: Vancouver
This notebook analyzes historical climate data for Vancouver, focusing on temperature and precipitation trends, seasonal patterns, and forecasting.

## Objectives
1. Clean and preprocess the data for analysis.
2. Explore trends and seasonal patterns in temperature and precipitation.
3. Perform time-series forecasting using ARIMA and Prophet models.
4. Investigate relationships between additional variables, if available.
5. Generate visualizations for effective interpretation and communication.

---


## Data Cleaning and Preprocessing

In [None]:

# Data cleaning: parse dates, coerce measurement columns to numbers, collapse
# each min_/max_ column pair into a single avg_ column, and build a
# date-indexed frame (`cleaned_data`) for the analysis cells that follow.

# Parse the 'date' column; values that cannot be parsed become NaT and are
# removed by the dropna() further down.
data['date'] = pd.to_datetime(data['date'], errors='coerce')

# Coerce measurement columns to numeric; non-numeric entries become NaN.
numeric_prefixes = ('min_', 'max_', 'temperature', 'humidity', 'precipitation', 'wind_speed')
for col in data.columns:
    if col.startswith(numeric_prefixes):
        data[col] = pd.to_numeric(data[col], errors='coerce')

# Replace every min_/max_ pair with its midpoint, stored as avg_<name>.
# Only the leading 'min_' prefix is stripped, so the column that is checked
# for existence is always the same column that is read afterwards, even when
# 'min_' appears again later in the name.
columns_to_drop = []
for col in data.columns:
    if not col.startswith('min_'):
        continue
    base_name = col[len('min_'):]
    max_col = f'max_{base_name}'
    if max_col in data.columns:
        data[f'avg_{base_name}'] = (data[col] + data[max_col]) / 2
        columns_to_drop.extend([col, max_col])

# Also drop every column ending in '_c' (this includes any avg_*_c column
# created above). NOTE(review): presumably '_c' marks a series this notebook
# does not analyze -- confirm against the data dictionary.
columns_to_drop.extend(col for col in data.columns if col.endswith('_c'))
# A min_/max_ column that also ends in '_c' was collected twice; keep one
# entry per label, preserving order.
columns_to_drop = list(dict.fromkeys(columns_to_drop))
data.drop(columns=columns_to_drop, inplace=True)

# Keep only rows that have a date, a temperature and a precipitation value,
# index by date, and sort chronologically: the decomposition and forecasting
# cells treat row order as time order.
cleaned_data = (
    data.dropna(subset=['date', 'avg_temperature_v', 'precipitation_v'])
    .set_index('date')
    .sort_index()
)

# Check cleaned data
cleaned_data.info()
cleaned_data.describe()


## Exploratory Data Analysis (EDA)

In [None]:

# Exploratory plots: raw series, seasonal decomposition of temperature, and a
# correlation heatmap of the numeric columns.
from statsmodels.tsa.seasonal import seasonal_decompose

# Visualize trends in temperature and precipitation on a shared axis
plt.figure(figsize=(14, 6))
plt.plot(cleaned_data['avg_temperature_v'], label='Average Temperature', alpha=0.8)
plt.plot(cleaned_data['precipitation_v'], label='Precipitation', alpha=0.8)
plt.title('Temperature and Precipitation Trends in Vancouver')
plt.legend()
plt.show()

# Seasonal decomposition of temperature.
# NOTE(review): period=12 assumes one observation per month (yearly cycle);
# confirm the sampling frequency of the data.
decomposition = seasonal_decompose(cleaned_data['avg_temperature_v'], model='additive', period=12)
decomposition.plot()
plt.show()

# Correlation heatmap.
# Restrict to numeric columns first: on pandas >= 2.0, DataFrame.corr() raises
# if any non-numeric column is present, and the cleaning step only coerces
# columns with specific name prefixes.
corr = cleaned_data.select_dtypes(include='number').corr()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


## Time-Series Modeling and Forecasting

In [None]:

# Fit an ARIMA(1, 1, 1) model to the average temperature series, forecast
# ahead, and plot the forecast with its confidence band.
from statsmodels.tsa.arima.model import ARIMA

# Number of steps to forecast (one year, per the original intent --
# this assumes monthly observations).
forecast_steps = 12

temperature_model = ARIMA(
    cleaned_data['avg_temperature_v'],
    order=(1, 1, 1),
)
temperature_results = temperature_model.fit()

# Out-of-sample forecast and its confidence interval
forecast = temperature_results.get_forecast(steps=forecast_steps)
forecast_ci = forecast.conf_int()

# First column of the interval frame is the lower bound, second the upper
ci_lower = forecast_ci.iloc[:, 0]
ci_upper = forecast_ci.iloc[:, 1]

# Observed series, forecast mean, and shaded confidence band
plt.figure(figsize=(14, 6))
plt.plot(cleaned_data['avg_temperature_v'], label='Observed')
plt.plot(forecast.predicted_mean, color='red', label='Forecast')
plt.fill_between(forecast_ci.index, ci_lower, ci_upper, alpha=0.3, color='pink')
plt.title('Temperature Forecast with ARIMA')
plt.legend()
plt.show()


In [None]:

# Fit a Prophet model to precipitation and plot a 12-period forecast.

# Prophet expects a two-column frame: 'ds' (timestamp) and 'y' (value)
prophet_data = (
    cleaned_data
    .reset_index()[['date', 'precipitation_v']]
    .rename(columns={'date': 'ds', 'precipitation_v': 'y'})
)

model = Prophet()
model.fit(prophet_data)

# Extend the history by 12 month-end periods, then predict over the
# combined range (history plus future).
# NOTE: this rebinds `forecast`, replacing the ARIMA forecast object
# created in the previous cell.
future = model.make_future_dataframe(freq='M', periods=12)
forecast = model.predict(future)

# Plot forecast
model.plot(forecast)
plt.title('Precipitation Forecast with Prophet')
plt.show()
