# 🌤️ Weather Prediction - Data Analysis Notebook
### Complete ML Portfolio Project

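This notebook demonstrates a complete machine learning pipeline for weather prediction: loading and visualizing historical temperature data, then training a Random Forest model to forecast the next day's temperature from recent readings.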

In [None]:
# Import libraries
import pandas as pd  # CSV loading and DataFrame operations
import numpy as np  # NOTE(review): not referenced in the visible cells - confirm it is still needed
import matplotlib.pyplot as plt  # plotting
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

# Visible confirmation that the import cell ran without errors
print('Libraries imported successfully!')

## Load Weather Data

In [None]:
# Read the weather CSV; the 'date' column is parsed into datetimes on load
df = pd.read_csv('../data/weather_data.csv', parse_dates=['date'])

# Report the size of the dataset and the period it covers
first_day, last_day = df['date'].min(), df['date'].max()
print(f'Dataset shape: {df.shape}')
print(f'Date range: {first_day} to {last_day}')
print('\nFirst 5 rows:')
# Last expression of the cell, so the notebook renders the preview table
df.head()

## Data Visualization

In [None]:
# Line plot of the temperature series against the date column
plt.figure(figsize=(12, 5))
plt.plot(df['date'], df['temperature'], linewidth=1, alpha=0.7)
plt.xlabel('Date')
# Degree sign written as the real character (the label was mis-encoded before)
plt.ylabel('Temperature (°C)')
plt.title('Temperature Over Time')
plt.grid(True, alpha=0.3)
# Rotate the date tick labels so neighbouring labels do not overlap
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Histogram of temperature readings (30 bins)
plt.figure(figsize=(10, 5))
# Missing readings are removed first: plt.hist cannot derive a finite bin
# range when the input contains NaN and raises ValueError.
plt.hist(df['temperature'].dropna(), bins=30, edgecolor='black', alpha=0.7, color='coral')
# Degree sign written as the real character (the label was mis-encoded before)
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.title('Temperature Distribution')
plt.grid(True, alpha=0.3)
plt.show()

## Machine Learning Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Create features
# shift() operates on row order, so the rows must be chronological before
# building lags; otherwise "yesterday" / "week ago" point at arbitrary rows.
# NOTE(review): the lags assume one row per consecutive day - confirm against the CSV.
df = df.sort_values('date').reset_index(drop=True)
df['temp_yesterday'] = df['temperature'].shift(1)
df['temp_week_ago'] = df['temperature'].shift(7)
df['target'] = df['temperature'].shift(-1)  # next row's temperature is what we predict

# Drop only rows where a model input or the target is missing (the leading
# rows without a 7-step lag and the final row without a next-day value),
# rather than every row that has a gap in some unrelated column.
feature_cols = ['temperature', 'temp_yesterday', 'temp_week_ago']
df = df.dropna(subset=feature_cols + ['target'])

# Prepare data
X = df[feature_cols]
y = df['target']

# Split data chronologically: first 80% of rows for training, last 20% for testing.
# A shuffled split puts days adjacent to the test days into the training set,
# and because features and targets are shifted copies of the same series the
# test targets would show up as training features, making the error look
# better than it is for genuine forecasting.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train model (random_state fixed so the forest is reproducible)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out, most recent period
y_pred = model.predict(X_test)

# Evaluate: mean absolute error, in the same unit as the temperature column
mae = mean_absolute_error(y_test, y_pred)
print(f'Model Mean Absolute Error: {mae:.2f}°C')

## Results Visualization

In [None]:
# Scatter of predicted vs. actual test values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.6, s=30)
# Dashed red diagonal = perfect prediction (predicted == actual),
# drawn across the range of the actual test values.
actual_min, actual_max = y_test.min(), y_test.max()
plt.plot([actual_min, actual_max], [actual_min, actual_max], 'r--', lw=2)
# Degree signs written as the real character (the labels were mis-encoded before)
plt.xlabel('Actual Temperature (°C)')
plt.ylabel('Predicted Temperature (°C)')
plt.title('Model Predictions vs Actual')
plt.grid(True, alpha=0.3)
plt.show()

## Summary

In [None]:
# Final recap: dataset size, model type and test error.
# len(df) is the row count after the modelling cell removed incomplete rows.
divider = '=' * 60
print(divider)
print('PROJECT SUMMARY')
print(divider)
# Check marks and degree sign restored (they were mis-encoded in the output before)
print(f'\n✅ Dataset: {len(df)} days of weather data')
print('✅ Model: Random Forest Regressor')
# MAE is an average error, not a bound on every prediction, so say so explicitly
print(f'✅ Accuracy: Predictions within {mae:.2f}°C of actual on average')
print(divider)