# Hidden Markov Models for Weather Prediction
## CS280 Project - Real-World HMM Application

**Team Members:** [Your Name Here]  
**Date:** Fall 2025  
**Course:** SMU-MedTech CS280

---

### Project Overview
This notebook implements a complete Hidden Markov Model for weather prediction, demonstrating:
- From-scratch implementation of HMM algorithms
- Weather data simulation and modeling
- Model training using EM/Baum-Welch algorithm
- Inference using Forward and Viterbi algorithms
- Forecasting capabilities
- Comprehensive evaluation and visualization


## 1. Introduction & Problem Definition

### Use Case: Weather Prediction Using HMM

Weather prediction is a classic sequential modeling problem that naturally fits the HMM framework:

**Hidden States:** Weather conditions (Sunny, Cloudy, Rainy)  
**Observations:** Sensor readings (temperature, humidity, umbrella usage)  
**Goal:** Predict future weather states and observations based on historical sensor data

### Why HMM for Weather?
- Weather states are not directly observable but influence sensor readings
- Weather patterns follow temporal dependencies (Markov assumption)
- Multiple observation types provide rich information about hidden states
- Real-world applicability for meteorology and agriculture

### HMM Components:
- **States S = {Sunny, Cloudy, Rainy}** with weather interpretation
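- **Observations:** Continuous features (temperature, humidity, umbrella usage rate)
- **Parameters:** Initial state distribution π, transition matrix A, and an emission mean and covariance (μ, Σ) for each state
- **Assumptions:** First-order Markov property (the next state depends only on the current state) and conditional independence of each observation given the current hidden state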


In [None]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.special import logsumexp
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide plot appearance: base style sheet, colour palette, and
# default figure / font sizes applied in a single rcParams update.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# Project-local HMM class and synthetic-data generator
from hmm_implementation import HiddenMarkovModel, generate_weather_data

print("Libraries imported successfully!")


In [None]:
# Build the synthetic dataset used by every later cell.
np.random.seed(42)  # also pin NumPy's global RNG so reruns are repeatable

print("Generating synthetic weather data...")
true_states, observations = generate_weather_data(n_days=300, seed=42)

print(f"Generated {len(true_states)} days of weather data")
print(f"Observation shape: {observations.shape}")
print(f"States: {np.unique(true_states, return_counts=True)}")

# Tabular view of the data: one row per day, one column per observed feature.
weather_df = pd.DataFrame({
    'day': range(len(true_states)),
    'true_state': true_states,
})
for column_index, column_name in enumerate(('temperature', 'humidity', 'umbrella_usage')):
    # Observation column k holds the k-th feature listed above.
    weather_df[column_name] = observations[:, column_index]

# Human-readable state labels; the list position is the integer state code.
state_names = ['Sunny', 'Cloudy', 'Rainy']
weather_df['true_state_name'] = weather_df['true_state'].map(dict(enumerate(state_names)))

print("\nFirst 10 days of data:")
print(weather_df.head(10))


In [None]:
# Exploratory plots: the first 50 days of the true state sequence and of
# each observed feature, laid out on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

first_days = slice(None, 50)  # same window for every panel

# (axes, DataFrame column, line format, extra line kwargs, title, y-label)
overview_panels = [
    (axes[0, 0], 'true_state', 'o-', {'markersize': 4},
     'True Weather States (First 50 Days)', 'State'),
    (axes[0, 1], 'temperature', 'r-', {'alpha': 0.7},
     'Temperature Over Time (First 50 Days)', 'Temperature (°C)'),
    (axes[1, 0], 'humidity', 'b-', {'alpha': 0.7},
     'Humidity Over Time (First 50 Days)', 'Humidity (%)'),
    (axes[1, 1], 'umbrella_usage', 'g-', {'alpha': 0.7},
     'Umbrella Usage Over Time (First 50 Days)', 'Umbrella Usage Rate'),
]

for panel_ax, column, line_format, line_kwargs, title, y_label in overview_panels:
    panel_ax.plot(weather_df['day'][first_days], weather_df[column][first_days],
                  line_format, **line_kwargs)
    panel_ax.set_title(title)
    panel_ax.set_xlabel('Day')
    panel_ax.set_ylabel(y_label)
    panel_ax.grid(True, alpha=0.3)

# Only the state panel has a categorical y-axis.
axes[0, 0].set_yticks([0, 1, 2])
axes[0, 0].set_yticklabels(['Sunny', 'Cloudy', 'Rainy'])

plt.tight_layout()
plt.show()

# Per-column descriptive statistics for the whole dataset
print("\nData Summary:")
print(weather_df.describe())


In [None]:
# Create the model, hold out the last 20% of days, and fit on the rest.
print("Initializing HMM model...")
hmm = HiddenMarkovModel(n_states=3, n_observations=3)

# Chronological 80/20 split: the earliest days train, the latest days test.
train_size = int(0.8 * len(observations))
train_obs, test_obs = observations[:train_size], observations[train_size:]
train_states, test_states = true_states[:train_size], true_states[train_size:]

print(f"Training set: {len(train_obs)} days")
print(f"Test set: {len(test_obs)} days")

# Fit on observations only; the true states are never shown to the model.
print("\nTraining HMM using Baum-Welch algorithm...")
log_likelihoods = hmm.baum_welch(train_obs, max_iterations=100, tolerance=1e-6)

print(f"Training completed in {len(log_likelihoods)} iterations")
final_ll = log_likelihoods[-1]
print(f"Final log-likelihood: {final_ll:.2f}")
initial_ll = log_likelihoods[0]
print(f"Initial log-likelihood: {initial_ll:.2f}")
print(f"Improvement: {final_ll - initial_ll:.2f}")


In [None]:
# Decode the held-out days and score the model against the simulator labels.
#
# The model was fitted from observations alone, so nothing ties learned
# state index k to the simulator's label k (label switching). Before any
# accuracy is computed, learned indices are matched one-to-one to the true
# labels using the TRAINING split only. If the learned order already agrees
# with the labels, the mapping is simply the identity.
from scipy.optimize import linear_sum_assignment

print("Running Viterbi algorithm...")
raw_viterbi_states, viterbi_log_prob = hmm.viterbi_algorithm(test_obs)

# Learned-index -> true-label mapping, estimated on the training split.
n_model_states = len(hmm.state_names)
train_decoded, _train_log_prob = hmm.viterbi_algorithm(train_obs)
# overlap[t, k] = number of training days with true label t decoded as state k
overlap = confusion_matrix(train_states, train_decoded,
                           labels=np.arange(n_model_states))
# Negate the counts: the solver minimises cost, we want maximum agreement.
true_idx, learned_idx = linear_sum_assignment(-overlap)
state_map = np.empty(n_model_states, dtype=int)
state_map[learned_idx] = true_idx
print(f"Learned -> true state mapping: {dict(enumerate(state_map.tolist()))}")

# Most likely state path for the test days, expressed in true-label indices.
viterbi_states = state_map[np.asarray(raw_viterbi_states, dtype=int)]

# Forward pass on the test days.
# NOTE(review): assumes `alpha` holds log forward probabilities with one row
# per day and one column per state -- confirm against forward_algorithm.
alpha, log_likelihood = hmm.forward_algorithm(test_obs)
# Normalising each row of alpha gives the filtered posterior
# P(state_t | obs_1..t); it is not smoothed with future observations.
raw_posteriors = np.exp(alpha - logsumexp(alpha, axis=1, keepdims=True))
# Reorder the columns so that column j refers to true label j.
state_posteriors = np.empty_like(raw_posteriors)
state_posteriors[:, state_map] = raw_posteriors
predicted_states = np.argmax(state_posteriors, axis=1)

# Accuracies are now computed between sequences that share one labelling.
viterbi_accuracy = accuracy_score(test_states, viterbi_states)
forward_accuracy = accuracy_score(test_states, predicted_states)

print(f"Viterbi accuracy: {viterbi_accuracy:.3f}")
print(f"Forward algorithm accuracy: {forward_accuracy:.3f}")
print(f"Test set log-likelihood: {log_likelihood:.2f}")

# Visualize results
fig, axes = plt.subplots(2, 1, figsize=(15, 10))

# Plot 1: true vs. decoded (label-aligned) states
days = np.arange(len(test_states))
axes[0].plot(days, test_states, 'ko-', label='True States', markersize=4)
axes[0].plot(days, viterbi_states, 'ro--', label='Viterbi States', markersize=4)
axes[0].set_title('True vs Viterbi Predicted States (Test Set)')
axes[0].set_xlabel('Day')
axes[0].set_ylabel('State')
axes[0].set_yticks([0, 1, 2])
axes[0].set_yticklabels(['Sunny', 'Cloudy', 'Rainy'])
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Plot 2: per-state posterior probability, columns already label-aligned
colors = ['yellow', 'gray', 'blue']
for i, (state_name, color) in enumerate(zip(hmm.state_names, colors)):
    axes[1].fill_between(days, 0, state_posteriors[:, i],
                         alpha=0.6, label=f'P({state_name})', color=color)

axes[1].set_title('State Posterior Probabilities Over Time')
axes[1].set_xlabel('Day')
axes[1].set_ylabel('Probability')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Forecast the days that follow the training period and plot them next to
# the most recent training history.
print("Generating weather forecasts...")

context_days = 30    # length of the history window handed to the model
forecast_steps = 10  # number of future days to predict

# The most recent training days serve as context. Slicing yields fewer rows
# when the training split is shorter than the window, so the time axis below
# is sized from the slice itself instead of repeating the constant.
context_obs = train_obs[-context_days:]
context_states = train_states[-context_days:]

pred_states, pred_obs = hmm.forecast(context_obs, steps=forecast_steps)

print(f"Forecasted states: {pred_states}")
print(f"Forecasted observations shape: {pred_obs.shape}")

# Visualize forecast
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Time axes: history occupies negative days, the forecast starts at day 0.
historical_days = np.arange(-len(context_obs), 0)
forecast_days = np.arange(0, forecast_steps)

# NOTE(review): pred_states come straight from the model, i.e. in its learned
# state indexing, while context_states are simulator labels. If training
# ordered the states differently, the two curves in the state panel are not
# directly comparable -- confirm.
# (axes, historical series, forecast series, title, y-label)
forecast_panels = [
    (axes[0, 0], context_states, pred_states,
     'Weather State Forecast', 'State'),
    (axes[0, 1], context_obs[:, 0], pred_obs[:, 0],
     'Temperature Forecast', 'Temperature (°C)'),
    (axes[1, 0], context_obs[:, 1], pred_obs[:, 1],
     'Humidity Forecast', 'Humidity (%)'),
    (axes[1, 1], context_obs[:, 2], pred_obs[:, 2],
     'Umbrella Usage Forecast', 'Umbrella Usage Rate'),
]

for panel_ax, history, forecast, title, y_label in forecast_panels:
    panel_ax.plot(historical_days, history, 'bo-', label='Historical', markersize=4)
    panel_ax.plot(forecast_days, forecast, 'ro-', label='Forecast', markersize=4)
    # Dashed vertical line marks the boundary between history and forecast.
    panel_ax.axvline(x=0, color='k', linestyle='--', alpha=0.5)
    panel_ax.set_title(title)
    panel_ax.set_xlabel('Days from Now')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

# Only the state panel has a categorical y-axis.
axes[0, 0].set_yticks([0, 1, 2])
axes[0, 0].set_yticklabels(['Sunny', 'Cloudy', 'Rainy'])

plt.tight_layout()
plt.show()

# Print forecast summary: one line per forecast day
print("\nWeather Forecast Summary:")
for i, state in enumerate(pred_states):
    state_name = hmm.state_names[state]
    temp = pred_obs[i, 0]
    humidity = pred_obs[i, 1]
    umbrella = pred_obs[i, 2]
    print(f"Day {i+1}: {state_name} (Temp: {temp:.1f}°C, Humidity: {humidity:.1f}%, Umbrella: {umbrella:.2f})")


In [None]:
# Text report: learned parameters, headline metrics, and a short model recap.
print("CS280 HMM Project - Weather Prediction")
print("=" * 40)

# --- Learned parameters -------------------------------------------------
print("\nLearned Parameters:")
print(f"Initial probabilities (π): {hmm.pi}")

# One line per source state: probabilities of moving to each target state.
print(f"Transition matrix (A):")
for i, state in enumerate(hmm.state_names):
    to_sunny, to_cloudy, to_rainy = hmm.A[i, 0], hmm.A[i, 1], hmm.A[i, 2]
    print(f"  {state} → [Sunny: {to_sunny:.3f}, Cloudy: {to_cloudy:.3f}, Rainy: {to_rainy:.3f}]")

# One line per state: mean of each observed feature under that state.
print(f"\nEmission means (μ):")
for i, state in enumerate(hmm.state_names):
    mean_temp, mean_humidity, mean_umbrella = hmm.mu[i, 0], hmm.mu[i, 1], hmm.mu[i, 2]
    print(f"  {state}: Temp={mean_temp:.1f}°C, Humidity={mean_humidity:.1f}%, Umbrella={mean_umbrella:.2f}")

# --- Performance metrics (values computed in earlier cells) ---------------
print(f"\nPerformance Metrics:")
print(f"  Training log-likelihood: {log_likelihoods[-1]:.2f}")
print(f"  Test log-likelihood: {log_likelihood:.2f}")
print(f"  Viterbi accuracy: {viterbi_accuracy:.3f}")
print(f"  Forward algorithm accuracy: {forward_accuracy:.3f}")

# --- Model summary --------------------------------------------------------
print(f"\nModel Summary:")
print(f"  States: {', '.join(hmm.state_names)}")
print(f"  Observations: Temperature, Humidity, Umbrella Usage")
print(f"  Training data: {len(train_obs)} days")
print(f"  Test data: {len(test_obs)} days")
print(f"  Forecast capability: {forecast_steps} days ahead")

# --- Closing banner -------------------------------------------------------
print(f"\nProject completed successfully!")
print(f"All algorithms implemented from scratch.")
print(f"Comprehensive evaluation and visualization provided.")
