<a href="https://colab.research.google.com/github/moushumipriya/Norway-Snowfall-Prediction-using-LSTM/blob/main/Norway_Snowfall_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 Load and Preview Dataset

In [None]:
import pandas as pd


# Read the dataset CSV (path is relative to the notebook's working directory).
df = pd.read_csv("norway_snowfall_dataset.csv")

# Preview the first rows, then report the frame's dimensions and column names.
print(df.head())
print(
    f"Dataset shape: {df.shape}",
    f"Columns: {df.columns.tolist()}",
    sep="\n",
)


Data Preprocessing and Visualization

Check Missing Values and Data Summary

In [None]:

# Number of missing values in each column.
print(df.isnull().sum())

# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called on its own; wrapping it in print() emits a stray "None".
df.info()

# Descriptive statistics as produced by DataFrame.describe().
print(df.describe())


Plot Temperature and Snowfall Trends Over Time


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Parse the 'datetime' column so seaborn plots it on a real time axis.
df['datetime'] = pd.to_datetime(df['datetime'])

# --- Temperature over time ---
fig_temp, ax_temp = plt.subplots(figsize=(14, 6))
sns.lineplot(data=df, x='datetime', y='temperature_C', ax=ax_temp)
ax_temp.set_title('Temperature over Time')
ax_temp.set_xlabel('Date')
ax_temp.set_ylabel('Temperature (°C)')
plt.show()

# --- Snowfall over time ---
fig_snow, ax_snow = plt.subplots(figsize=(14, 6))
sns.lineplot(data=df, x='datetime', y='snowfall_mm', ax=ax_snow)
ax_snow.set_title('Snowfall Amount over Time')
ax_snow.set_xlabel('Date')
ax_snow.set_ylabel('Snowfall (mm)')
plt.show()


 Feature Engineering

 Extract datetime features

In [None]:
# Derive calendar features from the parsed timestamps through the .dt accessor.
# Each attribute name doubles as the name of the new column.
timestamps = df['datetime'].dt
for part in ('year', 'month', 'day', 'hour', 'dayofweek'):
    df[part] = getattr(timestamps, part)


Create lag features

In [None]:
# lag_k holds the snowfall value from k rows earlier; the first k rows of each
# lag column are NaN because there is nothing to shift in.
snowfall = df['snowfall_mm']
for k in (1, 2, 3):
    df[f'lag_{k}'] = snowfall.shift(k)


Drop rows with NaN (due to lagging)

In [None]:
# Discard every row that contains at least one NaN in any column
# (this includes the leading rows left incomplete by the lag shifts).
df = df.dropna(axis=0, how='any')


Final feature columns to use

In [None]:
# Model inputs, in the column order they are fed to the scaler and the network.
features = [
    # raw measurements
    'temperature_C', 'humidity_%', 'pressure_hPa', 'wind_speed_mps',
    # calendar parts derived from 'datetime'
    'year', 'month', 'day', 'hour', 'dayofweek',
    # previous snowfall values
    'lag_1', 'lag_2', 'lag_3',
]

# Column the model is trained to predict.
target = 'snowfall_mm'

 Train-Test Split

 Define split ratio and split

In [None]:
# Ordered (unshuffled) split: the first 80% of rows train, the remainder test.
train_size = int(0.8 * len(df))

# Feature matrix and target vector
X = df[features]
y = df[target]

# Positional slicing at the split point
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

 Scale features

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()    # standardises the input features
scaler_y = StandardScaler()  # standardises the target variable

# Learn the feature statistics from the training rows only, then apply the
# same transformation to both splits.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The target is passed as an (n, 1) column; it is fitted on the training split only.
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_test_column = y_test.to_numpy().reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_column)
y_test_scaled = scaler_y.transform(y_test_column)

Build and Train LSTM Model
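LSTM (Long Short-Term Memory) networks are well suited to time-series forecasting because they can learn temporal dependencies across a sequence of timesteps. Note that the input below uses a single timestep per sample, so the temporal context here comes from the lag features rather than from the recurrent state.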



 Reshape input for LSTM

In [None]:
# The LSTM layer takes 3-D input [samples, timesteps, features]; insert a
# length-1 timestep axis between the sample and feature axes.
X_train_lstm = X_train_scaled[:, None, :]
X_test_lstm = X_test_scaled[:, None, :]

print("LSTM input shape:", X_train_lstm.shape)

Build the model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Imported here so this cell stays self-contained alongside the cell's
# existing Sequential / LSTM / Dense imports.
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow before any weights are created so that
# initialisation and batch shuffling repeat on every "Restart & Run All".
set_random_seed(42)

# Declare the input with an explicit Input layer rather than passing
# `input_shape=` to the first layer, which current Keras warns about.
model = Sequential([
    Input(shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])),  # (timesteps, features)
    LSTM(64),
    Dense(32, activation='relu'),
    Dense(1),  # Output layer: one regression value per sample
])

# MSE is the training loss; MAE is tracked as an additional metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()


Train the model

In [None]:
# Train on the scaled training split. The scaled test split is passed as
# validation data, so the per-epoch val_* metrics are computed on it.
history = model.fit(
    x=X_train_lstm,
    y=y_train_scaled,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_test_lstm, y_test_scaled),
)

Plot training history

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: loss on the left, MAE on the right.
fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(12, 5))
curves = history.history

# Loss
loss_ax.plot(curves['loss'], label='Train Loss')
loss_ax.plot(curves['val_loss'], label='Val Loss')
loss_ax.set_title('Loss Over Epochs')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('MSE')
loss_ax.legend()

# MAE
mae_ax.plot(curves['mae'], label='Train MAE')
mae_ax.plot(curves['val_mae'], label='Val MAE')
mae_ax.set_title('MAE Over Epochs')
mae_ax.set_xlabel('Epoch')
mae_ax.set_ylabel('MAE')
mae_ax.legend()

plt.tight_layout()
plt.show()


 Make Predictions and Evaluate Performance


 Predict on Test Set

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the test inputs; the outputs are in the scaled target space.
y_pred = model.predict(X_test_lstm)

# Undo the target scaling for both predictions and ground truth so the
# metrics below are computed on unscaled values.
y_pred_inverse = scaler_y.inverse_transform(y_pred)
y_test_inverse = scaler_y.inverse_transform(y_test_scaled)

# Regression metrics on the unscaled values
mae = mean_absolute_error(y_test_inverse, y_pred_inverse)
mse = mean_squared_error(y_test_inverse, y_pred_inverse)
rmse = mse ** 0.5  # root of the MSE
r2 = r2_score(y_test_inverse, y_pred_inverse)

for report_line in (
    f"MAE:  {mae:.2f}",
    f"MSE:  {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R²:   {r2:.4f}",
):
    print(report_line)

Plot Actual vs Predicted Snowfall

In [None]:
# Overlay the unscaled ground truth and predictions, indexed by test-set position.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(y_test_inverse, label='Actual Snowfall')
ax.plot(y_pred_inverse, label='Predicted Snowfall')
ax.set_title('Actual vs Predicted Snowfall')
ax.set_xlabel('Time Step')
ax.set_ylabel('Snowfall (mm)')
ax.legend()
ax.grid(True)
plt.show()


In [None]:

# Ensure 'datetime' is a datetime dtype, then label each row with its month name.
df['datetime'] = pd.to_datetime(df['datetime'])
df['Month'] = df['datetime'].dt.month_name()

# Mean snowfall per month, reindexed into calendar order. Months with no
# rows in the data become NaN after the reindex.
month_avg = df.groupby("Month")["snowfall_mm"].mean().reindex([
    'January','February','March','April','May','June',
    'July','August','September','October','November','December'
])

# Plot
plt.figure(figsize=(12,6))
# seaborn 0.13 deprecates `palette` without `hue`, so the month is also
# assigned to `hue`. `dodge=False` keeps one full-width bar per month on
# older seaborn releases.
ax = sns.barplot(
    x=month_avg.index,
    y=month_avg.values,
    hue=month_avg.index,
    palette="coolwarm",
    dodge=False,
)
# The hue legend would only repeat the x-axis labels; drop it if one was drawn.
month_legend = ax.get_legend()
if month_legend is not None:
    month_legend.remove()
plt.title("📅 Average Snowfall by Month (mm)", fontsize=16)
plt.xlabel("Month", fontsize=12)
plt.ylabel("Average Snowfall (mm)", fontsize=12)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()