In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
# Load the data
data=pd.read_csv('weather_classification_data.csv')

In [3]:
# View Dimensions of Dataset
data.shape

(13200, 11)

In [4]:
# Preview the dataset
data.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy


In [5]:
# Count of labels for targeted column
data['Weather Type'].value_counts()

Weather Type
Rainy     3300
Cloudy    3300
Sunny     3300
Snowy     3300
Name: count, dtype: int64

In [6]:
# The target column
X = data.drop(columns=['Weather Type'])  
y = data['Weather Type']
# Encode categorical features
categorical_columns = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_columns)
# Encode target column if it's categorical
if y.dtype == 'object':
    le = LabelEncoder()
    y = le.fit_transform(y)
# Convert y to dummy variables (one-hot encoding)
y2 = np.zeros((y.shape[0], np.max(y) + 1), dtype=np.float32)
y2[np.arange(y.shape[0]), y] = 1.0

In [7]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y2, test_size=0.3, random_state=42)

In [8]:
# View dimensions of train and test set
X_train.shape, X_test.shape

((9240, 18), (3960, 18))

In [9]:
# Check data types in X_train
X_train.dtypes

Temperature                  float64
Humidity                       int64
Wind Speed                   float64
Precipitation (%)            float64
Atmospheric Pressure         float64
UV Index                       int64
Visibility (km)              float64
Cloud Cover_clear               bool
Cloud Cover_cloudy              bool
Cloud Cover_overcast            bool
Cloud Cover_partly cloudy       bool
Season_Autumn                   bool
Season_Spring                   bool
Season_Summer                   bool
Season_Winter                   bool
Location_coastal                bool
Location_inland                 bool
Location_mountain               bool
dtype: object

In [10]:
# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [11]:
X_train[:5]

array([[[ 0.26483744,  0.31895701,  1.11711666,  0.95642653,
         -0.16389665, -0.26298232, -0.14256814, -0.44110105,
         -0.17775095,  1.08948715, -0.73257801, -0.48728304,
         -0.49593819,  2.08351637, -0.85706464, -0.61838964,
         -0.75557545,  1.3334586 ]],

       [[ 1.53366033,  0.02114849, -0.91351934, -0.32651198,
         -4.44857653,  2.06679395,  0.00604757, -0.44110105,
          5.62584897, -0.91786305, -0.73257801, -0.48728304,
          2.01638029, -0.47995783, -0.85706464, -0.61838964,
          1.32349457, -0.74992954]],

       [[ 0.26483744,  0.91457405,  1.18963937,  0.36189405,
          0.15485699, -0.78071038, -1.18287805, -0.44110105,
         -0.17775095, -0.91786305,  1.36504233, -0.48728304,
          2.01638029, -0.47995783, -0.85706464, -0.61838964,
          1.32349457, -0.74992954]],

       [[-1.06165921,  1.65909536,  3.65541165,  1.14417363,
         -0.47340317,  0.25474574, -0.43979954, -0.44110105,
         -0.17775095,  1.0894871

In [12]:
# Model
model = Sequential()
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(y2.shape[1], activation='softmax'))  
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  super().__init__(**kwargs)


In [13]:
# Train the model
model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.3)

Epoch 1/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6752 - loss: 1.0149 - val_accuracy: 0.8535 - val_loss: 0.4736
Epoch 2/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8293 - loss: 0.5064 - val_accuracy: 0.8788 - val_loss: 0.3687
Epoch 3/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8482 - loss: 0.4292 - val_accuracy: 0.8878 - val_loss: 0.3215
Epoch 4/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8485 - loss: 0.4013 - val_accuracy: 0.8932 - val_loss: 0.3074
Epoch 5/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8513 - loss: 0.3894 - val_accuracy: 0.8979 - val_loss: 0.2888
Epoch 6/200
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8582 - loss: 0.3715 - val_accuracy: 0.8990 - val_loss: 0.2770
Epoch 7/200
[1m203/20

<keras.src.callbacks.history.History at 0x28d2439aad0>

In [14]:
# Calculate Accuracy
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 973us/step - accuracy: 0.8982 - loss: 0.2907
Test Accuracy: 0.9020202159881592


In [68]:
# Predict classes
pred = model.predict(X_test)
predict_classes = np.argmax(pred, axis=1)
print("Predicted classes: ", predict_classes)

[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Predicted classes:  [3 3 3 ... 2 3 3]


In [69]:
# Precision, recall, f1score, support
true_classes = np.argmax(y_test, axis=1)
print(classification_report(true_classes, predict_classes))

              precision    recall  f1-score   support

           0       0.89      0.86      0.88       955
           1       0.87      0.89      0.88       982
           2       0.90      0.94      0.92      1033
           3       0.92      0.89      0.91       990

    accuracy                           0.90      3960
   macro avg       0.90      0.90      0.90      3960
weighted avg       0.90      0.90      0.90      3960

