In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

data = pd.read_csv('/kaggle/input/weather-type-classification/weather_classification_data.csv')
print(data.head())
print(data.columns)


   Temperature  Humidity  Wind Speed  Precipitation (%)    Cloud Cover  \
0         14.0        73         9.5               82.0  partly cloudy   
1         39.0        96         8.5               71.0  partly cloudy   
2         30.0        64         7.0               16.0          clear   
3         38.0        83         1.5               82.0          clear   
4         27.0        74        17.0               66.0       overcast   

   Atmospheric Pressure  UV Index  Season  Visibility (km)  Location  \
0               1010.82         2  Winter              3.5    inland   
1               1011.43         7  Spring             10.0    inland   
2               1018.72         5  Spring              5.5  mountain   
3               1026.25         7  Spring              1.0   coastal   
4                990.67         1  Winter              2.5  mountain   

  Weather Type  
0        Rainy  
1       Cloudy  
2        Sunny  
3        Sunny  
4        Rainy  
Index(['Temperature'

In [14]:
x = data[['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)', 'Cloud Cover',
          'Atmospheric Pressure', 'UV Index', 'Season', 'Visibility (km)', 'Location']]
y = data['Weather Type']
#considering y as the target variable as always

In [15]:
#Encoding categorical data
x.columns = x.columns.str.strip()
label_encoders = {}
categorical_columns = ['Cloud Cover', 'Location', 'Season']

for column in categorical_columns:
    le = LabelEncoder()
    x.loc[:, column] = le.fit_transform(x.loc[:, column])
    label_encoders[column] = le
le_weather_type = LabelEncoder()
y = le_weather_type.fit_transform(y)
# Normalize numerical data
numerical_features = ['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)',
                       'Atmospheric Pressure', 'UV Index', 'Visibility (km)']
scaler = StandardScaler()
x.loc[:,numerical_features] = x.loc[:,numerical_features].astype('float64')
x.loc[:, numerical_features] = scaler.fit_transform(x.loc[:, numerical_features])

 -1.52082893]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  x.loc[:, numerical_features] = scaler.fit_transform(x.loc[:, numerical_features])
  0.25781258]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  x.loc[:, numerical_features] = scaler.fit_transform(x.loc[:, numerical_features])


In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(10560, 10)
(2640, 10)
(10560,)
(2640,)


In [18]:
#Model definition
model = Sequential()
model.add(Dense(64, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(le_weather_type.classes_), activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**TRAINING THE MODEL AND THE ACCURACY BELOW REFERS TO TRAINING'S ACCURACY**

In [19]:
#Model training
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=20, batch_size=10, validation_split=0.1)
train_loss = history.history['loss']
train_accuracy = history.history['accuracy']
val_loss = history.history['val_loss']
val_accuracy = history.history['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy[-1]:.4f}")
print(f"Final Validation Accuracy: {val_accuracy[-1]:.4f}")


Epoch 1/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7903 - loss: 0.6447 - val_accuracy: 0.9148 - val_loss: 0.2964
Epoch 2/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9022 - loss: 0.2859 - val_accuracy: 0.9138 - val_loss: 0.2550
Epoch 3/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8991 - loss: 0.2716 - val_accuracy: 0.9167 - val_loss: 0.2355
Epoch 4/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9074 - loss: 0.2358 - val_accuracy: 0.9205 - val_loss: 0.2244
Epoch 5/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9064 - loss: 0.2325 - val_accuracy: 0.9223 - val_loss: 0.2175
Epoch 6/20
[1m951/951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9101 - loss: 0.2178 - val_accuracy: 0.9223 - val_loss: 0.1977
Epoch 7/20
[1m951/951[0m 

**THE FOLLOWING OUTPUTS REPRESENT THE TESTING PART**

In [20]:
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9011 - loss: 0.2408
Test Loss: 0.2377
Test Accuracy: 0.9011


In [21]:
predictions = model.predict(x_test)
predictions = predictions.argmax(axis=-1)
conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_matrix)
class_report = classification_report(y_test, predictions, target_names=le_weather_type.classes_)
print("Classification Report:")
print(class_report)

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Confusion Matrix:
[[564  55   8  24]
 [ 39 588   4  16]
 [ 11  17 655  18]
 [ 37  25   7 572]]
Classification Report:
              precision    recall  f1-score   support

      Cloudy       0.87      0.87      0.87       651
       Rainy       0.86      0.91      0.88       647
       Snowy       0.97      0.93      0.95       701
       Sunny       0.91      0.89      0.90       641

    accuracy                           0.90      2640
   macro avg       0.90      0.90      0.90      2640
weighted avg       0.90      0.90      0.90      2640



**FINAL INFERENCES**

In [22]:
print("Training Accuracy: ",train_accuracy[-1])
print("Testing Accuracy: ",test_accuracy)

Training Accuracy:  0.929713785648346
Testing Accuracy:  0.9011363387107849


**Thus, the case of overfitting is not encountered and testing and training both have similar accruacies. Hence the neural netowrk works fine, probably :)**