# <center>Building and Evaluating Recurrent Neural Network Models for Weather Prediction</center>

<p>This notebook explores the use of recurrent neural network (RNN) models for weather prediction. The dataset is preprocessed, normalized, and split into training and testing sets. Different RNN models with varying architectures and training epochs are built and evaluated to predict weather patterns. The notebook provides insights into the impact of model architecture and training duration on the predictive performance for weather forecasting.</p>

- Import libraries

In [35]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Input, SimpleRNN
from tensorflow.keras.models import Sequential


- Load the dataset

In [42]:
# Load the cleaned weather table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='clean_weather.csv')
# Report the (rows, columns) dimensions, then preview the first five rows.
print(data.shape)
data.head(n=5)

(13509, 5)


Unnamed: 0.1,Unnamed: 0,tmax,tmin,rain,tmax_tomorrow
0,1970-01-01,60.0,35.0,0.0,52.0
1,1970-01-02,52.0,39.0,0.0,52.0
2,1970-01-03,52.0,35.0,0.0,53.0
3,1970-01-04,53.0,36.0,0.0,52.0
4,1970-01-05,52.0,35.0,0.0,50.0


- One of the columns is unwanted, so let's drop it

In [43]:
# Remove the unwanted 'Unnamed: 0' column.
# errors='ignore' keeps this cell safe to re-run: once the column is gone, a second
# execution is a no-op instead of raising KeyError.
data = data.drop(columns='Unnamed: 0', errors='ignore')

- Check for missing values

In [44]:
# Count the missing entries in each column (isna is the canonical name for isnull).
data.isna().sum()

tmax              11
tmin              14
rain             281
tmax_tomorrow     11
dtype: int64

- Drop missing values

In [45]:
# Keep only complete rows: any row with at least one missing value is removed.
data = data.dropna(axis=0, how='any')
# Dimensions after the removal, shown as the cell output.
data.shape

(13203, 4)

- Preprocessing: Normalizing the data

In [46]:
# Rescale the three features and the target column-wise with min-max scaling.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# made further down, so test-set minima/maxima influence the scaling.
scaler = MinMaxScaler()
scaler.fit(data[['tmax', 'tmin', 'rain', 'tmax_tomorrow']])
scaled_data = scaler.transform(data[['tmax', 'tmin', 'rain', 'tmax_tomorrow']])

- Split the data into target and features

In [47]:
# The last scaled column (tmax_tomorrow) is the target; every column before it is a feature.
X, y = scaled_data[:, :-1], scaled_data[:, -1]
# Print both shapes, one per line, to confirm the split.
print(X.shape, y.shape, sep='\n')

(13203, 3)
(13203,)


- Split the data into train and test sets

In [52]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [53]:
# Display the second training sample as a quick sanity check of the scaled features.
X_train[1, :]

array([0.20238095, 0.72058824, 0.00837989])

- Reshaping the data

In [55]:
# The recurrent layers below take 3-D input shaped (samples, timesteps, features),
# so each row becomes a sequence of length 1.
# The feature count is read from the LAST axis so the cell is safe to re-run:
# shape[1] would be 1 (the timestep axis) after the first execution, and a second
# reshape would then silently scramble the data into (samples * features, 1, 1).
X_train = X_train.reshape(-1, 1, X_train.shape[-1])
X_test = X_test.reshape(-1, 1, X_test.shape[-1])
print(X_train[1])

- Model building: Simple RNN

<p>The model is built with 64 hidden neurons and trained for 50 epochs, i.e. the full training set is iterated through 50 times. The batch size is 32, which is the number of training samples used in one iteration (one weight update).</p>

In [57]:
# Single-layer recurrent regressor: 64 SimpleRNN units feeding one linear output unit.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about for Sequential models.
model = Sequential()
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(SimpleRNN(64))
model.add(Dense(1))

# Compile the model: Adam optimizer, mean squared error on the scaled target
model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 50 epochs in mini-batches of 32. The held-out test split is passed as
# validation data, so val_loss is reported on it after every epoch.
# The History object is kept so the loss curves stay available and its repr is not
# echoed as the cell output.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/50


  super().__init__(**kwargs)


[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.0105 - val_loss: 0.0031
Epoch 2/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0032 - val_loss: 0.0031
Epoch 3/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0032 - val_loss: 0.0031
Epoch 4/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0031 - val_loss: 0.0031
Epoch 5/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0034 - val_loss: 0.0031
Epoch 6/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0031 - val_loss: 0.0040
Epoch 7/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0034 - val_loss: 0.0031
Epoch 8/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0032 - val_loss: 0.0032
Epoch 9/50
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x258f6fa0f50>

- Evaluate the model

In [58]:

# Score the trained network on the held-out split (the value is the compiled MSE loss)
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Predict the scaled target for every test sample
predictions = model.predict(X_test)

# Map the predictions back to original units: the scaler was fitted on four columns,
# so the flattened test features are placed in front of the predicted column before
# inverting, and only the last (target) column is kept.
predictions_actual = scaler.inverse_transform(
    np.hstack((X_test.reshape(len(X_test), -1), predictions.reshape(-1, 1)))
)[:, -1]


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0030
Test Loss: 0.00307648116722703
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


The model reaches a test loss (mean squared error on the min-max-scaled target) of 0.003076, which is pretty good.

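- Let's check whether the loss can be reduced further. Let's try 30 epochs. Note that the model below also uses 58 hidden units instead of 64, so the number of epochs is not the only difference from the first model.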

In [59]:

# Define the RNN model: one SimpleRNN layer with 58 units and a single linear output.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about for Sequential models.
model1 = Sequential()
model1.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model1.add(SimpleRNN(58))
model1.add(Dense(1))

# Compile the model: Adam optimizer, mean squared error on the scaled target
model1.compile(optimizer='adam', loss='mean_squared_error')

# Train for 30 epochs in mini-batches of 32; val_loss is reported on the test split
model1.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))


# Evaluate the model on the held-out split
loss = model1.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions (still in the scaled target range)
predictions = model1.predict(X_test)

# Optionally, inverse transform the predictions to get actual values.
# The scaler was fitted on four columns, so the flattened test features are placed in
# front of the predicted column and only the last (target) column is kept.
predictions_actual = scaler.inverse_transform(np.concatenate((X_test.reshape(X_test.shape[0], -1), predictions.reshape(-1, 1)), axis=1))[:, -1]



Epoch 1/30


  super().__init__(**kwargs)


[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0872 - val_loss: 0.0047
Epoch 2/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0044 - val_loss: 0.0034
Epoch 3/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0035 - val_loss: 0.0032
Epoch 4/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0032 - val_loss: 0.0032
Epoch 5/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0032 - val_loss: 0.0031
Epoch 6/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0031 - val_loss: 0.0031
Epoch 7/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0031 - val_loss: 0.0035
Epoch 8/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0033 - val_loss: 0.0031
Epoch 9/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━

The model reaches a test loss of 0.003180, which is not bad, but it is higher than the loss obtained with 50 epochs.

Now let's try adding 1 new hidden layer.

In [60]:

# Define the RNN model: two stacked SimpleRNN layers (58 then 32 units) and a single
# linear output. return_sequences=True makes the first layer emit its output for every
# timestep so the second recurrent layer receives a 3-D sequence.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about for Sequential models.
model2 = Sequential()
model2.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model2.add(SimpleRNN(58, return_sequences=True))
model2.add(SimpleRNN(32))
model2.add(Dense(1))

# Compile the model: Adam optimizer, mean squared error on the scaled target
model2.compile(optimizer='adam', loss='mean_squared_error')

# Train for 30 epochs in mini-batches of 32; val_loss is reported on the test split
model2.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))


# Evaluate the model on the held-out split (done once; the model does not change
# between evaluation and prediction, so a second evaluate call would only repeat it)
loss = model2.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions (still in the scaled target range)
predictions = model2.predict(X_test)

# Optionally, inverse transform the predictions to get actual values.
# The scaler was fitted on four columns, so the flattened test features are placed in
# front of the predicted column and only the last (target) column is kept.
predictions_actual = scaler.inverse_transform(np.concatenate((X_test.reshape(X_test.shape[0], -1), predictions.reshape(-1, 1)), axis=1))[:, -1]


Epoch 1/30


  super().__init__(**kwargs)


[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.0039 - val_loss: 0.0031
Epoch 2/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0033 - val_loss: 0.0031
Epoch 3/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0034 - val_loss: 0.0046
Epoch 4/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0035 - val_loss: 0.0036
Epoch 5/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0033 - val_loss: 0.0031
Epoch 6/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0032 - val_loss: 0.0032
Epoch 7/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0033 - val_loss: 0.0035
Epoch 8/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0034 - val_loss: 0.0031
Epoch 9/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━

Now the test loss is 0.003149, which is also not bad, but it is still higher than the loss obtained with 50 epochs.

Lastly, let's try adding more neurons (40 instead of 32) to the new hidden layer.

In [61]:

# Define the RNN model: two stacked SimpleRNN layers (58 then 40 units) and a single
# linear output. return_sequences=True makes the first layer emit its output for every
# timestep so the second recurrent layer receives a 3-D sequence.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about for Sequential models.
model3 = Sequential()
model3.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model3.add(SimpleRNN(58, return_sequences=True))
model3.add(SimpleRNN(40))
model3.add(Dense(1))

# Compile the model: Adam optimizer, mean squared error on the scaled target
model3.compile(optimizer='adam', loss='mean_squared_error')

# Train for 30 epochs in mini-batches of 32; val_loss is reported on the test split
model3.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))


# Evaluate the model on the held-out split (done once; the model does not change
# between evaluation and prediction, so a second evaluate call would only repeat it)
loss = model3.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions (still in the scaled target range)
predictions = model3.predict(X_test)

# Optionally, inverse transform the predictions to get actual values.
# The scaler was fitted on four columns, so the flattened test features are placed in
# front of the predicted column and only the last (target) column is kept.
predictions_actual = scaler.inverse_transform(np.concatenate((X_test.reshape(X_test.shape[0], -1), predictions.reshape(-1, 1)), axis=1))[:, -1]


Epoch 1/30


  super().__init__(**kwargs)


[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - loss: 0.0113 - val_loss: 0.0031
Epoch 2/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0031 - val_loss: 0.0031
Epoch 3/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0034 - val_loss: 0.0044
Epoch 4/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0033 - val_loss: 0.0033
Epoch 5/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0033 - val_loss: 0.0033
Epoch 6/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0033 - val_loss: 0.0030
Epoch 7/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0032 - val_loss: 0.0039
Epoch 8/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0035 - val_loss: 0.0035
Epoch 9/30
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━

That doesn't seem to improve the test loss much.