In [None]:
!pip install keras_tuner
!pip install scikeras


Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5
Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


#DATA PREPROCESSING

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

#load and sort the data
data = pd.read_csv('/content/merged_data_v0.1.csv')
data['Month-Year'] = pd.to_datetime(data['Month-Year'], format='%b-%y')
data = data.sort_values('Month-Year').reset_index(drop=True)
data_features = data.drop(columns=['Month-Year'])

num_years = len(data) // 12

# Get 80% of the years as training and 20% as testing
num_train_years = int(num_years * 0.8)
split_index = num_train_years * 12

# Split the data into training and testing sets
train_data = data[:split_index]
test_data = data[split_index:]


train_features = train_data.drop(columns=['Month-Year'])
test_features = test_data.drop(columns=['Month-Year'])

# Normalize features based on the training data and save scaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_features = scaler.fit_transform(train_features)
scaled_test_features = scaler.transform(test_features)

joblib.dump(scaler, 'scaler.pkl')

#Split into X and Y for training and testing
sequence_length = 12
X_train, y_train = [], []
for i in range(sequence_length, len(scaled_train_features)):
    X_train.append(scaled_train_features[i - sequence_length:i])
    y_train.append(scaled_train_features[i])

X_train, y_train = np.array(X_train), np.array(y_train)

X_test, y_test = [], []
for i in range(sequence_length, len(scaled_test_features)):
    X_test.append(scaled_test_features[i - sequence_length:i])
    y_test.append(scaled_test_features[i])

X_test, y_test = np.array(X_test), np.array(y_test)


Input shape (X): (1164, 12, 9), Output shape (y): (1164, 9)


#LSTM MODEL

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()

# Create 3 lstm layers with dropout
model.add(LSTM(units=128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))

model.add(LSTM(units=64, return_sequences=True))
model.add(Dropout(0.1))

model.add(LSTM(units=64))
model.add(Dropout(0.1))

# Output dense layer
model.add(Dense(units=y_train.shape[1]))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [None]:
# Train the model
history = model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.2, verbose=1)


Epoch 1/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 94ms/step - loss: 0.1264 - val_loss: 0.0447
Epoch 2/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0475 - val_loss: 0.0479
Epoch 3/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0418 - val_loss: 0.0468
Epoch 4/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0406 - val_loss: 0.0504
Epoch 5/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0370 - val_loss: 0.0450
Epoch 6/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0325 - val_loss: 0.0376
Epoch 7/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0314 - val_loss: 0.0245
Epoch 8/200
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0262 - val_loss: 0.0195
Epoch 9/200
[1m30/30[0m [32m━━━━━━━━━

#PREDICT

In [None]:
# Found the total number of years in the data
num_test_years = len(test_data) // 12

#List for predictions
all_predictions = []

# Loop through each year in the test set
for year in range(min(num_test_years, len(X_test) // 12)):

    #Get the data for each year
    start_index = year * 12
    last_sequence = X_test[start_index]

    # Current prediction list
    year_predictions = []

    # Predict for the next 12 months of the current year
    for _ in range(12):
        # Predict the next month
        next_pred = model.predict(np.array([last_sequence]))

        # Inverse transform the prediction to get actual values
        next_pred_actual = scaler.inverse_transform(next_pred)
        year_predictions.append(next_pred_actual[0])

        # Update the sequence by removing the first month and adding the prediction at the end
        next_sequence = np.append(last_sequence[1:], next_pred, axis=0)
        last_sequence = next_sequence

    # Append the predictions for the current year to the overall predictions list
    all_predictions.extend(year_predictions)

# Convert all predictions to a DataFrame
predictions_df = pd.DataFrame(all_predictions, columns=data_features.columns)
print(predictions_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 451ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

#EVALUATION

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

actual_values = test_features.iloc[12:].values
model_predictions = predictions_df.iloc[:]
baseline_predictions = test_features.iloc[:-12].values

lstm_mae = mean_absolute_error(actual_values, model_predictions)
lstm_mse = mean_squared_error(actual_values, model_predictions)
lstm_rmse = np.sqrt(lstm_mse)
lstm_r2 = r2_score(actual_values, model_predictions)

baseline_mae = mean_absolute_error(actual_values, baseline_predictions)
baseline_mse = mean_squared_error(actual_values, baseline_predictions)
baseline_rmse = np.sqrt(baseline_mse)
baseline_r2 = r2_score(actual_values, baseline_predictions)

# Print metrics

print(f"LSTM MAE: {lstm_mae}")
print(f'LSTM MSE: {lstm_mse}')
print(f'LSTM RMSE: {lstm_rmse}')
print(f'LSTM R²: {lstm_r2}')
print("-----------------------------")
print(f"Base MAE: {baseline_mae}")
print(f'Base MSE: {baseline_mse}')
print(f'Base RMSE: {baseline_rmse}')
print(f'Base R²: {baseline_r2}')

LSTM MAE: 4.1741691367841725
LSTM MSE: 93.12444516807987
LSTM RMSE: 9.650100785384568
LSTM R²: 0.6638381642198322
-----------------------------
Base MAE: 5.467283950617284
Base MSE: 171.44037037037037
Base RMSE: 13.093523985939399
Base R²: 0.4217153472141438


In [None]:
model.save('/content/model.keras')


#2024 PREDICTION

In [5]:
import pandas as pd
import joblib
import numpy as np
from tensorflow.keras.models import load_model

model = load_model('/content/model.keras')
#Read and get the data of the last year
data = pd.read_csv('/content/merged_data_v0.1.csv')

# data['Month-Year'] = pd.to_datetime(data['Month-Year'], format='%b-%y')
# data = data.sort_values('Month-Year').reset_index(drop=True)
data_features = data.drop(columns=['Month-Year'])

test_data = data.tail(12)

test_features = test_data.drop(columns=['Month-Year'])

#Scale the data based on the saved scaler
scaler = joblib.load('scaler.pkl')
scaled_test_features = scaler.transform(test_features)


X_test = []
X_test.append(scaled_test_features)
X_test = np.array(X_test)


In [6]:


last_sequence = X_test[0]
year_predictions = []

# Predict the next 12 months
for _ in range(12):

    # Predict the next month
    next_pred = model.predict(np.array([last_sequence]))

    # Inverse transform the prediction to get actual values
    next_pred_actual = scaler.inverse_transform(next_pred)
    year_predictions.append(next_pred_actual[0])

    # Update the sequence by removing the first month and adding the prediction at the end
    next_sequence = np.append(last_sequence[1:], next_pred, axis=0)
    last_sequence = next_sequence



# Convert all predictions to a DataFrame
predictions_df = pd.DataFrame(year_predictions, columns=data_features.columns)
print(predictions_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
    Cloud cover  Frost days  potential evapo-transpiration  \
0     84.022560   13.062772                       0.466247   
1     82.61

In [None]:
#conver the monthly data into yearly
yearly_summary = predictions_df.agg({
    'Frost days': 'sum',
    'Wet days': 'sum',
    'Precipitation rate': 'mean',
    'Minimum 2m temperature': 'mean',
    'Mean 2m temperature': 'mean',
    'Maximum 2m temperature': 'mean',
    'potential evapo-transpiration': 'mean',
    'Cloud cover': 'mean',
    'Vapour pressure': 'mean'
}).reset_index()

yearly_summary.to_csv('2024_prediction.csv', index=False)