<h2> LSTM Models </h2>


Creating and evaluating various LSTM models<br>

**NOTE: Using a dataset with the 10 best-performing features identified in another student's analysis, plus *soil_vwc* itself (11 input columns in total)**

**Features (x):**<br>
- *soil_vwc*
- *soil_ec*
- *irrigation_24h*
- *irrigation_36h*
- *irrigation_48h*
- *relative_humidity_ds_24h*
- *relative_humidity_ds_36h*
- *relative_humidity_ds_48h*
- *precipitation_36h*
- *precipitation_24h*
- *precipitation_48h*

**Target (y)** is *soil_vwc*

<br><br>


Reading the dataset file

In [4]:
import pandas as pd

# Load the scaled dataset from the Excel workbook; the first spreadsheet
# column is used as the DataFrame index.
df_scaled = pd.read_excel(
    io="./2023-stuard/df_best_scaled.xlsx",
    index_col=0,
)


In [16]:
# Display the loaded frame (the notebook renders a truncated head/tail preview).
df_scaled

Unnamed: 0_level_0,soil_vwc,soil_ec,irrigation_24h,irrigation_36h,irrigation_48h,relative_humidity_ds_24h,relative_humidity_ds_36h,relative_humidity_ds_48h,precipitation_36h,precipitation_24h,precipitation_48h
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-07-11 10:10:00,-0.211271,-0.568325,-0.546472,-0.697505,-0.850408,0.635486,0.676346,0.719121,-0.319756,-0.267801,-0.362414
2023-07-11 10:20:00,-0.162457,-0.446523,-0.546472,-0.697505,-0.850408,0.612772,0.652443,0.693960,-0.319756,-0.267801,-0.362414
2023-07-11 10:30:00,-0.143942,-0.389065,-0.546472,-0.697505,-0.850408,0.578156,0.616018,0.655617,-0.319756,-0.267801,-0.362414
2023-07-11 10:40:00,-0.123743,-0.360037,-0.546472,-0.697505,-0.850408,0.537642,0.573385,0.610740,-0.319756,-0.267801,-0.362414
2023-07-11 10:50:00,-0.118693,-0.350317,-0.546472,-0.697505,-0.850408,0.505280,0.539330,0.574893,-0.319756,-0.267801,-0.362414
...,...,...,...,...,...,...,...,...,...,...,...
2023-09-19 23:10:00,-0.446924,-0.722202,-0.546472,-0.697505,-0.850408,0.969098,0.898885,1.135242,-0.319756,-0.267801,-0.362414
2023-09-19 23:20:00,-0.448607,-0.722202,-0.546472,-0.697505,-0.850408,0.958112,0.885717,1.129158,-0.319756,-0.267801,-0.362414
2023-09-19 23:30:00,-0.446924,-0.722202,-0.546472,-0.697505,-0.850408,0.946782,0.873174,1.122946,-0.319756,-0.267801,-0.362414
2023-09-19 23:40:00,-0.445241,-0.713316,-0.546472,-0.697505,-0.850408,0.934079,0.861515,1.116228,-0.319756,-0.267801,-0.362414


<h3> Organizing data in time series </h3>

In [20]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator


# Time Series Parameters
WINDOW = 5          # number of consecutive rows in each input sequence
SPLIT = 0.8         # fraction of the rows kept for training
SAMPLING_RATE = 1   # step between rows inside one sequence
BATCH_SIZE = 1      # sequences per generator batch


def _make_window_generator(frame, series):
    """Build a TimeseriesGenerator over one split.

    The inputs are converted to NumPy arrays first: the generator indexes its
    targets with plain integers, and integer lookups on a timestamp-indexed
    pandas Series rely on a deprecated positional fallback.
    """
    return TimeseriesGenerator(
        frame.to_numpy(),
        series.to_numpy(),
        length=WINDOW,
        sampling_rate=SAMPLING_RATE,
        batch_size=BATCH_SIZE,
    )


# soil_vwc is both an input column and the target: each sample is WINDOW past
# rows of every column, and the label is soil_vwc on the row that follows.
features = df_scaled
target = df_scaled['soil_vwc']
t_size = 1 - SPLIT

# shuffle=False keeps chronological order: the earliest rows train the model
# and the latest rows are held out.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=t_size, shuffle=False
)

train_generator = _make_window_generator(x_train, y_train)
test_generator = _make_window_generator(x_test, y_test)

# Show the first (inputs, target) batch as a sanity check.
train_generator[0]

(array([[[-0.21127124, -0.56832529, -0.54647248, -0.69750535,
          -0.85040843,  0.6354863 ,  0.67634596,  0.71912063,
          -0.31975597, -0.26780144, -0.36241371],
         [-0.16245744, -0.44652314, -0.54647248, -0.69750535,
          -0.85040843,  0.61277166,  0.65244339,  0.69395999,
          -0.31975597, -0.26780144, -0.36241371],
         [-0.14394185, -0.38906519, -0.54647248, -0.69750535,
          -0.85040843,  0.57815639,  0.61601781,  0.65561722,
          -0.31975597, -0.26780144, -0.36241371],
         [-0.12374304, -0.36003717, -0.54647248, -0.69750535,
          -0.85040843,  0.53764236,  0.57338497,  0.61074048,
          -0.31975597, -0.26780144, -0.36241371],
         [-0.11869333, -0.35031651, -0.54647248, -0.69750535,
          -0.85040843,  0.50528004,  0.53933016,  0.57489325,
          -0.31975597, -0.26780144, -0.36241371]]]),
 array([-0.11532686]))

In [38]:
# (rows, columns) of the training split.
x_train.shape

(8120, 11)

In [39]:
# (rows, columns) of the test split.
x_test.shape

(2031, 11)

## Building LSTM Model

In [24]:
import keras

# Seed Python, NumPy and the Keras backend so weight initialisation is
# repeatable from one run of the notebook to the next.
SEED = 123
keras.utils.set_random_seed(SEED)

# Take the feature count from the data rather than hard-coding it, so the
# model keeps matching the dataset if columns are added or removed.
N_FEATURES = x_train.shape[1]

# Single LSTM layer (8 units, last time step only) followed by a one-unit
# dense head that outputs the predicted soil_vwc value.
lstm_model1 = keras.Sequential([
    keras.Input(shape=(WINDOW, N_FEATURES)),
    keras.layers.LSTM(8, return_sequences=False),
    keras.layers.Dense(1),
])


In [25]:
# Print the layer-by-layer summary of the model.
lstm_model1.summary()

In [29]:
from tensorflow.keras.optimizers import Adam

# Minimise mean squared error with Adam and report mean absolute error.
lstm_model1.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='MeanSquaredError',
    metrics=['MAE'],
)

# NOTE(review): the test generator is also used as validation data here, so
# the later test-set evaluation is not fully independent of training-time
# monitoring -- consider a separate validation split.
history = lstm_model1.fit(
    train_generator,
    validation_data=test_generator,
    epochs=50,
    shuffle=False,
)

Epoch 1/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - MAE: 0.0920 - loss: 0.0334 - val_MAE: 0.3904 - val_loss: 0.2240
Epoch 2/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 3ms/step - MAE: 0.0703 - loss: 0.0182 - val_MAE: 0.1201 - val_loss: 0.0260
Epoch 3/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - MAE: 0.0571 - loss: 0.0121 - val_MAE: 0.0638 - val_loss: 0.0080
Epoch 4/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - MAE: 0.0500 - loss: 0.0105 - val_MAE: 0.1101 - val_loss: 0.0205
Epoch 5/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - MAE: 0.0442 - loss: 0.0084 - val_MAE: 0.1284 - val_loss: 0.0317
Epoch 6/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - MAE: 0.0392 - loss: 0.0065 - val_MAE: 0.2170 - val_loss: 0.0733
Epoch 7/50
[1m8115/8115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [35]:
# Evaluate on the test windows; returns the compiled loss followed by the
# compiled metric (MAE).
# NOTE(review): test_generator was also passed as validation_data during fit.
lstm_model1.evaluate(test_generator, verbose=0)

[0.006065803579986095, 0.06474777311086655]

In [36]:
# Predict one value for every window served by the test generator.
predictions=lstm_model1.predict(test_generator)

[1m2026/2026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step


In [37]:
# Number of predicted samples (the first WINDOW test rows have no prediction).
predictions.shape[0]

2026

In [40]:
# Inspect the raw prediction array.
predictions

array([[-0.00355233],
       [-0.00612921],
       [-0.01167025],
       ...,
       [-0.56078506],
       [-0.5592885 ],
       [-0.55771035]], dtype=float32)

In [41]:
# Inspect the test-split targets.
y_test

timestamp
2023-09-05 19:30:00    0.002500
2023-09-05 19:40:00    0.004183
2023-09-05 19:50:00   -0.002550
2023-09-05 20:00:00   -0.000867
2023-09-05 20:10:00    0.004183
                         ...   
2023-09-19 23:10:00   -0.446924
2023-09-19 23:20:00   -0.448607
2023-09-19 23:30:00   -0.446924
2023-09-19 23:40:00   -0.445241
2023-09-19 23:50:00   -0.445241
Name: soil_vwc, Length: 2031, dtype: float64

In [42]:
# Inspect the test-split features.
x_test

Unnamed: 0_level_0,soil_vwc,soil_ec,irrigation_24h,irrigation_36h,irrigation_48h,relative_humidity_ds_24h,relative_humidity_ds_36h,relative_humidity_ds_48h,precipitation_36h,precipitation_24h,precipitation_48h
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-09-05 19:30:00,0.002500,0.132748,-0.546472,-0.697505,-0.850408,-0.831696,-0.495596,-0.204026,-0.319756,-0.267801,-0.362414
2023-09-05 19:40:00,0.004183,0.132748,-0.546472,-0.697505,-0.850408,-0.834201,-0.503651,-0.208328,-0.319756,-0.267801,-0.362414
2023-09-05 19:50:00,-0.002550,0.132748,-0.546472,-0.697505,-0.850408,-0.835699,-0.511599,-0.212841,-0.319756,-0.267801,-0.362414
2023-09-05 20:00:00,-0.000867,0.121916,-0.546472,-0.697505,-0.850408,-0.836070,-0.519428,-0.217497,-0.319756,-0.267801,-0.362414
2023-09-05 20:10:00,0.004183,0.132748,-0.546472,-0.697505,-0.850408,-0.835314,-0.526930,-0.221747,-0.319756,-0.267801,-0.362414
...,...,...,...,...,...,...,...,...,...,...,...
2023-09-19 23:10:00,-0.446924,-0.722202,-0.546472,-0.697505,-0.850408,0.969098,0.898885,1.135242,-0.319756,-0.267801,-0.362414
2023-09-19 23:20:00,-0.448607,-0.722202,-0.546472,-0.697505,-0.850408,0.958112,0.885717,1.129158,-0.319756,-0.267801,-0.362414
2023-09-19 23:30:00,-0.446924,-0.722202,-0.546472,-0.697505,-0.850408,0.946782,0.873174,1.122946,-0.319756,-0.267801,-0.362414
2023-09-19 23:40:00,-0.445241,-0.713316,-0.546472,-0.697505,-0.850408,0.934079,0.861515,1.116228,-0.319756,-0.267801,-0.362414


In [44]:
# Test features without the first column (soil_vwc), starting WINDOW rows in.
x_test.iloc[WINDOW:, 1:]


Unnamed: 0_level_0,soil_ec,irrigation_24h,irrigation_36h,irrigation_48h,relative_humidity_ds_24h,relative_humidity_ds_36h,relative_humidity_ds_48h,precipitation_36h,precipitation_24h,precipitation_48h
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-09-05 20:20:00,0.121916,-0.546472,-0.697505,-0.850408,-0.833923,-0.534271,-0.225509,-0.319756,-0.267801,-0.362414
2023-09-05 20:30:00,0.132748,-0.546472,-0.697505,-0.850408,-0.832049,-0.541570,-0.228871,-0.319756,-0.267801,-0.362414
2023-09-05 20:40:00,0.132748,-0.546472,-0.697505,-0.850408,-0.829656,-0.549013,-0.232178,-0.319756,-0.267801,-0.362414
2023-09-05 20:50:00,0.143605,-0.546472,-0.697505,-0.850408,-0.827157,-0.556321,-0.235485,-0.319756,-0.267801,-0.362414
2023-09-05 21:00:00,0.132748,-0.546472,-0.697505,-0.850408,-0.824832,-0.563383,-0.238891,-0.319756,-0.267801,-0.362414
...,...,...,...,...,...,...,...,...,...,...
2023-09-19 23:10:00,-0.722202,-0.546472,-0.697505,-0.850408,0.969098,0.898885,1.135242,-0.319756,-0.267801,-0.362414
2023-09-19 23:20:00,-0.722202,-0.546472,-0.697505,-0.850408,0.958112,0.885717,1.129158,-0.319756,-0.267801,-0.362414
2023-09-19 23:30:00,-0.722202,-0.546472,-0.697505,-0.850408,0.946782,0.873174,1.122946,-0.319756,-0.267801,-0.362414
2023-09-19 23:40:00,-0.713316,-0.546472,-0.697505,-0.850408,0.934079,0.861515,1.116228,-0.319756,-0.267801,-0.362414


In [45]:
# Every prediction needs WINDOW preceding rows, so the predictions line up
# with the test rows from position WINDOW onwards.
test_features_tail = x_test.iloc[WINDOW:, 1:]
assert len(predictions) == len(test_features_tail), (
    "predictions and test feature rows must have the same length"
)

# Give the predictions the same timestamp index as the feature rows. Without
# it, concat(axis=1) aligns a 0..n-1 index against timestamps and returns the
# union of both indexes, padded with NaN.
predicted_target = pd.DataFrame(
    predictions,
    index=test_features_tail.index,
    columns=x_test.columns[:1],  # reuse the name of the dropped first column
)
df_pred = pd.concat([predicted_target, test_features_tail], axis=1)

In [46]:
# NOTE(review): `scaler` is never defined in the visible notebook, so this
# cell raises NameError (see the recorded output). Presumably the scaler that
# produced df_best_scaled.xlsx must be loaded or re-fitted first -- TODO confirm.
rev_trans=scaler.inverse_transform(df_pred)

NameError: name 'scaler' is not defined

In [32]:
# Pull the timestamps out of the index into a one-column frame so they can be
# merged with the predictions later on.
df_copy = df_scaled.reset_index().loc[:, ['timestamp']]

In [None]:
# TRAIN DATA VISUALIZATION
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

# The model expects windows of WINDOW rows, so predict from the generator
# rather than from the flat training frame.
train_predictions = lstm_model1.predict(train_generator).flatten()

# The first generated target is the row at position WINDOW, so drop the
# leading rows that have no prediction. y_train itself is left untouched.
train_actuals = y_train.to_numpy()[WINDOW:]

# The split is chronological (shuffle=False), so the training rows are the
# first len(y_train) rows of the dataset.
timestamp_training = (
    df_copy['timestamp'].iloc[WINDOW:len(y_train)].reset_index(drop=True)
)
assert len(train_predictions) == len(train_actuals) == len(timestamp_training), (
    "predictions, actuals and timestamps must have the same length"
)

train_results = pd.DataFrame(data={
    'timestamp': timestamp_training,
    'Train Predictions': train_predictions,
    'Actuals': train_actuals,
})

# Plot the training results
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(train_results['timestamp'], train_results['Actuals'], label='Actuals')
ax.plot(train_results['timestamp'], train_results['Train Predictions'], label='Train Predictions')

# Format x-axis to show only dates, with one major tick every two days
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
ax.xaxis.grid(which="major", color="black", linestyle="--", alpha=0.3)  # Set x-axis grid

ax.set_xlabel('Time')
ax.set_ylabel('Volumetric Water Content Scaled')
ax.set_title('Train Predictions vs. Actuals')
ax.legend()
ax.tick_params(axis='x', rotation=45)  # Rotate the x-axis labels if they overlap
plt.show()
