In [25]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

Load the data

In [26]:
# Location of the pre-processed CSV, relative to the notebook's working directory.
input_file = 'data/processed_data.csv'

# Read the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=input_file)

Rename the columns for better readability

In [27]:
# Column labels are replaced positionally, so this list must contain exactly
# one entry per column of the loaded frame (13 labels here).
readable_columns = [
    'DateTime',
    'Year',
    'Month',
    'Date',
    'Time',
    'Minute',
    'Temperature',
    'Previous Day Average',
    'Two Days Before Average',
    'Three Days Before average',
    'Last 7 Days Average',
    'Previous Day Wind Speed',
    'Previous Day Rainfall',
]
df.columns = readable_columns

Convert the 'Date' and 'Time' columns to integers

In [28]:
# Cast both columns to integers, one column at a time.
for column_name in ('Date', 'Time'):
    df[column_name] = df[column_name].astype(int)

Left-pad the 'Time' column with zeros to a width of four characters

In [29]:
# Render each 'Time' value as text and left-pad it with zeros to at least
# four characters (e.g. 930 -> '0930'); longer values are left unchanged.
df['Time'] = [str(time_value).zfill(4) for time_value in df['Time']]

Combine the 'DateTime', 'Time' and 'Minute' columns into a single parsed 'DateTime' column

In [30]:
# Build one timestamp per row by concatenating the text forms of the
# 'DateTime', 'Time' and 'Minute' columns and parsing the result with the
# '%Y%m%d%H%M' format.
# NOTE(review): 'Time' was zero-padded to four characters in an earlier cell,
# but the .astype(int) below strips that padding again before concatenation,
# and 'Minute' is appended without any padding. The concatenated string can
# therefore vary in width from row to row. Confirm against the raw CSV that
# every row still matches '%Y%m%d%H%M' unambiguously — TODO confirm the
# intended digit layout of these three columns.
df['DateTime'] = pd.to_datetime(df['DateTime'].astype(int).astype(str) + df['Time'].astype(int).astype(str) + df['Minute'].astype(str), format='%Y%m%d%H%M')

Remove rows whose 'Temperature' equals 32767 (presumably a placeholder for missing readings)

In [31]:
# Keep only the rows whose temperature differs from the value 32767.
is_real_reading = df['Temperature'].ne(32767)
df = df.loc[is_real_reading]

Calculate the mean and standard deviation of the target variable Y ('Temperature')

In [32]:
# Width of the acceptance band, expressed in standard deviations.
threshold = 5

# Summary statistics of the temperature column, computed through NumPy
# (np.std uses ddof=0 unless told otherwise).
temperature_column = df['Temperature']
mean_Y = np.mean(temperature_column)
std_Y = np.std(temperature_column)

Define the range of acceptable Y values

In [33]:
# Half-width of the acceptance band around the mean.
band_half_width = threshold * std_Y
lower_bound, upper_bound = mean_Y - band_half_width, mean_Y + band_half_width

Filter out rows with Y values outside the acceptable range

In [34]:
# Keep rows whose temperature lies inside [lower_bound, upper_bound];
# both end points are included.
within_band = df['Temperature'].between(lower_bound, upper_bound)
df = df[within_band]

Prepare the data for LSTM

In [35]:
# Min-max scaler that maps values onto the [0, 1] interval.
scaler = MinMaxScaler(feature_range=(0, 1))

# Length of each input window (consecutive readings) fed to the LSTM model.
time_steps = 60

Scale the temperature values

In [36]:
# Fit the scaler on the leading (training) part of the series only, then apply
# that fitted scaling to every row. Fitting on the whole series would let the
# min/max of the validation period leak into the training inputs.
# NOTE: the 0.8 fraction mirrors the chronological 80/20 train/validation split
# applied to the sequences later in the notebook; keep the two in sync.
# Validation values may consequently fall slightly outside [0, 1].
temperature_values = df['Temperature'].values.reshape(-1, 1)
scaler_fit_rows = int(0.8 * len(temperature_values))
scaler.fit(temperature_values[:scaler_fit_rows])
scaled_temperature = scaler.transform(temperature_values)

Create sequences of input data and corresponding target values

In [37]:
# Slide a window of `time_steps` readings over the scaled series: each window
# is one input sample and the reading immediately after it is its target.
window_starts = range(len(scaled_temperature) - time_steps)
data = [scaled_temperature[start:start + time_steps] for start in window_starts]
target = [scaled_temperature[start + time_steps] for start in window_starts]

In [38]:
# Convert both Python lists into NumPy arrays in one step.
data, target = (np.array(samples) for samples in (data, target))

Split the dataset into training and validation sets (80% for training, 20% for validation)

In [39]:
# Chronological split: the first 80% of the samples train the model and the
# remaining samples are held out for validation (no shuffling here).
train_size = int(0.8 * len(data))
train_data, val_data = data[:train_size], data[train_size:]
train_target, val_target = target[:train_size], target[train_size:]

Build the LSTM model architecture

In [40]:
# Layer stack, listed in forward order:
#   1. LSTM(64) that emits the full sequence so a second LSTM can consume it;
#      it takes windows of shape (time_steps, 1).
#   2. LSTM(64) that emits only its final output.
#   3. Dense(64) with ReLU activation.
#   4. Dense(1) producing the single predicted value.
lstm_stack = [
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(time_steps, 1)),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
]
model = tf.keras.Sequential(lstm_stack)

2023-06-28 21:44:45.341116: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-28 21:44:45.343842: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-28 21:44:45.344983: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Compile the model

In [41]:
# Adam optimizer with a learning rate of 1e-3; the model is trained to
# minimize the mean absolute error.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss='mean_absolute_error')

In [42]:
# Both callbacks watch the validation loss.
monitored_metric = 'val_loss'

# Stop training after 3 epochs without improvement and roll the model back to
# the best weights seen so far.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor=monitored_metric,
    patience=3,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-6.
# NOTE(review): this patience equals the early-stopping patience, so a reduced
# learning rate gets little or no chance to take effect before training stops.
# Consider a shorter patience here or a longer one for early stopping.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)

Train the LSTM model

In [43]:
# Mini-batch size and the upper bound on the number of epochs; the callbacks
# passed below may end training earlier.
batch_size, epochs = 64, 100

history = model.fit(
    x=train_data,
    y=train_target,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_data, val_target),
    callbacks=[early_stopping, lr_scheduler],
)

Epoch 1/100


2023-06-28 21:44:46.087934: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-28 21:44:46.089541: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-28 21:44:46.090488: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Make predictions using the trained LSTM model

In [None]:
# Run the trained model over the validation windows.
predictions = model.predict(x=val_data)

Rescale the predictions back to the original range

In [None]:
# Force the predictions into a single column, the layout the scaler expects,
# then map them from the scaled range back to original temperature units.
scaled_predictions = np.reshape(predictions, (-1, 1))
predicted_temperature = scaler.inverse_transform(scaled_predictions)

Visualize the actual vs. predicted temperatures for the validation set

In [None]:
# `val_target` is still in the scaler's units while `predicted_temperature`
# has been converted back to original units. Convert the targets as well so
# both curves share the same y-axis scale.
actual_temperature = scaler.inverse_transform(val_target.reshape(-1, 1))

# The x-axis is simply the position of each sample within the validation set.
sample_positions = range(len(actual_temperature))

plt.figure(figsize=(12, 6))
plt.plot(sample_positions, actual_temperature, label='Actual')
plt.plot(sample_positions, predicted_temperature, label='Predicted')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.title('Actual vs. Predicted Temperatures (Validation Set)')
plt.legend()
plt.show()

In [None]:
# Persist the trained model to disk; the path is relative to the working directory.
model_path = "lstm.keras"
model.save(model_path)