In [168]:
#!/usr/bin/env python
# coding: utf-8

In[121]:

In [169]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

Load the data

In[122]:

In [170]:
input_file = 'data/processed_data.csv'
df = pd.read_csv('data/processed_data.csv',
                 parse_dates=['DateTime'], index_col='DateTime')
df = df.resample('60T').mean()
df = df[(df['Temperature'] >= df['Temperature'].quantile(0.1)) &
        (df['Temperature'] <= df['Temperature'].quantile(0.9))]
# df["DateTime"] = pd.to_datetime(df.index)
df = df.reset_index()

Rename the columns for better readability

In[123]:

df.columns = ['DateTime', 'Year', 'Month', 'Date', 'Time', 'Minute', 'Temperature', 'Previous Day Average', 'Two Days Before Average',<br>
'Three Days Before average', 'Last 7 Days Average', 'Previous Day Wind Speed', 'Previous Day Rainfall']

Convert the 'Date' and 'Time' columns to integers

In[124]:

In [171]:
df['Date'] = df['Date'].astype(int)
df['Time'] = df['Time'].astype(int)

Fill leading zeros for the 'Time' column

In[125]:

In [172]:
df['Time'] = df['Time'].apply(lambda x: str(x).zfill(4))

Combine the 'Date' and 'Time' columns into a single 'DateTime' column

In[126]:

df['DateTime'] = pd.to_datetime(df["DateTime"], format="%Y-%m-%d %H:%M:%S")

In[ ]:

Remove rows with a specific value (e.g., 32767) in 'Temperature' column

In[127]:

In [173]:
df = df[df['Temperature'] != 32767]

Calculate the mean and standard deviation of Y

In[128]:

In [174]:
threshold = 5
mean_Y = np.mean(df['Temperature'])
std_Y = np.std(df['Temperature'])

Define the range of acceptable Y values

In[129]:

In [175]:
lower_bound = mean_Y - threshold * std_Y
upper_bound = mean_Y + threshold * std_Y

Filter out rows with Y values outside the acceptable range

In[130]:

In [176]:
df = df[(df['Temperature'] >= lower_bound) &
        (df['Temperature'] <= upper_bound)]

Prepare the data for LSTM

In[131]:

In [177]:
time_steps = 60  # Number of time steps for the LSTM model
scaler = MinMaxScaler(feature_range=(0, 1))  # Scale the data to [0, 1]

Create sequences of input data and corresponding target values

In[133]:

Filter out the outliers and invalid values for the new features<br>
Replace the specific invalid values (e.g., 32767) with np.nan

In [178]:
df['Wind Speed'] = df['Wind Speed'].replace(32767, np.nan)
df['Rainfall'] = df['Rainfall'].replace(32767, np.nan)
df['Wind Direction'] = df['Wind Direction'].replace(32767, np.nan)
df = df[(df['Wind Direction'] >= 0) & (df['Wind Direction'] <= 360)]


Remove rows with missing values

In [179]:
df = df.dropna()

In [None]:
df

Scale the temperature, wind speed, rainfall, and wind direction values

In [180]:
features = df[['Temperature', 'Wind Speed',
               'Rainfall', 'Wind Direction']].values
scaled_features = scaler.fit_transform(features)

Create sequences of input data and corresponding target values

In [181]:
data = []
target = []
for i in range(24, len(scaled_features) - time_steps):
    data.append(scaled_features[i-24:i+time_steps-24])
    # Only the temperature is the target
    target.append(scaled_features[i+time_steps, 0])

In [182]:
data = np.array(data)
target = np.array(target)

In[135]:

In [183]:
df["DateTime"]

2       2008-01-02 16:00:00
3       2008-01-03 09:00:00
5       2008-01-03 12:00:00
6       2008-01-03 13:00:00
7       2008-01-03 14:00:00
                ...        
99991   2023-06-29 10:00:00
99992   2023-06-29 18:00:00
99993   2023-06-29 19:00:00
99994   2023-06-29 20:00:00
99995   2023-06-29 21:00:00
Name: DateTime, Length: 59107, dtype: datetime64[ns]

Define exclude date (year, month, and day)

In[136]:

In [184]:
exclude_year = 2022
exclude_month = 6
exclude_day = 1

Split the dataset into training and validation sets

In[137]:

In [185]:
exclude_date = pd.to_datetime(f"{exclude_year}-{exclude_month}-{exclude_day}")
exclude_index = df[df['DateTime'] >= exclude_date].index[0]
print(exclude_index)
train_data, train_target = data[:exclude_index], target[:exclude_index]
val_data, val_target = data[exclude_index:], target[exclude_index:]

99483


Build the LSTM model architecture

In[138]:

In [186]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(256, return_sequences=True,
                         input_shape=(time_steps, 4)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(256, return_sequences=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(256, return_sequences=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(256),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
])

2023-06-29 22:26:25.897179: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-29 22:26:25.899043: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-29 22:26:25.900056: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Compile the model

In[139]:

In [187]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mean_absolute_error', optimizer=optimizer)

Define early stopping and learning rate scheduler

In[140]:

In [188]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

Train the LSTM model

In[141]:

In [189]:
batch_size = 64
epochs = 100
history = model.fit(train_data, train_target, batch_size=batch_size, epochs=epochs, validation_data=(val_data, val_target),
                    callbacks=[early_stopping, lr_scheduler])

Epoch 1/100


2023-06-29 22:26:27.429929: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-29 22:26:27.431725: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-29 22:26:27.432747: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



KeyboardInterrupt: 

Make predictions using the trained LSTM model

In[142]:

In [None]:
predictions = model.predict(val_data)

Rescale the predictions back to the original range

In[143]:

In [None]:
scaled_predictions = predictions.reshape(-1, 1)
predicted_temperature = scaler.inverse_transform(scaled_predictions)

Calculate MSE and MAE

In[144]:

In [None]:
mse = mean_squared_error(scaler.inverse_transform(
    val_target), predicted_temperature)
mae = mean_absolute_error(scaler.inverse_transform(
    val_target), predicted_temperature)

In[145]:

In [None]:
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

Visualize the actual vs. predicted temperatures for the validation set

In[146]:

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(range(len(val_target)), scaler.inverse_transform(
    val_target), label='Actual')
plt.plot(range(len(val_target)), predicted_temperature, label='Predicted')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.title('Actual vs. Predicted Temperatures (Validation Set)')
plt.legend()
plt.show()

Visualize the actual vs. predicted temperatures for the training set

In[147]:

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(range(len(train_target)), train_target, label='Actual')
plt.plot(range(len(train_target)), model.predict(
    train_data), label='Predicted')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.title('Actual vs. Predicted Temperatures (Training Set)')
plt.legend()
plt.show()

Save the trained model

In[148]:

In [None]:
model.save("lstm.keras")

In [191]:
# Load the pre-trained model
from datetime import timedelta

dates = pd.date_range('2023-06-29 00:00:00', '2023-06-29 23:00:00', freq='H')
model = tf.keras.models.load_model("lstm.keras")

# Predict temperatures for each hour on June 30, 2023
predicted_temperatures = []
for i in range(len(dates)):
    try:
        end_index = df[df["DateTime"] == dates[i] - timedelta(days=1)].index[0]
        input_data = scaled_features[end_index-time_steps:end_index]
        input_data = input_data.reshape(4, time_steps, 1)
        prediction = model.predict(input_data)
        predicted_temperature = scaler.inverse_transform(prediction)[0][0]
        predicted_temperatures.append(predicted_temperature)
        scaled_temperature = np.append(scaled_temperature, prediction, axis=0)
    except IndexError as e:
        print(e)

# Print the predicted temperatures per hour for June 30, 2023
for i, dt in enumerate(dates):
    print(f'{dt}: {predicted_temperatures[i]:.2f}°C')


2023-06-29 22:30:42.521147: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-29 22:30:42.524014: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-29 22:30:42.525809: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

too many indices for array: array is 0-dimensional, but 2 were indexed
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array: array is 0-dimensional, but 2 were indexed
too many indices for array:

2023-06-29 22:30:42.789570: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-29 22:30:42.806040: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-29 22:30:42.828704: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

IndexError: list index out of range