In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np


file_uri = 'D:\Work\Home-2023\may-project\labs\outputsx\merged\merged_บ้านสบป้าด.csv.csv'
# Load the dataset
data = pd.read_csv(file_uri)

# Select relevant features (assuming 'SO2' is your target)
features = ['Value_TEMP', 'Value_RH', 'Value_SO2', 'Value_NO2', 'Value_PM10']
data_selected = data[features]

# Fill missing values if needed
data_selected = data_selected.interpolate(method='linear')

# Normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data_selected)

# Function to create dataset for LSTM
def create_dataset(dataset, look_back=48):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0:-1]
        dataX.append(a)
        dataY.append(dataset[i + look_back, -1])
    return np.array(dataX), np.array(dataY)

# Prepare data for LSTM
look_back = 48
X, y = create_dataset(data_scaled, look_back)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense

# GPU configuration (optional, for fine-tuning performance)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

# Rest of your model code
model = Sequential()

model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]), activation='tanh', recurrent_activation='sigmoid', return_sequences=True))
model.add(LSTM(50, activation='tanh', recurrent_activation='sigmoid'))

model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=72, validation_data=(X_test, y_test), verbose=2, shuffle=False)



Epoch 1/20
1169/1169 - 16s - loss: 0.0040 - val_loss: 0.0032 - 16s/epoch - 14ms/step
Epoch 2/20
1169/1169 - 12s - loss: 0.0033 - val_loss: 0.0030 - 12s/epoch - 10ms/step
Epoch 3/20
1169/1169 - 12s - loss: 0.0031 - val_loss: 0.0029 - 12s/epoch - 11ms/step
Epoch 4/20
1169/1169 - 13s - loss: 0.0030 - val_loss: 0.0029 - 13s/epoch - 11ms/step
Epoch 5/20
1169/1169 - 13s - loss: 0.0028 - val_loss: 0.0028 - 13s/epoch - 11ms/step
Epoch 6/20
1169/1169 - 13s - loss: 0.0027 - val_loss: 0.0026 - 13s/epoch - 11ms/step
Epoch 7/20
1169/1169 - 13s - loss: 0.0025 - val_loss: 0.0025 - 13s/epoch - 11ms/step
Epoch 8/20
1169/1169 - 13s - loss: 0.0024 - val_loss: 0.0023 - 13s/epoch - 11ms/step
Epoch 9/20
1169/1169 - 14s - loss: 0.0023 - val_loss: 0.0022 - 14s/epoch - 12ms/step
Epoch 10/20
1169/1169 - 13s - loss: 0.0022 - val_loss: 0.0021 - 13s/epoch - 12ms/step
Epoch 11/20
1169/1169 - 13s - loss: 0.0021 - val_loss: 0.0021 - 13s/epoch - 11ms/step
Epoch 12/20
1169/1169 - 13s - loss: 0.0021 - val_loss: 0.0021 -

<keras.callbacks.History at 0x266420a9dc0>

In [4]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Make predictions
y_pred = model.predict(X_test)

# Invert scaling for prediction - We need to create a dummy array with the same shape as the input features
y_pred_rescaled = np.concatenate((np.zeros((y_pred.shape[0], data_scaled.shape[1]-1)), y_pred), axis=1)
y_pred_inv = scaler.inverse_transform(y_pred_rescaled)[:, -1]  # Select only the last column (SO2)

# Invert scaling for actual values
y_test_rescaled = np.concatenate((np.zeros((y_test.shape[0], data_scaled.shape[1]-1)), y_test.reshape(-1, 1)), axis=1)
y_test_inv = scaler.inverse_transform(y_test_rescaled)[:, -1]  # Select only the last column (SO2)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
print('Test RMSE: %.3f' % rmse)


Test RMSE: 19.600


In [7]:
import matplotlib.pyplot as plt
import pandas as pd

# Assuming 'data' is your original DataFrame and 'model' is your trained LSTM model

## Assuming 'data' is your original DataFrame and 'model' is your trained LSTM model

predicted_SO2 = []
input_data = data_scaled[-look_back:]  # Initial input data

for i in range(48):  # Predicting 48 steps
    # Reshape and predict
    input_data_reshaped = np.reshape(input_data, (1, look_back, data_scaled.shape[1]))
    prediction = model.predict(input_data_reshaped)

    # Inverse scale the prediction and append to results
    prediction_rescaled = np.concatenate((np.zeros((prediction.shape[0], data_scaled.shape[1]-1)), prediction), axis=1)
    prediction_SO2 = scaler.inverse_transform(prediction_rescaled)[:, -1]
    predicted_SO2.append(prediction_SO2[0])

    # Update input_data with the new prediction
    input_data = np.vstack((input_data[1:], prediction))

# Generate timestamps for the next 48 hours
predicted_timestamps = pd.date_range(start=data['Datetime'].iloc[-1], periods=49, freq='H')[1:]

# Plot
plt.figure(figsize=(12, 6))
plt.plot(predicted_timestamps, predicted_SO2, label='Predicted SO2')
plt.xlabel('Time')
plt.ylabel('SO2 Levels')
plt.title('SO2 Level Prediction for the Next 48 Hours')
plt.legend()
plt.show()




ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 5 and the array at index 1 has size 1