In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.saving import register_keras_serializable
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow.keras.backend as K


In [2]:
# Custom Keras metrics
# rmse is registered with Keras' serialization registry via the decorator below
@register_keras_serializable()
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between predictions and targets.

    The squared difference is averaged over every element before the
    square root is taken, so the result is a single scalar tensor.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

# Registered like rmse so that a saved model referencing this metric can be
# deserialized by name instead of requiring custom_objects at load time.
@register_keras_serializable()
def r_squared(y_true, y_pred):
    """Return the coefficient of determination (R^2) using Keras backend ops.

    Computed as 1 - SS_res / SS_tot, where SS_res is the sum of squared
    residuals and SS_tot is the sum of squared deviations of ``y_true``
    from its mean. Both sums run over every element of the tensors passed in.
    """
    ss_res = K.sum(K.square(y_true - y_pred))
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    # K.epsilon() keeps the division finite when ss_tot is zero
    # (i.e. every target value is identical).
    return 1 - ss_res / (ss_tot + K.epsilon())

In [4]:
# Read in the train/test numpy arrays.
# The directory is defined once instead of being repeated in every path. It can be
# overridden with the PM25_DATA_DIR environment variable; when that is unset the
# original location is used, so existing runs behave as before.
DATA_DIR = os.environ.get(
    "PM25_DATA_DIR",
    "/Users/ttrefoni/Documents/tt_pm25_tuning/data/updt_seq_npy_arrays_80_20",
)
X_train = np.load(os.path.join(DATA_DIR, "x_train.npy"))
X_test = np.load(os.path.join(DATA_DIR, "x_test.npy"))
y_train = np.load(os.path.join(DATA_DIR, "y_train.npy"))
y_test = np.load(os.path.join(DATA_DIR, "y_test.npy"))

In [8]:
# Hyperparameters (HPS) for this run
epoch: int = 30       # passed to model.fit as epochs
batchsize: int = 32   # passed to model.fit as batch_size
units1: int = 50      # units in the first LSTM layer
units2: int = 50      # units in the second LSTM layer
lrate: float = 0.001  # learning rate recorded in metrics.csv; keep in sync with the optimizer

In [9]:
# Define the model: two stacked LSTM layers feeding a single-output Dense layer.
model = Sequential()
# The first LSTM returns the full sequence so the next LSTM receives one vector per timestep.
# The input shape is taken from axes 1 and 2 of X_train.
model.add(LSTM(units1, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
# Second (final) LSTM layer with units2 units; return_sequences=False so only the last output is passed on.
model.add(LSTM(units2, return_sequences=False))
model.add(Dense(1))
# Use the configured learning rate (lrate) rather than a separate literal, so the value
# written to metrics.csv always matches what the optimizer actually used.
model.compile(optimizer=Adam(learning_rate=lrate), loss='mean_squared_error')


In [10]:
# Fit the model on the training arrays (verbose=2 is passed through to Keras)
history = model.fit(
    X_train,
    y_train,
    epochs=epoch,
    batch_size=batchsize,
    verbose=2,
)

# Tabulate the values Keras recorded during training and add a 1-based epoch column
history_df = pd.DataFrame(history.history).assign(epoch=lambda hist: hist.index + 1)

# Display the DataFrame
print(history_df)

Epoch 1/30
19110/19110 - 128s - 7ms/step - loss: 33.4058
Epoch 2/30
19110/19110 - 127s - 7ms/step - loss: 26.2592
Epoch 3/30
19110/19110 - 125s - 7ms/step - loss: 24.7968
Epoch 4/30
19110/19110 - 124s - 6ms/step - loss: 23.8085
Epoch 5/30
19110/19110 - 126s - 7ms/step - loss: 22.8927
Epoch 6/30
19110/19110 - 125s - 7ms/step - loss: 22.1253
Epoch 7/30
19110/19110 - 131s - 7ms/step - loss: 21.4958
Epoch 8/30
19110/19110 - 126s - 7ms/step - loss: 20.8727
Epoch 9/30
19110/19110 - 126s - 7ms/step - loss: 20.4871
Epoch 10/30
19110/19110 - 127s - 7ms/step - loss: 20.0671
Epoch 11/30
19110/19110 - 127s - 7ms/step - loss: 19.6238
Epoch 12/30
19110/19110 - 127s - 7ms/step - loss: 19.1411
Epoch 13/30
19110/19110 - 127s - 7ms/step - loss: 18.7575
Epoch 14/30
19110/19110 - 128s - 7ms/step - loss: 18.5906
Epoch 15/30
19110/19110 - 129s - 7ms/step - loss: 18.2843
Epoch 16/30
19110/19110 - 128s - 7ms/step - loss: 17.9994
Epoch 17/30
19110/19110 - 128s - 7ms/step - loss: 17.7998
Epoch 18/30
19110/19110

In [7]:
# Persist the training-history DataFrame to CSV (the row index is not written)
history_csv = 'history_df_TEST_v1.csv'
history_df.to_csv(history_csv, index=False)

In [11]:
# Predict on the test arrays and compute evaluation metrics
y_pred = model.predict(X_test)
print(y_pred)
# RMSE: square root of scikit-learn's mean squared error
rmse_value = np.sqrt(mean_squared_error(y_test, y_pred))
# Coefficient of determination (R^2)
r2 = r2_score(y_test, y_pred)
# The value reported here is the root of the MSE, so it is labelled as RMSE
# (it was previously printed under the label "Mean Squared Error").
print(f'Root Mean Squared Error: {rmse_value}')
print(f'R^2 Score: {r2}')

[1m4778/4778[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step
[[21.92774 ]
 [ 9.668981]
 [ 8.990298]
 ...
 [11.316569]
 [14.353521]
 [20.27524 ]]
Mean Squared Error: 4.2392018192576835
R^2 Score: 0.8587570916512193


In [ ]:
# TODO: pull out humidity, temp, and org PurpleAir values for prediction visualization.
# For now this cell only displays X_test; no columns are extracted yet.
X_test

In [12]:
# Record the run's hyperparameters together with its test metrics.
# Scalars are broadcast against the one-element lists, giving a single-row table.
data = dict(
    epoch=epoch,
    batch=batchsize,
    units1=units1,
    units2=units2,
    lrate=lrate,
    layers=[2],
    rmse=[rmse_value],
    r2=[r2],
)

df = pd.DataFrame(data)

# Write the single-row summary to CSV without the row index
df.to_csv('metrics.csv', index=False)

In [12]:
# Pair each prediction with its test target, flattened to 1-D columns
pred_vs_actual = {
    'prediction_python': y_pred.flatten(),
    'y_test_column': y_test.flatten(),
}
df = pd.DataFrame(pred_vs_actual)

print(df)

# Write predictions to CSV; index=False is not passed, so the row index is written too
df.to_csv('C:/Users/ttrefoni/Documents/tt_pm25_tuning/predictions/pred_python_base_LSTM.csv')

        prediction_python  y_test_column
0               21.927740           17.0
1                9.668981           12.0
2                8.990298            9.0
3                8.050897            6.0
4                8.584639           23.0
...                   ...            ...
152874           2.727704            0.9
152875           2.840891            2.7
152876          11.316569           10.2
152877          14.353521           17.0
152878          20.275240           17.9

[152879 rows x 2 columns]


In [14]:
# Save the trained model to a .keras file in the current working directory.
# NOTE(review): the numbers in this file name (50_64_200_100) do not obviously match the
# hyperparameters set in this notebook (units 50/50, 30 epochs, batch size 32) — confirm
# the intended name before relying on it to identify the run.
model.save("LSTM_50_64_200_100_python_updtd.keras")