In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error

df = pd.read_csv('../City-Specific Data/Tuscon.csv')
df = df.dropna()

features = df[['Temperature', 'DewPoint', 'RelativeHumidity', 'Precipitation', 'WindSpeed', 'Pressure']].values
energy_demand = df['D'].values.reshape(-1, 1)
dates = df['time'].values  

scaler_features = MinMaxScaler()
scaler_energy = MinMaxScaler()
features_normalized = scaler_features.fit_transform(features)
energy_demand_normalized = scaler_energy.fit_transform(energy_demand)

def create_sequences(data, target, dates, seq_length):
    sequences = []
    targets = []
    sequence_dates = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        target_seq = target[i+seq_length]
        date_seq = dates[i+seq_length] 
        sequences.append(seq)
        targets.append(target_seq)
        sequence_dates.append(date_seq)
    return np.array(sequences), np.array(targets), np.array(sequence_dates)

seq_length = 24
X, y, dates_seq = create_sequences(features_normalized, energy_demand_normalized, dates, seq_length)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

mse_scores = []
mae_scores = []
mape_scores = []

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    non_zero_indices = y_true != 0
    return np.mean(np.abs((y_true[non_zero_indices] - y_pred[non_zero_indices]) / y_true[non_zero_indices])) * 100

result_df = pd.DataFrame()

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    dates_test = dates_seq[test_index]

    model = Sequential([
        LSTM(50, activation='relu', input_shape=(seq_length, X.shape[2])),
        Dense(1)
    ])

    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.1, verbose=1)

    predictions_normalized = model.predict(X_test)

    predictions = scaler_energy.inverse_transform(predictions_normalized)
    y_test_original = scaler_energy.inverse_transform(y_test)

    mse = mean_squared_error(y_test_original, predictions)
    mae = mean_absolute_error(y_test_original, predictions)
    mape = mean_absolute_percentage_error(y_test_original, predictions)
    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)

    fold_results = pd.DataFrame({
        'Date': dates_test,
        'Actual': y_test_original.flatten(),
        'Predicted': predictions.flatten()
    })
    result_df = pd.concat([result_df, fold_results], ignore_index=True)

result_df.to_csv('/Users/sanjeevsubramanian/Downloads/tuscon_predictions.csv', index=False)

average_mse = np.mean(mse_scores)
average_mae = np.mean(mae_scores)
average_mape = np.mean(mape_scores)
print(f"Average Mean Squared Error: {average_mse}")
print(f"Average Mean Absolute Error: {average_mae}")
print(f"Average Mean Absolute Percentage Error: {average_mape}")

plt.figure(figsize=(12, 6))
plt.plot(result_df['Actual'][:100], label='Actual')
plt.plot(result_df['Predicted'][:100], label='Predicted')
plt.title('Predicted vs Actual Energy Demand for Baltimore (Last Fold)')
plt.xlabel('Time')
plt.ylabel('Energy Demand')
plt.legend()
plt.show()

Epoch 1/25


  super().__init__(**kwargs)


[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - loss: 1.8979e-04 - val_loss: 4.3879e-05
Epoch 2/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - loss: 5.0268e-05 - val_loss: 1.3091e-05
Epoch 3/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - loss: 3.9053e-05 - val_loss: 9.5149e-06
Epoch 4/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 5.0321e-05 - val_loss: 9.7921e-06
Epoch 5/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 4.6562e-05 - val_loss: 1.3423e-05
Epoch 6/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 7.8105e-05 - val_loss: 1.4013e-05
Epoch 7/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 1.2244e-04 - val_loss: 8.5516e-06
Epoch 8/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - los

  super().__init__(**kwargs)


[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - loss: 1.8639e-04 - val_loss: 1.2421e-05
Epoch 2/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 8.9044e-05 - val_loss: 1.4891e-05
Epoch 3/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 5.0201e-05 - val_loss: 1.0437e-05
Epoch 4/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - loss: 1.0888e-04 - val_loss: 2.2348e-05
Epoch 5/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 6.8543e-05 - val_loss: 1.1271e-05
Epoch 6/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - loss: 7.7667e-05 - val_loss: 1.3798e-05
Epoch 7/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 3.6183e-05 - val_loss: 1.3762e-05
Epoch 8/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss:

  super().__init__(**kwargs)


[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 1.0888e-04 - val_loss: 1.1520e-05
Epoch 2/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 3.5628e-05 - val_loss: 1.3218e-05
Epoch 3/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 2.8122e-05 - val_loss: 1.5812e-05
Epoch 4/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 2.2155e-05 - val_loss: 1.2221e-05
Epoch 5/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 6.7260e-05 - val_loss: 9.8784e-06
Epoch 6/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 7.8784e-05 - val_loss: 1.2909e-05
Epoch 7/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - loss: 2.7802e-05 - val_loss: 1.1883e-05
Epoch 8/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss:

  super().__init__(**kwargs)


[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 9.1355e-05 - val_loss: 2.5006e-05
Epoch 2/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 3.0530e-05 - val_loss: 1.3775e-05
Epoch 3/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 2.7253e-05 - val_loss: 1.3691e-05
Epoch 4/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 7.4265e-05 - val_loss: 1.0262e-05
Epoch 5/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 3.8311e-05 - val_loss: 9.6433e-06
Epoch 6/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 3.1412e-05 - val_loss: 1.2860e-05
Epoch 7/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 3.3070e-05 - val_loss: 9.4243e-06
Epoch 8/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 3.7

  super().__init__(**kwargs)


[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 11ms/step - loss: 5.2955e-05 - val_loss: 1.7908e-05
Epoch 2/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 2.3781e-05 - val_loss: 1.9424e-05
Epoch 3/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 2.0808e-05 - val_loss: 9.5735e-06
Epoch 4/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - loss: 1.9385e-05 - val_loss: 7.5521e-06
Epoch 5/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - loss: 1.8488e-05 - val_loss: 9.4958e-06
Epoch 6/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - loss: 1.8402e-05 - val_loss: 7.8133e-06
Epoch 7/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - loss: 1.8289e-05 - val_loss: 1.0160e-05
Epoch 8/25
[1m1706/1706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - lo

OSError: Cannot save file into a non-existent directory: '../predictions'

In [8]:
result_df.to_csv('/Users/sanjeevsubramanian/Downloads/tuscon_predictions.csv', index=False)