<a href="https://colab.research.google.com/github/nitsundon/100DaysofML/blob/main/Day01/LSTMrev02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt

In [4]:
df= pd.read_pickle("/content/drive/MyDrive/Libraries/pickle/preprocessed_demand_df.pkl")
df['datetime']=pd.to_datetime(df['datetime'])


In [5]:
def create_datetime_features(df):


  df['block'] = 1+df['datetime'].dt.hour * 4 + df['datetime'].dt.minute // 15
  df['month'] = df['datetime'].dt.month
  df['day'] = df['datetime'].dt.day
  df['day_of_week'] = df['datetime'].dt.dayofweek
  df['year'] = df['datetime'].dt.year

  return df.copy();

In [6]:
def add_cyclic_features(df, col, max_val,drop_original=True):
    df[f'{col}_sin'] = np.sin(2 * np.pi * df[col]/max_val)
    df[f'{col}_cos'] = np.cos(2 * np.pi * df[col]/max_val)
    if(drop_original):
      df.drop(col,axis=1,inplace=True)
    return df


In [7]:
def inverse_cyclic_features(df, col, max_val):
    # Calculate the angle (theta) back from sin and cos
    theta = np.arctan2(df[f'{col}_sin'], df[f'{col}_cos'])

    # Normalize theta to be in [0, 2*pi]
    theta = (theta + 2 * np.pi) % (2 * np.pi)

    # Recover the original value
    df[col] = (theta * max_val) / (2 * np.pi)
    df[col]=df[col].astype(int)
    # Optionally, you can drop sin and cos columns if you want
    df.drop([f'{col}_sin', f'{col}_cos'], axis=1, inplace=True)

    return df

In [8]:
def create_lags(df,lag_steps):
  for i in range(1,lag_steps+1):
    df[f'demand_lag_{i}']=df['demand'].shift(i)
  return df

In [9]:
def create_lstm_feature_and_target(df):
  features = df.drop('demand', axis=1)
  target = df['demand']
  return features,target

In [10]:
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)




In [11]:
def inverse_scaled_features_df(df, scaler):
    df1 = df.copy()

    # Inverse cyclic features
    df1 = inverse_cyclic_features(df1, 'month', 12)
    df1 = inverse_cyclic_features(df1, 'day_of_week', 7)
    df1 = inverse_cyclic_features(df1, 'block', 96)
    df1 = inverse_cyclic_features(df1, 'day', 31)

    # Inverse scale selected columns
    cols_to_inverse = ['demand', 'year']
    s = scaler.inverse_transform(df1[cols_to_inverse])

    # Convert back to DataFrame to maintain structure
    s_df = pd.DataFrame(s, columns=cols_to_inverse, index=df1.index)

    # Update df1 with inverse transformed values
    df1[cols_to_inverse] = s_df

    return df1, scaler

In [12]:
def create_scaled_features_df(df,scaler):
  df1=df.copy()
  df1=add_cyclic_features(df1, 'month', 12, drop_original=True)
  df1=add_cyclic_features(df1,'day_of_week', 7, drop_original=True)
  df1=add_cyclic_features(df1,'block', 96, drop_original=True)
  df1=add_cyclic_features(df1,'day',31, drop_original=True)

  s= scaler.fit_transform(df1[['demand','year']])
  df1['demand']=s[:,0]
  df1['year']=s[:,1]
  return df1,scaler

In [13]:
def create_sequences_multi_output(data, seq_length, pred_length):
    X = []
    y = []
    for i in range(seq_length, len(data) - pred_length):
        X.append(data[i-seq_length:i, :-1])  # input features
        y.append(data[i:i+pred_length, -1])  # 192 future targets
    return np.array(X), np.array(y)



In [14]:
df1=create_datetime_features(df)
df1.columns
scalar=MinMaxScaler()
df1,scalar=create_scaled_features_df(df1,scalar)
df1,type(df1)

(                  datetime    demand  year  month_sin  month_cos  \
 0      2022-01-01 00:00:00  0.326487   0.0   0.500000   0.866025   
 1      2022-01-01 00:15:00  0.325481   0.0   0.500000   0.866025   
 2      2022-01-01 00:30:00  0.320685   0.0   0.500000   0.866025   
 3      2022-01-01 00:45:00  0.327153   0.0   0.500000   0.866025   
 4      2022-01-01 01:00:00  0.328219   0.0   0.500000   0.866025   
 ...                    ...       ...   ...        ...        ...   
 115153 2025-04-14 12:00:00  0.849394   1.0   0.866025  -0.500000   
 115154 2025-04-14 12:15:00  0.843943   1.0   0.866025  -0.500000   
 115155 2025-04-14 12:30:00  0.838462   1.0   0.866025  -0.500000   
 115156 2025-04-14 12:45:00  0.842602   1.0   0.866025  -0.500000   
 115157 2025-04-14 13:00:00  0.837958   1.0   0.866025  -0.500000   
 
         day_of_week_sin  day_of_week_cos  block_sin  block_cos   day_sin  \
 0             -0.974928        -0.222521   0.065403   0.997859  0.201299   
 1             -

In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt

# Step 1: Read Data (if not already)
df = pd.read_pickle("/content/drive/MyDrive/Libraries/pickle/preprocessed_demand_df.pkl")# Load your dataset
df['datetime'] = pd.to_datetime(df['datetime'])
# df.set_index('datetime', inplace=True) # Remove this line to keep the features as columns
#Instead of setting datetime as index , we will use it to extract features.
df=create_datetime_features(df)
# Features and target
features = ['block', 'day', 'day_of_week', 'year', 'month']
target = ['demand']

# Step 2: Scale Data
scaler = MinMaxScaler()
#Use reset_index to make sure datetime is not index
scaled_data = scaler.fit_transform(df.reset_index()[features + target])

# Step 3: Create Sequences
def create_sequences_multi_output(data, seq_length, pred_length):
    X = []
    y = []
    for i in range(seq_length, len(data) - pred_length):
        X.append(data[i-seq_length:i, :-1])  # Input features
        y.append(data[i:i+pred_length, -1])  # Future demand
    return np.array(X), np.array(y)

# Set sequence length and prediction length
seq_length = 672  # 7 days (672 blocks)
pred_length = 192  # 2 days (192 blocks)

# Create sequences
X, y = create_sequences_multi_output(scaled_data, seq_length, pred_length)

# Step 4: Split Data into Training and Testing
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Step 5: Build LSTM Model
model = Sequential()
model.add(LSTM(128, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(64, activation='relu'))
model.add(Dense(pred_length))  # Output layer has 192 units for the 192 steps forecast

model.compile(optimizer='adam', loss='mse')

# Show model summary
model.summary()

# Step 6: Early Stopping and Reduce Learning Rate Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

# Step 7: Train Model
history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test),
                    callbacks=[early_stop, reduce_lr])

# Step 8: Plot Training History
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.show()

# Step 9: Make Predictions for 192 Future Blocks (next 2 days)
# Take last known sequence (latest 672 blocks)
last_sequence = scaled_data[-seq_length:, :-1]
last_sequence = np.expand_dims(last_sequence, axis=0)  # Shape (1, seq_length, features)

# Predict future demand for 192 blocks ahead
future_pred = model.predict(last_sequence)

# Inverse scale the predictions
demand_scaler = MinMaxScaler()
demand_scaler.min_, demand_scaler.scale_ = scaler.min_[-1], scaler.scale_[-1]
future_pred_rescaled = demand_scaler.inverse_transform(future_pred.reshape(-1, 1))

# Step 10: Plot the Forecast (192 future blocks)
plt.figure(figsize=(10, 6))
plt.plot(future_pred_rescaled, label='Forecasted Demand (Next 192 Blocks)')
plt.title("Forecasted Demand for Next 2 Days (192 Blocks)")
plt.xlabel('Time (blocks)')
plt.ylabel('Demand')
plt.legend()
plt.show()

  super().__init__(**kwargs)


Epoch 1/50
[1m 167/1422[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m39:31[0m 2s/step - loss: nan

KeyboardInterrupt: 