<a href="https://colab.research.google.com/github/nitsundon/Load-Forecast/blob/main/XGBoostModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pandas as pd
import drive.MyDrive.Libraries.wrldc_file_handler as wfh
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import numpy as np

import plotly.express as px

In [11]:
# Load the preprocessed demand frame from Google Drive.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
df4 = pd.read_pickle("/content/drive/MyDrive/Libraries/pickle/preprocessed_demand_df.pkl")

# Upper bound (exclusive) for the short-lag features: later cells build
# 'lag_<k>' for k in range(1, lag_iteration), so a value of 2 yields only 'lag_1'.
lag_iteration = 2

In [12]:
# Feature engineering, chronological train/test split, and XGBoost training.

# Model inputs. The generated short-lag names are appended after 'lag_B';
# other cells select columns through this same list, so its order matters.
feature_col = [
    'Hour', 'DayOfWeek', 'Month', 'Quarter', 'Year',
    'DayOfyear', 'DayOfmonth', 'WeekOfYear', 'BlockNumber', 'lag_A', 'lag_B',
]
output_col = ['demand']

# Work on a copy restricted to rows with a known demand value.
df7 = df4[~df4['demand'].isna()].copy()
# Return value is unused: the helper is expected to add the calendar feature
# columns listed above to df7 in place.
wfh.add_datetime_features(df7)

# Short lags: demand from 1 .. lag_iteration - 1 rows earlier.
for lag in range(1, lag_iteration):
    lag_name = 'lag_' + str(lag)
    df7[lag_name] = df7['demand'].shift(lag)
    feature_col.append(lag_name)

# Long lags, expressed in rows. With 15-minute rows, 96 rows is one day and
# 672 rows is one week -- assumes evenly spaced data, TODO confirm.
# NOTE(review): shift() is row-based and rows with missing demand were dropped
# above, so any gap in the series would misalign these lags in time.
df7['lag_A'] = df7['demand'].shift(96)
df7['lag_B'] = df7['demand'].shift(672)

# Chronological split: the first `split_ratio` share of rows trains the model,
# the remainder is held out for evaluation.
split_ratio = 0.99
split_index = int(len(df7) * split_ratio)

train, test = df7[:split_index], df7[split_index:]
X = train[feature_col]
Y = train[output_col]
X_test = test[feature_col]
y_test = test[output_col]
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)

# Train Model
model.fit(X, Y)

# Predict and score only when a hold-out set exists; with a split ratio of 1
# the test frame is empty, so neither the prediction nor the metric is run.
if len(test) > 0:
    # Make Predictions
    y_pred = model.predict(X_test)

    # Evaluate Performance
    mae = mean_absolute_error(y_test, y_pred)
    print(f'Mean Absolute Error: {mae}')



Mean Absolute Error: 138.24615478515625


In [13]:
# Quick inspection: the distinct DayOfWeek codes present in df7, shown
# together with df7's column labels as the cell's output tuple.
day_of_week_codes = df7['DayOfWeek'].unique()
(day_of_week_codes, df7.columns)

(array([6, 7, 1, 2, 3, 4, 5], dtype=int32),
 Index(['datetime', 'demand', 'Hour', 'DayOfWeek', 'Month', 'Quarter', 'Year',
        'DayOfyear', 'DayOfmonth', 'WeekOfYear', 'BlockNumber', 'lag_1',
        'lag_A', 'lag_B'],
       dtype='object'))

In [14]:
# Recursive multi-step forecast: each prediction is written back into the
# 'demand' column so that it can feed the lag features of the steps after it.
forecast_steps = 192  # 15-minute steps to forecast (96 per day, i.e. two days)

# History: only the timestamp and the known demand values are needed.
df8 = df4.loc[~df4['demand'].isna(), ['datetime', 'demand']].copy()
last_datetime = df8['datetime'].max()
future_datetimes = pd.date_range(start=last_datetime + pd.Timedelta(minutes=15),
                                 periods=forecast_steps, freq='15min')

# Append every future timestamp once instead of concatenating one row per
# step. 0.0 is only a placeholder that the loop below overwrites; it is kept
# numeric because how wfh.add_datetime_features treats NaN demand is not
# visible here.
future_rows = pd.DataFrame({'datetime': future_datetimes, 'demand': 0.0})
df8 = pd.concat([df8, future_rows], ignore_index=True)

# Calendar features are computed once for history and horizon together.
# Assumes the helper works row by row and keeps row order -- TODO confirm.
wfh.add_datetime_features(df8)

# Lag column name -> number of rows to look back (same lags as in training).
lag_shifts = {'lag_' + str(k): k for k in range(1, lag_iteration)}
lag_shifts['lag_A'] = 96
lag_shifts['lag_B'] = 672

demand_loc = df8.columns.get_loc(output_col[0])
first_future = len(df8) - forecast_steps
for pos in range(first_future, len(df8)):
    # Refresh the lags so they pick up the predictions made so far.
    for lag_name, shift_by in lag_shifts.items():
        df8[lag_name] = df8['demand'].shift(shift_by)

    # Single-row frame (double brackets keep it 2-D) holding the model inputs.
    step_features = df8.iloc[[pos]][feature_col]
    output = model.predict(step_features)
    df8.iloc[pos, demand_loc] = output[0]

In [15]:
# Independent copy of the rows of df4 whose demand value is present.
has_demand = df4['demand'].notna()
df9 = df4[has_demand].copy()


In [16]:
import plotly.graph_objects as go

# One line trace over every row of df8 (demand against datetime).
demand_trace = go.Scatter(
    x=df8['datetime'],
    y=df8['demand'],
    mode='lines',
    name='Predicted Demand',
)

# Titles and theme for the figure.
layout_options = dict(
    title="State Demand Over Time (15-Min Blocks)",
    xaxis_title="Time",
    yaxis_title="Demand (MW)",
    template="plotly_dark",
    # yaxis=dict(range=[0, 35000])  # optional fixed y-axis range, disabled
)

# Build the figure from the trace, apply the layout, and render it.
fig = go.Figure(data=[demand_trace])
fig.update_layout(**layout_options)

fig.show()

In [17]:
# Per-feature importance scores from the trained booster.
# importance_type may be 'weight', 'gain', or 'cover'.
importance_dict = model.get_booster().get_score(importance_type='weight')

# Two-column table (feature name, score), highest score first.
importance_rows = list(importance_dict.items())
importance_df = pd.DataFrame(importance_rows, columns=['Feature', 'Importance'])
importance_df = importance_df.sort_values(by='Importance', ascending=False)

print(importance_df)

        Feature  Importance
7   BlockNumber      1044.0
10        lag_1       807.0
0          Hour       312.0
4     DayOfyear       294.0
8         lag_A       208.0
9         lag_B        97.0
3          Year        90.0
6    WeekOfYear        82.0
1     DayOfWeek        62.0
2         Month        47.0
5    DayOfmonth        25.0
