# Predict Energy Consumption

In [72]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam
from keras.regularizers import l2
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

## Load Data

In [4]:
#load Traning data
df_train=pd.read_csv('df_train.csv')
df_test=pd.read_csv('df_test.csv')

In [6]:
df_train.head()
df_test.head()

Unnamed: 0,date,power_consumption,year,semester,quarter,day_in_week,week_in_year,day_in_year,month
0,4/1/2010,1687.44,2010,1,2,Thu,13,91,4
1,4/2/2010,1648.622,2010,1,2,Fri,14,92,4
2,4/3/2010,1805.278,2010,1,2,Sat,14,93,4
3,4/4/2010,1690.388,2010,1,2,Sun,14,94,4
4,4/5/2010,1118.82,2010,1,2,Mon,14,95,4


## Data Cleaning

Convert the class of the date column from "character" to "Date", and the class of day_in_week column from character to factor for both datasets. Create indicator variables for factor columns

In [10]:
# Check the current data type of the 'date' column
print(df_train['date'].dtype)

object


In [12]:
# Convert 'date' column to datetime
df_train['date'] = pd.to_datetime(df_train['date'], format='%m/%d/%Y')
df_test['date'] = pd.to_datetime(df_test['date'], format='%m/%d/%Y')

In [14]:
# Check the current data type of the 'day_in_week' column
print(df_train['day_in_week'].dtype) 
print(df_test['day_in_week'].dtype) 

# Convert 'day_in_week' column to factor (categorical) type
df_train['day_in_week'] = pd.Categorical(df_train['day_in_week'])
df_test['day_in_week'] = pd.Categorical(df_test['day_in_week'])

object
object


In [16]:
df_train.head()

Unnamed: 0,date,power_consumption,year,semester,quarter,day_in_week,week_in_year,day_in_year,month
0,2006-12-16,1209.176,2006,2,4,Sat,50,350,12
1,2006-12-17,3390.46,2006,2,4,Sun,51,351,12
2,2006-12-18,2203.826,2006,2,4,Mon,51,352,12
3,2006-12-19,1666.194,2006,2,4,Tue,51,353,12
4,2006-12-20,2225.748,2006,2,4,Wed,51,354,12


In [18]:
df_train_one_hot = pd.get_dummies(df_train, columns=['day_in_week'])
df_test_one_hot = pd.get_dummies(df_test, columns=['day_in_week'])

In [20]:
df_train_one_hot.head()

Unnamed: 0,date,power_consumption,year,semester,quarter,week_in_year,day_in_year,month,day_in_week_Fri,day_in_week_Mon,day_in_week_Sat,day_in_week_Sun,day_in_week_Thu,day_in_week_Tue,day_in_week_Wed
0,2006-12-16,1209.176,2006,2,4,50,350,12,False,False,True,False,False,False,False
1,2006-12-17,3390.46,2006,2,4,51,351,12,False,False,False,True,False,False,False
2,2006-12-18,2203.826,2006,2,4,51,352,12,False,True,False,False,False,False,False
3,2006-12-19,1666.194,2006,2,4,51,353,12,False,False,False,False,False,True,False
4,2006-12-20,2225.748,2006,2,4,51,354,12,False,False,False,False,False,False,True


In [22]:
df_train_one_hot.shape

(1202, 15)

In [24]:
df_train_one_hot.describe()

Unnamed: 0,date,power_consumption,year,semester,quarter,week_in_year,day_in_year,month
count,1202,1202.0,1202.0,1202.0,1202.0,1202.0,1202.0,1202.0
mean,2008-08-07 12:00:00,1592.959253,2008.123128,1.472546,2.415141,25.455907,175.1797,6.25624
min,2006-12-16 00:00:00,0.0,2006.0,1.0,1.0,1.0,1.0,1.0
25%,2007-10-12 06:00:00,1179.8415,2007.0,1.0,1.0,11.0,76.0,3.0
50%,2008-08-07 12:00:00,1583.971,2008.0,1.0,2.0,25.0,171.0,6.0
75%,2009-06-03 18:00:00,1952.9515,2009.0,2.0,3.0,39.0,271.0,9.0
max,2010-03-31 00:00:00,4773.386,2010.0,2.0,4.0,53.0,366.0,12.0
std,,632.497568,0.972467,0.499453,1.153442,15.623968,109.371519,3.573317


In [26]:
df_train_one_hot.isnull().sum()

date                 0
power_consumption    0
year                 0
semester             0
quarter              0
week_in_year         0
day_in_year          0
month                0
day_in_week_Fri      0
day_in_week_Mon      0
day_in_week_Sat      0
day_in_week_Sun      0
day_in_week_Thu      0
day_in_week_Tue      0
day_in_week_Wed      0
dtype: int64

In [28]:
df_test_one_hot.isnull().sum()

date                 0
power_consumption    0
year                 0
semester             0
quarter              0
week_in_year         0
day_in_year          0
month                0
day_in_week_Fri      0
day_in_week_Mon      0
day_in_week_Sat      0
day_in_week_Sun      0
day_in_week_Thu      0
day_in_week_Tue      0
day_in_week_Wed      0
dtype: int64

## Prepare training and testing data for modeling

In [31]:
# Define features and target
X_train = df_train_one_hot.drop(['date', 'power_consumption'], axis=1)
y_train = df_train_one_hot['power_consumption']

X_test = df_train_one_hot.drop(['date', 'power_consumption'], axis=1)
y_test = df_train_one_hot['power_consumption']

X_train

Unnamed: 0,year,semester,quarter,week_in_year,day_in_year,month,day_in_week_Fri,day_in_week_Mon,day_in_week_Sat,day_in_week_Sun,day_in_week_Thu,day_in_week_Tue,day_in_week_Wed
0,2006,2,4,50,350,12,False,False,True,False,False,False,False
1,2006,2,4,51,351,12,False,False,False,True,False,False,False
2,2006,2,4,51,352,12,False,True,False,False,False,False,False
3,2006,2,4,51,353,12,False,False,False,False,False,True,False
4,2006,2,4,51,354,12,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197,2010,1,1,13,86,3,False,False,True,False,False,False,False
1198,2010,1,1,13,87,3,False,False,False,True,False,False,False
1199,2010,1,1,13,88,3,False,True,False,False,False,False,False
1200,2010,1,1,13,89,3,False,False,False,False,False,True,False


In [49]:
X_test

Unnamed: 0,year,semester,quarter,week_in_year,day_in_year,month,day_in_week_Fri,day_in_week_Mon,day_in_week_Sat,day_in_week_Sun,day_in_week_Thu,day_in_week_Tue,day_in_week_Wed
0,2006,2,4,50,350,12,False,False,True,False,False,False,False
1,2006,2,4,51,351,12,False,False,False,True,False,False,False
2,2006,2,4,51,352,12,False,True,False,False,False,False,False
3,2006,2,4,51,353,12,False,False,False,False,False,True,False
4,2006,2,4,51,354,12,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197,2010,1,1,13,86,3,False,False,True,False,False,False,False
1198,2010,1,1,13,87,3,False,False,False,True,False,False,False
1199,2010,1,1,13,88,3,False,True,False,False,False,False,False
1200,2010,1,1,13,89,3,False,False,False,False,False,True,False


In [53]:
X_val, X_test, y_val, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=30)

## Normalizing the data

In [57]:
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_val_norm = scaler.transform(X_val)
X_test_norm = scaler.transform(X_test)

## Train the Model

In [258]:
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.initializers import glorot_uniform

# Define the model
model = Sequential()

# Add the input layer with specified shape
model.add(Input(shape=(X_train_norm.shape[1],)))

# Add Dense layers with Xavier initialization
model.add(Dense(100, activation='relu', kernel_initializer=glorot_uniform()))
model.add(Dense(50, activation='relu', kernel_initializer=glorot_uniform()))
model.add(Dense(15, activation='relu', kernel_initializer=glorot_uniform()))

# Output layer (typically doesn't require Xavier initialization)
model.add(Dense(1, activation='linear', kernel_initializer=glorot_uniform()))

# Print model summary
model.summary()


In [260]:
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [262]:
# Ensure all data types are correct
X_train_norm = X_train_norm.astype(float)
X_val_morm = X_val_norm.astype(float)
X_test_morm = X_test_norm.astype(float)
y_train = y_train.astype(float)
y_val = y_val.astype(float)
y_test = y_test.astype(float)


In [278]:
history = model.fit(X_train_norm, y_train, epochs=5000, verbose=1, validation_data=(X_val_norm, y_val),batch_size=1)

Epoch 1/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 223348.9688 - mse: 223348.9688 - val_loss: 207026.9688 - val_mse: 207026.9688
Epoch 2/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 208957.2344 - mse: 208957.2344 - val_loss: 220940.7500 - val_mse: 220940.7500
Epoch 3/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 195158.2969 - mse: 195158.2969 - val_loss: 207287.7812 - val_mse: 207287.7812
Epoch 4/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 225520.7812 - mse: 225520.7812 - val_loss: 211902.3281 - val_mse: 211902.3281
Epoch 5/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 206029.6719 - mse: 206029.6719 - val_loss: 207572.2969 - val_mse: 207572.2969
Epoch 6/5000
[1m1202/1202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 198500.0781 - mse: 198

KeyboardInterrupt: 

In [285]:
# Evaluate the model
loss, mae = model.evaluate(X_test_norm, y_test)
print(f"Mean Absolute Error: {mae}")

plt.figure(figsize=(14, 7))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

AttributeError: 'LinearRegression' object has no attribute 'evaluate'

In [287]:
# Predict the Concrete Compressive Strength for the test dataset 
y_pred = model.predict(X_test_norm)


# Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# R-squared
r2 = r2_score(y_test, y_pred)



In [289]:
r2

-3178.610680606098