In [2]:
import pandas as pd

file_path = "E:\citysense360\data\energy\household_power_consumption.txt"

df = pd.read_csv(
    file_path,
    sep=";",              # IMPORTANT
    low_memory=False,
    na_values="?"         # Handle missing values
)

df.head()


Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,16/12/2006,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,16/12/2006,17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [4]:
df["datetime"] = pd.to_datetime(
    df["Date"] + " " + df["Time"],
    format="%d/%m/%Y %H:%M:%S"
)
df = df.sort_values("datetime")
df.head()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,datetime
0,16/12/2006,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0,2006-12-16 17:24:00
1,16/12/2006,17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0,2006-12-16 17:25:00
2,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0,2006-12-16 17:26:00
3,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0,2006-12-16 17:27:00
4,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0,2006-12-16 17:28:00


In [5]:
df = df[["datetime", "Global_active_power"]]
df = df.dropna()


In [6]:
df.rename(columns={"Global_active_power": "power"}, inplace=True)


In [7]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[["power"]])


In [8]:
def create_sequences(data, window=24):
    X, y = [], []
    for i in range(len(data) - window):
        X.append(data[i:i+window])
        y.append(data[i+window])
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_data, window=24)
print(X.shape, y.shape)


(2049256, 24, 1) (2049256, 1)


In [9]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout





In [11]:
model = Sequential([
    GRU(64, return_sequences=True, input_shape=(24, 1)),
    Dropout(0.2),
    GRU(32),
    Dense(1)
])
model.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mae"]
)






In [13]:
history = model.fit(
    X_train, y_train,
    epochs=2,
    batch_size=32,
    validation_split=0.1,
    verbose=1
)


Epoch 1/2
Epoch 2/2


In [14]:
loss, mae = model.evaluate(X_test, y_test)
print("Test MAE:", mae)


Test MAE: 0.007379232905805111


In [16]:
import joblib
model.save("E:\citysense360\models\energy_gru_model.h5")
joblib.dump(scaler, "E:\citysense360\models\energy_scaler.pkl")



  saving_api.save_model(


['E:\\citysense360\\models\\energy_scaler.pkl']

In [18]:
last_window = scaled_data[-24:].reshape(1, 24, 1)

scaled_prediction = model.predict(last_window)

predicted_power = scaler.inverse_transform(scaled_prediction)

print("Predicted energy usage (kW):", predicted_power[0][0])


Predicted energy usage (kW): 0.923737
