In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

In [7]:
# Load the smart-home dataset straight from the Hugging Face Hub.
# (A local copy can be read instead with pd.read_csv('smart_home_dataset.csv').)
url = (
    "https://huggingface.co/datasets/panda04/smart-home-dataset"
    "/raw/main/smart_home_dataset.csv"
)
data2 = pd.read_csv(url)

# Preview the first ten rows.
data2.head(10)

Unnamed: 0,Unix Timestamp,Transaction_ID,Television,Dryer,Oven,Refrigerator,Microwave,Line Voltage,Voltage,Apparent Power,Energy Consumption (kWh),Month,Day of the Week,Hour of the Day,Offloading Decision
0,1577836800,1,0,0,0,1,0,237,233,1559,24.001763,January,Wednesday,0,Local
1,1577839322,2,0,1,0,0,1,232,230,1970,31.225154,January,Wednesday,0,Remote
2,1577841845,3,0,1,0,0,0,223,222,1684,70.4607,January,Wednesday,1,Remote
3,1577844368,4,1,0,1,1,0,225,224,1694,32.264043,January,Wednesday,2,Remote
4,1577846891,5,1,0,0,1,0,222,214,1889,32.728111,January,Wednesday,2,Local
5,1577849414,6,0,1,0,1,0,235,227,1503,63.93842,January,Wednesday,3,Local
6,1577851937,7,0,1,0,0,1,237,237,1739,87.59289,January,Wednesday,4,Local
7,1577854460,8,0,1,1,1,0,235,227,1996,37.796262,January,Wednesday,4,Local
8,1577856983,9,0,0,1,1,0,238,235,1549,74.55393,January,Wednesday,5,Local
9,1577859506,10,0,0,0,1,1,231,230,1771,48.763756,January,Wednesday,6,Remote


# Feature Engineering and Encoding

In [8]:
# Remove the per-row identifier and the raw timestamp columns.
data2 = data2.drop(columns=['Transaction_ID', 'Unix Timestamp'])

In [9]:
# Flag rows whose hour lies in the 06-09 or 18-21 window (both ends inclusive).
hour_of_day = data2['Hour of the Day']
data2['is_peak_hour'] = (
    hour_of_day.between(6, 9) | hour_of_day.between(18, 21)
).astype('int64')

In [10]:
# Row-wise sum of the five appliance indicator columns.
data2['total_appliance_usage'] = (
    data2.loc[:, ['Television', 'Dryer', 'Oven', 'Refrigerator', 'Microwave']]
    .sum(axis='columns')
)

In [11]:
# Rows strictly above the 75th percentile of energy consumption are marked 1,
# everything else (including rows equal to the threshold) is marked 0.
energy_kwh = data2['Energy Consumption (kWh)']
consumption_threshold = energy_kwh.quantile(0.75)
data2['is_high_consumption'] = energy_kwh.gt(consumption_threshold).astype('int64')

In [12]:
# Bucket the hour of the day into four six-hour parts of the day.
#
# right=False makes every bin left-closed / right-open:
#   [0, 6) night, [6, 12) morning, [12, 18) afternoon, [18, 24) evening.
# With pd.cut's default (right-closed bins, lowest edge excluded) the first
# bin is (0, 6], so every midnight row (hour 0) silently became NaN and hours
# 6, 12 and 18 were assigned to the preceding part of the day.
# Assumes hours are integers in 0-23; a value of 24 would map to NaN.
data2['part_of_day'] = pd.cut(
    data2['Hour of the Day'],
    bins=[0, 6, 12, 18, 24],
    labels=['night', 'morning', 'afternoon', 'evening'],
    right=False,
)

In [13]:
# 1 when the day name is Saturday or Sunday, otherwise 0.
data2['is_weekend'] = (
    data2['Day of the Week'].isin(['Saturday', 'Sunday']).astype('int64')
)

In [14]:
# Season -> the three month names that belong to it.
months_by_season = {
    'Winter': ('December', 'January', 'February'),
    'Spring': ('March', 'April', 'May'),
    'Summer': ('June', 'July', 'August'),
    'Fall': ('September', 'October', 'November'),
}

# Invert to a month -> season lookup and apply it to the 'Month' column.
# Month values that are not keys of the lookup map to NaN.
season_dict = {
    month: season
    for season, months in months_by_season.items()
    for month in months
}
data2['Season'] = data2['Month'].map(season_dict)

In [15]:
# Cyclical encoding of the hour: project it onto a 24-hour circle so that
# the sine/cosine pair varies smoothly across the day boundary.
hour_angle = 2 * np.pi * data2['Hour of the Day'] / 24
data2['hour_sin'] = np.sin(hour_angle)
data2['hour_cos'] = np.cos(hour_angle)

In [16]:
from sklearn.preprocessing import MinMaxScaler

# Continuous measurements to rescale with min-max scaling
# (MinMaxScaler's default output range is [0, 1] per column).
features_to_scale = ['Line Voltage', 'Voltage', 'Apparent Power', 'Energy Consumption (kWh)']

# Learn the per-column min/max, then overwrite the columns in place with the
# scaled values. Re-running this cell rescales already-scaled data.
# NOTE(review): the scaler is fitted on the whole frame; if a train/test split
# happens later, confirm this does not leak test statistics.
scaler = MinMaxScaler().fit(data2[features_to_scale])
data2[features_to_scale] = scaler.transform(data2[features_to_scale])

In [17]:
appliances = ['Television', 'Dryer', 'Oven', 'Refrigerator', 'Microwave']

# Small constant added to denominators to prevent division by zero.
EPSILON = 1e-6

# The measurement columns were min-max scaled in the previous cell, so each of
# them contains exact zeros (at the column minimum). Dividing by those scaled
# values made the epsilon guard dominate and produced ~1e6 outliers, and the
# ratios were expressed in arbitrary scaled units. Recover the original
# measurements from the fitted scaler and build the ratios from those instead.
raw_measurements = pd.DataFrame(
    scaler.inverse_transform(data2[features_to_scale]),
    columns=features_to_scale,
    index=data2.index,
)
raw_energy = raw_measurements['Energy Consumption (kWh)']

# Appliance on/off indicator divided by the row's energy consumption.
for appliance in appliances:
    data2[f'{appliance}_efficiency_ratio'] = data2[appliance] / (raw_energy + EPSILON)

# Apparent power relative to the product of the two voltage readings.
data2['power_factor'] = raw_measurements['Apparent Power'] / (
    raw_measurements['Line Voltage'] * raw_measurements['Voltage'] + EPSILON
)

# Number of appliances switched on in the row. This equals
# 'total_appliance_usage' (same columns, same sum); both are kept so that any
# later cell referring to either name keeps working.
data2['active_appliances'] = data2[appliances].sum(axis=1)

# NOTE(review): rows with zero active appliances still divide by EPSILON only
# and therefore yield very large values - confirm how downstream cells should
# treat them.
data2['energy_per_active_appliance'] = raw_energy / (data2['active_appliances'] + EPSILON)

In [18]:
# Display the column index to check which original and engineered features exist.
data2.columns

Index(['Television', 'Dryer', 'Oven', 'Refrigerator', 'Microwave',
       'Line Voltage', 'Voltage', 'Apparent Power', 'Energy Consumption (kWh)',
       'Month', 'Day of the Week', 'Hour of the Day', 'Offloading Decision',
       'is_peak_hour', 'total_appliance_usage', 'is_high_consumption',
       'part_of_day', 'is_weekend', 'Season', 'hour_sin', 'hour_cos',
       'Television_efficiency_ratio', 'Dryer_efficiency_ratio',
       'Oven_efficiency_ratio', 'Refrigerator_efficiency_ratio',
       'Microwave_efficiency_ratio', 'power_factor', 'active_appliances',
       'energy_per_active_appliance'],
      dtype='object')