In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Synthetic hourly consumption for each house ------------------------------
# Builds `dataset`, an hourly-indexed frame with one total-consumption column and
# one renewable-consumption column per house.

# Seed the global NumPy RNG so that "Restart & Run All" regenerates the same data.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

num_houses = 5

start_date = datetime(2015, 1, 1)
end_date = datetime(2023, 12, 31)

# Hourly grid, inclusive of both ends. A timedelta is used instead of the 'H'
# alias, which newer pandas releases deprecate.
# NOTE(review): end_date is midnight, so the final day contributes only its 00:00
# sample — confirm whether the whole of 2023-12-31 was intended.
timestamps = pd.date_range(start=start_date, end=end_date, freq=timedelta(hours=1))

dataset = pd.DataFrame(index=timestamps)

# (low, high) bounds of the uniform multiplier applied to each house's load.
HOUSE_FACTOR_RANGES = {
    1: (1.2, 1.5),
    2: (0.8, 1.2),
    3: (1.0, 1.3),
    4: (0.7, 1.9),
    5: (1.5, 2.3),
}
CONSUMPTION_SCALE = 0.347  # global scaling applied to every sample

# Calendar masks are identical for every house, so compute them once.
n_samples = len(timestamps)
hours = timestamps.hour.to_numpy()
months = timestamps.month.to_numpy()
is_peak_hour = ((hours >= 6) & (hours <= 10)) | ((hours >= 17) & (hours <= 22))
is_summer = (months >= 3) & (months <= 6)           # March to June
is_post_monsoon = (months >= 10) & (months <= 12)   # October to December
is_daylight = (hours >= 6) & (hours <= 18)

for house in range(1, num_houses + 1):
    # Base load: mean 0.4 during morning/evening peaks, 0.2 otherwise.
    energy_consumption = np.random.normal(
        loc=np.where(is_peak_hour, 0.4, 0.2), scale=0.1, size=n_samples
    )

    # Seasonal multiplier; months outside both seasons keep a factor of 1.
    seasonal_factor = np.ones(n_samples)
    seasonal_factor[is_summer] = np.random.uniform(1.4, 1.8, size=int(is_summer.sum()))
    seasonal_factor[is_post_monsoon] = np.random.uniform(1.1, 1.3, size=int(is_post_monsoon.sum()))

    # House-specific multiplier; houses without an entry are left unscaled.
    factor_low, factor_high = HOUSE_FACTOR_RANGES.get(house, (1.0, 1.0))
    house_factor = np.random.uniform(factor_low, factor_high, size=n_samples)

    energy_consumption = energy_consumption * seasonal_factor * house_factor * CONSUMPTION_SCALE

    # The normal draw can be negative. Take the magnitude *before* deriving the
    # renewable share; doing it afterwards lets the renewable value end up larger
    # than the consumption it is supposed to be a part of.
    energy_consumption = np.round(np.abs(energy_consumption), 3)

    # One renewable share per timestamp (size=n_samples). Without `size`, a single
    # scalar is drawn per branch and reused for every hour of the nine years.
    renewable_share = np.where(
        is_daylight,
        np.random.uniform(0.6, 1.2, size=n_samples),
        np.random.uniform(0.2, 0.6, size=n_samples),
    )

    # Renewable usage can never exceed total usage.
    renewable_energy_consumption = np.minimum(energy_consumption * renewable_share, energy_consumption)
    renewable_energy_consumption = np.round(renewable_energy_consumption, 3)

    dataset[f"Energy_Consumption_House_{house}"] = energy_consumption
    dataset[f"Renewable_Energy_Consumption_House_{house}"] = renewable_energy_consumption


# Calendar features read straight off the hourly DatetimeIndex.
hourly_index = dataset.index
dataset["Hour_of_the_Day"] = hourly_index.hour
dataset["Day_of_the_Week"] = hourly_index.dayofweek
dataset["Month"] = hourly_index.month

# Synthetic temperature: a full-length daytime series (25-37) and a full-length
# night-time series (20-25) are drawn, then one is picked per row by hour of day.
n_rows = len(timestamps)
temperature_day = np.round(np.random.uniform(low=25, high=37, size=n_rows), 1)
temperature_night = np.round(np.random.uniform(low=20, high=25, size=n_rows), 1)
is_daytime = dataset["Hour_of_the_Day"].between(6, 18)  # inclusive on both ends
temperature = np.where(is_daytime, temperature_day, temperature_night)

dataset["Temperature"] = temperature

# Weekend uplift: Saturday (5) and Sunday (6) total consumption is scaled by 1.2.
# Only the total-consumption columns are touched, not the renewable ones.
consumption_columns = [f"Energy_Consumption_House_{house}" for house in range(1, num_houses + 1)]
is_weekend = dataset["Day_of_the_Week"].isin([5, 6])
dataset.loc[is_weekend, consumption_columns] *= 1.2

# Replace every value with its magnitude so no negative reading is written out.
dataset = dataset.abs()
# Added after the abs() step because date objects are not numeric.
dataset["Date"] = dataset.index.date

dataset.to_csv("usage_data_non_neg.csv", index_label="Timestamp")


In [None]:
# Reload the generated dataset from disk and shift Temperature by a fixed,
# month-dependent offset.
data = pd.read_csv('usage_data_non_neg.csv')

summer_increase = 5  # added to Temperature for March-June
rainy_winter_decrease = 3  # subtracted from Temperature for July-October
other_seasons_decrease = 2  # subtracted from Temperature for November-February

# (months, signed offset) pairs; every calendar month appears in exactly one group.
seasonal_offsets = (
    ((3, 4, 5, 6), summer_increase),
    ((7, 8, 9, 10), -rainy_winter_decrease),
    ((11, 12, 1, 2), -other_seasons_decrease),
)

for season_months, offset in seasonal_offsets:
    in_season = data['Month'].isin(season_months)
    data.loc[in_season, 'Temperature'] += offset


In [None]:
temperature_threshold = 30

# Houses whose columns receive the hot-weather adjustment; House 2 is not in the list.
adjusted_houses = (1, 3, 4, 5)
energy_columns = [f'Energy_Consumption_House_{n}' for n in adjusted_houses]
renewable_columns = [f'Renewable_Energy_Consumption_House_{n}' for n in adjusted_houses]

# Rows strictly above the threshold: total consumption is scaled by 1.2 and
# renewable consumption is raised by a flat 0.09.
is_hot = data["Temperature"] > temperature_threshold
data.loc[is_hot, energy_columns] *= 1.2
data.loc[is_hot, renewable_columns] += 0.09

# Re-apply the cap so renewable consumption never exceeds total consumption.
# The two column lists are index-aligned, so an element-wise minimum pairs each
# house's renewable column with its own total column.
data[renewable_columns] = np.minimum(
    data[renewable_columns].to_numpy(),
    data[energy_columns].to_numpy(),
)


In [None]:
# Persist the adjusted dataset without writing the row index as a column.
data.to_csv(
    'final_energy_consumption_dataset.csv',
    index=False,
)

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Train a two-output regressor for House 1 (total and renewable consumption)
# and report hold-out metrics.

RANDOM_STATE = 42  # fixed so the split and the fitted model are reproducible

# Load the dataset
data = pd.read_csv('final_energy_consumption_dataset.csv')

# Identifier columns that carry no numeric signal for the model.
columns_to_drop = ['Timestamp', "Date"]
data = data.drop(columns=columns_to_drop)

target_columns = ['Energy_Consumption_House_1', 'Renewable_Energy_Consumption_House_1']

# Remove *every* per-house consumption column from the features. Selecting them
# by prefix (rather than reusing column lists built in an earlier cell, which
# omit House 2) keeps another house's simultaneous consumption from leaking in
# as a feature and makes this cell independent of earlier kernel state.
house_column_prefixes = ('Energy_Consumption_House_', 'Renewable_Energy_Consumption_House_')
house_columns = [col for col in data.columns if col.startswith(house_column_prefixes)]

X = data.drop(columns=house_columns)
y = data[target_columns]

# 80/20 random split. The original call passed a bare positional `r` after a
# keyword argument, which is a SyntaxError; the seed is passed by keyword here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

xgb_model = xgb.XGBRegressor(random_state=RANDOM_STATE)

# MultiOutputRegressor fits one independent copy of the base model per target column.
model = MultiOutputRegressor(xgb_model)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# With two targets, both metrics are averaged uniformly across the outputs
# (scikit-learn's default `multioutput` behaviour).
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R-squared Score:', r2)
