In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

In [2]:
cols = ["Relative Compactness", "Surface Area", "Wall Area", "Roof Area", "Overall Height","Orientation", "Glazing Area", "Glazing Area Distribution", "Heating Load", "Cooling Load"]
dataset = pd.read_excel("energy_efficient.xlsx", names=cols)
dataset

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0,20.84,28.28
...,...,...,...,...,...,...,...,...,...,...
763,0.64,784.0,343.0,220.50,3.5,5,0.4,5,17.88,21.40
764,0.62,808.5,367.5,220.50,3.5,2,0.4,5,16.54,16.88
765,0.62,808.5,367.5,220.50,3.5,3,0.4,5,16.44,17.11
766,0.62,808.5,367.5,220.50,3.5,4,0.4,5,16.48,16.61


In [3]:
threshold_heating = dataset["Heating Load"].median()
dataset["Heating Load"] = (dataset["Heating Load"] <= threshold_heating).astype(int)
threshold_cooling = dataset["Cooling Load"].median()
dataset["Cooling Load"] = (dataset["Cooling Load"] <= threshold_cooling).astype(int)

In [4]:
dataset_heating = dataset.iloc[:, :-1]
dataset_heating

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,1
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,1
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,1
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,1
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0,0
...,...,...,...,...,...,...,...,...,...
763,0.64,784.0,343.0,220.50,3.5,5,0.4,5,1
764,0.62,808.5,367.5,220.50,3.5,2,0.4,5,1
765,0.62,808.5,367.5,220.50,3.5,3,0.4,5,1
766,0.62,808.5,367.5,220.50,3.5,4,0.4,5,1


In [5]:
train, test = train_test_split(dataset_heating, test_size=0.2,  random_state=42)

In [6]:
# for label in cols[:-2]:
#     plt.hist(dataset[dataset["Heating Load"] == 1][label], color="blue", label="high", alpha=0.7, density=True)
#     plt.hist(dataset[dataset["Heating Load"] == 0][label], color="red", label="low", alpha=0.7, density=True)
#     plt.title(label)
#     plt.ylabel("Heating Load")
#     plt.xlabel(label)
#     plt.legend()
#     plt.show()

In [7]:
def scale_dataset(dataframe, oversample=False):
    X = dataframe.iloc[:, :-1].values
    y = dataframe.iloc[:, -1].values

    scaler = StandardScaler()
    X_scale = scaler.fit_transform(X)

    if oversample:
        ros = RandomOverSampler()
        X, y = ros.fit_resample(X, y)

    data = np.hstack((X , np.reshape(y, (-1, 1))))

    return data, X , y

In [8]:
train, X_train, y_train = scale_dataset(train, oversample=True)
test, X_test, y_test = scale_dataset(test, oversample=False)

In [9]:
len(X_train), len(X_test)

(626, 154)

In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [11]:
knn_model = KNeighborsClassifier(n_neighbors=1)
knn_model.fit(X_train, y_train)

In [12]:
y_pred = knn_model.predict(X_test)

In [13]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.96      0.98        83
           1       0.96      1.00      0.98        71

    accuracy                           0.98       154
   macro avg       0.98      0.98      0.98       154
weighted avg       0.98      0.98      0.98       154



In [14]:
from sklearn.naive_bayes import GaussianNB 

In [15]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

In [16]:
y_pred = nb_model.predict(X_test)

In [17]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98        83
           1       0.97      0.99      0.98        71

    accuracy                           0.98       154
   macro avg       0.98      0.98      0.98       154
weighted avg       0.98      0.98      0.98       154



In [18]:
from sklearn.linear_model import LogisticRegression

In [None]:
lg_model = LogisticRegression()
lg_model.fit(X_train, y_train)

In [20]:
y_pred = lg_model.predict(X_test)

In [21]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98        83
           1       0.97      0.99      0.98        71

    accuracy                           0.98       154
   macro avg       0.98      0.98      0.98       154
weighted avg       0.98      0.98      0.98       154

