In [1]:
import os
import csv
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
data_folder = '../dataset/vectors/'
#changed files (original one changed) #here for concept

# Number of leading data rows of all.csv whose cooling/heating values are parsed
# (the original code sliced the lines with [1:1470]).
NUM_ROWS_USED = 1469

# Read all.csv a single time; the first line is skipped as the header.
with open('all.csv', 'r') as reader:
    rows = list(reader)[1:]

# Each line is split on ';' and the first field is discarded. Of the remaining
# fields, index 0 is the sample path, index 2 the cooling value and index 3 the
# heating value.
paths, coolings, heatings = [], [], []
for line in rows[:NUM_ROWS_USED]:
    sample = line.split(';')[1:]
    paths.append(sample[0])
    coolings.append(int(sample[2]))
    # The last two characters of the heating field are dropped before parsing.
    # NOTE(review): presumably a trailing separator plus the newline - confirm
    # against the actual file format.
    heatings.append(int(sample[3][:-2]))
heatings = np.array(heatings)
coolings = np.array(coolings)
num_samples = len(paths)
print(num_samples)

#All paths
paths_all = [line.split(';')[1:][0] for line in rows]

# num_samples is deliberately rebound: from here on it counts every row.
num_samples = len(paths_all)
print(num_samples)

1469
101002


In [3]:
# Labels that extract_file() treats as rooms when accumulating floor area.
room_types = [
    'living_room',
    'kitchen',
    'bedroom',
    'bathroom',
    'restroom',
    'washing_room',
    'office',
    'closet',
    'balcony',
    'corridor',
    'dining_room',
    'laundry_room',
    'PS',
]


In [4]:
def extract_file(file) :
    """Compute simple geometric features from one floor-plan vector file.

    Every non-blank line is split on whitespace. The first four tokens are
    parsed as numbers and truncated to ``int`` (presumably ``x1 y1 x2 y2`` -
    the code pairs index 0 with 2 and index 1 with 3). The remaining tokens
    decide how the line is counted:

    * a ``door`` token  -> adds ``|c0 - c2| + |c1 - c3|`` to the door length;
    * a ``wall`` token  -> adds the same quantity to the wall length;
    * otherwise, if the fifth token is listed in ``room_types``
      -> adds ``|c0 - c2| * |c1 - c3|`` to the area.

    Blank lines are skipped, and lines without a fifth token are never
    treated as rooms (previously both cases raised ``IndexError``).

    Parameters
    ----------
    file : str
        Path of the text file to read.

    Returns
    -------
    tuple
        ``(max_number, door_length, wall_length, area, nb_door, nb_wall)``
        where ``max_number`` is the largest parsed coordinate (never below 0).
    """
    area = 0
    door_length, wall_length = 0, 0
    nb_door, nb_wall = 0, 0
    max_number = 0
    with open(file, 'r') as reader:
        for raw_line in reader:
            tokens = raw_line.split()
            if not tokens:
                # Blank line (e.g. trailing newline at end of file).
                continue
            coords = [int(float(value)) for value in tokens[:4]]
            max_number = max(max_number, *coords)
            if 'door' in tokens:
                door_length += abs(coords[0] - coords[2]) + abs(coords[1] - coords[3])
                nb_door += 1
            elif 'wall' in tokens:
                wall_length += abs(coords[0] - coords[2]) + abs(coords[1] - coords[3])
                nb_wall += 1
            elif len(tokens) > 4 and tokens[4] in room_types:
                area += abs(coords[0] - coords[2]) * abs(coords[1] - coords[3])

    return max_number, door_length, wall_length, area, nb_door, nb_wall

In [5]:
# Build the training table: one row of extracted features plus the cooling and
# heating values for every path loaded from all.csv.
# newline='' is what the csv module requires for files handed to csv.writer;
# without it, extra blank rows appear on platforms that translate '\n'.
with open('energy_main__.csv', 'w', newline='') as f :
    writer = csv.writer(f, delimiter=';')
    writer.writerow(['max_number', 'door_length', 'wall_length', 'area','nb_door','nb_wall' ,'cooling', 'heating'])
    for path, cooling, heating in zip(paths, coolings, heatings):
        sample_dir = os.path.join(data_folder, path)
        # os.listdir() order is arbitrary, so sort for a reproducible choice,
        # and skip macOS '.DS_Store' entries the same way the directory walk
        # later in this notebook does.
        candidates = sorted(
            name for name in os.listdir(sample_dir) if name != '.DS_Store')
        file = candidates[0]
        max_number, door_length, wall_length, area, nb_door, nb_wall = extract_file(
            os.path.join(sample_dir, file))
        writer.writerow([max_number, door_length, wall_length, area, nb_door, nb_wall, cooling, heating])

In [6]:
def process(path_data, training=True, MinMax=None):
    """Load a ';'-delimited feature table and scale its feature columns.

    Parameters
    ----------
    path_data : str
        Path of the CSV file. It must contain the six feature columns
        ``max_number, door_length, wall_length, area, nb_door, nb_wall``.
        In training mode it must also contain ``cooling`` and ``heating``.
    training : bool, default True
        If True, split 80/20 (``random_state=5``) and fit a new
        ``MinMaxScaler`` on the training part. If False, only transform the
        features with the scaler passed in ``MinMax``.
    MinMax : fitted scaler or None
        Object exposing ``transform``; required when ``training`` is False and
        ignored (replaced by a freshly fitted scaler) when it is True.

    Returns
    -------
    tuple
        Training mode: ``(X_train, X_test, y1_train, y1_test, y2_train,
        y2_test, scaler, data)`` where ``y1`` is the cooling column, ``y2`` the
        heating column (both as single-column DataFrames) and ``data`` the
        full table that was read.
        Prediction mode: ``(X_scaled, data)``.

    Raises
    ------
    ValueError
        If ``training`` is False and no scaler was supplied.
    """
    x_columns = ['max_number', 'door_length', 'wall_length', 'area','nb_door','nb_wall']
    y1_column = ['cooling']
    y2_column = ['heating']

    data = pd.read_csv(path_data, delimiter=";")

    print(data)
    X = data[x_columns]

    if training:
        # Splitting: a single call shuffles the features and both targets with
        # the same permutation, which is what the two separate calls with an
        # identical random_state produced before.
        X_train_div, X_test_div, y1_train, y1_test, y2_train, y2_test = train_test_split(
            X, data[y1_column], data[y2_column], random_state=5, test_size=0.2)

        # Scaling: fit on the training part only, then apply to the test part.
        MinMax = MinMaxScaler(feature_range=(0, 1))
        X_train_div = MinMax.fit_transform(X_train_div)
        X_test_div = MinMax.transform(X_test_div)
        return X_train_div, X_test_div, y1_train, y1_test, y2_train, y2_test, MinMax, data

    # Prediction: reuse the previously fitted scaler. The target columns are
    # not read here, so the input table does not need to contain them.
    if MinMax is None:
        raise ValueError("process(training=False) requires a fitted scaler in MinMax")
    X = MinMax.transform(X)
    return X, data


In [7]:
# Load the training table, split it, and keep the fitted scaler (minmax) so
# that prediction inputs can be scaled the same way later.
path_data = 'energy_main__.csv'
(X_train_div, X_test_div,
 y1_train, y1_test,
 y2_train, y2_test,
 minmax, data) = process(path_data, training=True)

      max_number  door_length  wall_length   area  nb_door  nb_wall  cooling  \
0            454          865         3338  18000       22       59  1558596   
1            616          487         2476   6000        6       21    52084   
2           1869         1850         6927   6000        8       21    53471   
3            437          418         1899   9940        9       33   104082   
4           1092         1158         5793   6000        7       22    53315   
...          ...          ...          ...    ...      ...      ...      ...   
1464         418          490         2196   7000       10       28    63586   
1465         329          188          993   5000        7       19    44009   
1466        1035          752         4560  10000        8       30   113349   
1467         632          566         2851   4000        4       13    37302   
1468         473          930         2700   9000       11       30    95234   

      heating  
0       10819  
1      

In [8]:
#Random Forest
# max_features=1.0 means "use every feature at each split". For regressors this
# is what the string 'auto' selected; 'auto' is deprecated and rejected by
# recent scikit-learn releases, whereas the float form is accepted by old and
# new versions alike.
param_grid = {'max_features': [1.0, 'log2'],
              'max_depth': [100]}  # 10,15,20,30,50,60, also possible change n_estimators
# model
model = RandomForestRegressor(random_state=5, n_estimators=2000, n_jobs=-1)



In [9]:
# Gradient boosting (scikit-learn's GradientBoostingRegressor, not XGBoost)
# NOTE(review): `params` is not passed to the estimator below, so these values
# currently have no effect.
params = {'max_depth':10 , 'min_samples_split': 2,
          'learning_rate': 0.0001, 'loss': 'ls'}
# This rebinds `model`: when the cells are run in order, the grid search that
# follows tunes this estimator rather than the RandomForestRegressor created in
# the previous cell.
# random_state is fixed (same seed as the split and the forest) so that the
# feature sub-sampling triggered by the max_features grid is reproducible.
model = GradientBoostingRegressor(n_estimators=100, random_state=5)

In [10]:
""" Cooling """
def _fit_grid_search(targets):
    """Wrap a 5-fold GridSearchCV over `model`/`param_grid` and fit it on the training features."""
    search = MultiOutputRegressor(GridSearchCV(
        model, param_grid, cv=5, return_train_score=True, verbose=2))
    search.fit(X_train_div, targets)
    return search


grid_search_rf = _fit_grid_search(y1_train)

cooling_train_r2 = r2_score(y1_train, grid_search_rf.predict(X_train_div))
print('The Train R2 score for cooling load is', cooling_train_r2)
cooling_test_r2 = r2_score(y1_test, grid_search_rf.predict(X_test_div))
print('The Test R2 score for cooling load is', cooling_test_r2)

# Heating
grid_search_rf2 = _fit_grid_search(y2_train)

heating_train_r2 = r2_score(y2_train, grid_search_rf2.predict(X_train_div))
print('The Train R2 score for heating load is', heating_train_r2)
heating_test_r2 = r2_score(y2_test, grid_search_rf2.predict(X_test_div))
print('The Test R2 score for heating load is', heating_test_r2)


Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] max_depth=100, max_features=auto ................................
[CV] ................. max_depth=100, max_features=auto, total=   0.4s
[CV] max_depth=100, max_features=auto ................................
[CV] ................. max_depth=100, max_features=auto, total=   0.3s
[CV] max_depth=100, max_features=auto ................................
[CV] ................. max_depth=100, max_features=auto, total=   0.3s
[CV] max_depth=100, max_features=auto ................................
[CV] ................. max_depth=100, max_features=auto, total=   0.3s
[CV] max_depth=100, max_features=auto ................................
[CV] ................. max_depth=100, max_features=auto, total=   0.3s
[CV] max_depth=100, max_features=log2 ................................
[CV] ................. max_depth=100, max_features=log2, total=   0.1s
[CV] max_depth=100, max_features=log2 ................................
[CV] ............

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    2.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    2.4s finished


In [11]:
### TEST ###
PATH = '../dataset/vectors/'

paths_test, coolings_test, heatings_test = [], [], []

print('Test data loading...')


def _children(parent, skipped):
    """Return the joined paths of the entries of `parent` whose names are not in `skipped`."""
    return [os.path.join(parent, name)
            for name in os.listdir(parent)
            if name not in skipped]


# Walk three directory levels below PATH and keep, for every third-level
# directory, the first entry that os.listdir() reports.
for dir_1 in _children(PATH, ('_DS_Store', '.DS_Store')):
    for dir_2 in _children(dir_1, ('.DS_Store',)):
        for dir_3 in _children(dir_2, ('.DS_Store',)):
            first_entry = os.listdir(dir_3)[0]
            paths_test.append(os.path.join(dir_3, first_entry))
print(len(paths_test))

Test data loading...
101046


In [34]:
### TEST ###
import glob
# NOTE(review): PATH names a different run directory (04-29_18-55-48) than the
# file listed below (06-06_18-53-00); it is only used by the commented-out glob.
PATH = '../outputs/04-29_18-55-48/rtv/'

coolings_test = []
heatings_test = []

print('Test data loading...')
#paths_test = glob.glob(PATH + '/*_sum.txt')
# A single, explicitly chosen floor-plan file is used as prediction input.
paths_test = [
    '../outputs/06-06_18-53-00/rtv/sample_2_gap_7_dist_5_length_5_wall_0.01_floorplan.txt',
]
print(paths_test)
print(len(paths_test))

Test data loading...
['../outputs/06-06_18-53-00/rtv/sample_2_gap_7_dist_5_length_5_wall_0.01_floorplan.txt']
1


In [35]:
# Write the extracted features of every prediction input to features_output.csv.
# newline='' is what the csv module requires for files handed to csv.writer.
with open('features_output.csv', 'w', newline='') as f :
    writer = csv.writer(f, delimiter=';')
    writer.writerow(['paths','max_number', 'door_length', 'wall_length', 'area','nb_door','nb_wall' ,'cooling', 'heating'])
    for path in paths_test :
        max_number, door_length, wall_length, area, nb_door, nb_wall = extract_file(path)
        # NOTE(review): the fixed slice drops the first 19 and the last 9
        # characters of the path. 19 is the length of '../dataset/vectors/', so
        # it presumably targets paths under that folder - confirm that it yields
        # a sensible identifier for paths coming from other directories.
        identifier = path[19:-9]
        # The cooling and heating targets are unknown here; explicit empty
        # fields keep every row as wide as the header (they still load as NaN).
        writer.writerow([identifier, max_number, door_length, wall_length, area, nb_door, nb_wall, '', ''])

In [65]:
# Scale the prediction features with the scaler that was fitted on the
# training split (minmax), instead of fitting a new one.
path_data_prediction = 'features_output.csv'
X, data_test = process(
    path_data_prediction,
    training=False,
    MinMax=minmax,
)

                                               paths  max_number  door_length  \
0  -53-00/rtv/sample_2_gap_7_dist_5_length_5_wall...         238          127   

   wall_length  area  nb_door  nb_wall  cooling  heating  
0         1112     0        4       22      NaN      NaN  


In [83]:
# Predict both loads for the scaled prediction features.
cooling_pred = grid_search_rf.predict(X)
heating_pred = grid_search_rf2.predict(X)

# Wrap the raw predictions in labelled single-column frames.
prediction_cooling = pd.DataFrame(cooling_pred, columns=['Cooling_Load'])
prediction_heating = pd.DataFrame(heating_pred, columns=['Heating_Load'])

print(cooling_pred, heating_pred)

[[20688.41451129]] [[199.50701566]]


In [56]:
# Combine the input identifiers with the predicted loads (truncated to
# integers) and save them.
path_dfs = data_test['paths']
# Keep the combined frame in `full`: DataFrame.to_csv() returns None when it is
# given a path, so chaining it onto the assignment left `full` bound to None.
full = pd.concat(
    [path_dfs,
     prediction_cooling.astype(np.int64),
     prediction_heating.astype(np.int64)],
    axis=1)
full.to_csv('energy_output.csv')

In [82]:
# Overwrite column 3 of the single prediction row with the value that the
# training row at idx_min has in that column. X is modified in place.
# NOTE(review): idx_min is only assigned in the cell labelled In [68] further
# down, so this cell fails on a fresh top-to-bottom run.
X[0,3] = X_train_div[idx_min][3]

In [60]:
# Shape of the scaled training feature matrix.
X_train_div.shape

(1175, 6)

In [68]:
# Index of the training row with the smallest Euclidean (L2) distance to X
# in the scaled feature space.
offsets = X_train_div - X
idx_min = np.argmin(np.linalg.norm(offsets, ord=2, axis=1))

In [69]:
# Scaled features of the training row selected by idx_min.
X_train_div[idx_min]

array([0.08794248, 0.0436747 , 0.05054063, 0.03571429, 0.11111111,
       0.05050505])

In [70]:
# Scaled features of the prediction input, for comparison with the row above.
X

array([[ 0.08130531,  0.03825301,  0.06880734, -0.03571429,  0.14814815,
         0.18181818]])

In [81]:
# Cooling (y1) and heating (y2) targets of the training row at idx_min.
y1_train.to_numpy()[idx_min],y2_train.to_numpy()[idx_min]

(array([20664]), array([200]))

In [88]:
# Element-wise ratio of the cooling targets to the heating targets of the
# training split.
# NOTE(review): a heating value of 0 would give inf/nan here. An assignment
# displays nothing, so the tuple recorded under this cell presumably came from
# an earlier version of it - confirm before relying on those numbers.
bla = y1_train.to_numpy()/y2_train.to_numpy()

(126.89058585539381, 9.24735854148215)