In [1]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import datetime

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

# regressors
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from xgboost import XGBRegressor
from sklearn.svm import SVR

# classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import svm
import lightgbm as lgb

# for results
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import max_error
from sklearn.metrics import r2_score

# Cap DataFrame previews at 10 rows so the inspection cells stay short.
pd.set_option("display.max_rows", 10)

In [2]:
# load data
division = 'm'  # s or m (s = small-sized meeting room, m = medium-sized seminar room)

# Dataset sub-folder for each supported room size.
ROOM_DIRS = {'s': 'small-room', 'm': 'medium-room'}


def load_session_frames(pattern):
    """Read every CSV matching ``pattern`` into a header-less DataFrame.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the ``P=*.csv`` files of one session.

    Returns
    -------
    tuple (list of str, list of pandas.DataFrame)
        The matched paths sorted as plain strings, and the frames read from
        them in the same order.
    """
    # Plain string sort on purpose: it places 'P=10.csv' between 'P=1.csv'
    # and 'P=2.csv', which is the order the P=10 re-ordering cell expects.
    paths = sorted(glob.glob(pattern))
    if not paths:
        # An empty match would otherwise only surface later as an obscure error.
        print('warning: no files matched %s' % pattern)
    return paths, [pd.read_csv(path, header=None) for path in paths]


if division not in ROOM_DIRS:
    raise ValueError("division must be 's' or 'm', got %r" % (division,))

# Forward slashes are accepted on Windows as well as POSIX systems.
session_pattern = '../../datasets/' + ROOM_DIRS[division] + '/sess%d/P=*.csv'

# `l` is rebound for every session, so afterwards it holds the session-3 file list.
# session 1
l, df_fea_sess1 = load_session_frames(session_pattern % 1)

# session 2
l, df_fea_sess2 = load_session_frames(session_pattern % 2)

# session 3
l, df_fea_sess3 = load_session_frames(session_pattern % 3)

In [3]:
# Inspect the file list left over from the last load (session 3); it is sorted as plain strings.
l

['..\\..\\datasets\\medium-room\\sess3\\P=0.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=1.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=10.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=2.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=3.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=4.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=5.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=6.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=7.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=8.csv',
 '..\\..\\datasets\\medium-room\\sess3\\P=9.csv']

In [4]:
# Medium room only: move the P=10 data set to the end of every session list.
# The file lists were sorted as plain strings, which puts P=10.csv at
# position 2 (between P=1 and P=2).
# NOTE: this cell is not idempotent. Running it again without reloading the
# data would move a further frame to the end.

if division == 'm':
    # session 1
    temp1 = df_fea_sess1.pop(2)
    df_fea_sess1.append(temp1)

    # session 2
    temp2 = df_fea_sess2.pop(2)
    df_fea_sess2.append(temp2)

    # session 3
    temp3 = df_fea_sess3.pop(2)
    df_fea_sess3.append(temp3)

    print('done.')

done.


In [5]:
# Build the feature names that are used as column labels.
# Naming scheme: l<N1>_<xxx><N2>  (N1 = link number, N2 = subcarrier number).
# NOTE(review): in the labelled preview of df_fea_sess1[0], row 0 shows
# l1_min1 (0.556233) above l1_max1 (0.547783), so the label order may not match
# the CSV column order. Confirm against the feature-extraction code.

nof_link = 4
nof_usedsubc = 13

# suffixes generated for every used subcarrier, in this order
per_subcarrier_stats = ('std', 'min', 'max', 'qtl', 'qtu', 'avg', 'iqr')

col_label = []

for link in range(1, nof_link + 1):

    # all seven suffixes for subcarrier 1, then for subcarrier 2, ...
    for subc in range(1, nof_usedsubc + 1):
        col_label.extend('l%d_%s%d' % (link, stat, subc) for stat in per_subcarrier_stats)

    # nof_usedsubc - 1 'adj' labels per link
    col_label.extend('l%d_adj%d' % (link, k) for k in range(1, nof_usedsubc))

    # two labels without a subcarrier number
    col_label += ['l%d_euc' % link, 'l%d_rss' % link]

len(col_label)

420

In [6]:
# Replace the default integer column labels of every frame (sess1, sess2, sess3)
# with the feature names in col_label.
# NOTE: the assignment requires each frame to have exactly len(col_label)
# columns, so it must run before any extra column is added to the frames.

for session_frames in (df_fea_sess1, df_fea_sess2, df_fea_sess3):
    for frame in session_frames:
        frame.columns = col_label

In [7]:
# Preview the first session-1 frame to check the new column labels.
df_fea_sess1[0]

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj5,l4_adj6,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,3.255593,2.824882,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,3.329217,2.945392,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,3.390512,3.144561,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,3.327382,3.027890,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,3.273700,2.818021,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,0.557364,0.595671,0.593804,0.563037,0.548741,0.606306,0.674505,0.647142,0.726399,0.593987,...,3.307183,2.801403,1.189689,1.282997,2.137156,2.988761,2.181092,1.526116,1.288780,0.725632
195,0.583639,0.645127,0.609895,0.606136,0.579689,0.601357,0.697142,0.642774,0.712564,0.646622,...,3.303011,2.668910,1.218117,1.258081,2.072770,2.904706,2.106952,1.512701,1.255239,0.702359
196,0.577348,0.635703,0.549147,0.604853,0.563156,0.571387,0.723391,0.605568,0.731006,0.609101,...,3.350651,2.741673,1.257348,1.404358,2.172586,2.934249,2.287955,1.567191,1.305473,0.795444
197,0.575806,0.561338,0.514700,0.622597,0.538869,0.555903,0.657174,0.621989,0.704929,0.615367,...,3.331872,2.715729,1.351905,1.310305,2.124697,2.953141,2.285298,1.664113,1.331436,0.663437


In [8]:
# Ground truth
# Every frame gets a constant 'GT' column holding its position in the session
# list, so the label is only correct while the list order matches the P value.

for session_frames in (df_fea_sess1, df_fea_sess2, df_fea_sess3):
    for position, frame in enumerate(session_frames):
        frame['GT'] = position  # GT: ground truth

In [9]:
# Spot-check one frame: the frame at position 3 should now carry GT == 3.
df_fea_sess1[3]

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj6,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss,GT
0,1.830603,1.873160,1.725519,1.594962,1.513311,1.417834,1.282464,1.052657,0.974200,0.959019,...,3.948070,1.619502,1.737586,3.056655,2.481858,3.216479,1.641629,2.833969,2.505928,3
1,1.653972,1.700725,1.442612,1.373765,1.360974,1.302505,1.178472,0.948451,0.962930,0.978861,...,2.887147,1.621722,1.422434,2.023871,2.392200,2.613357,1.874043,2.460489,2.458782,3
2,1.919865,1.950287,1.596515,1.595795,1.476098,1.285457,1.212172,1.040958,1.112712,1.029208,...,2.433325,1.457065,1.313497,1.668600,2.215635,2.213719,1.742178,2.388251,1.841882,3
3,2.215348,2.244203,1.977337,1.939716,1.737858,1.500368,1.274563,1.022749,1.153209,1.155699,...,3.231465,1.457090,1.636411,2.088249,2.016143,2.699968,1.494795,2.451414,2.619573,3
4,2.056869,2.179477,2.168995,2.066923,1.973254,1.829268,1.557790,1.185382,1.152784,1.106285,...,3.475257,1.680644,1.722947,2.527573,2.301586,2.812813,1.487229,2.523600,2.846565,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,0.910188,0.929432,0.820374,0.841888,0.818081,0.780074,0.837690,0.763892,0.778448,0.725079,...,2.053912,1.429570,1.451467,1.543837,1.651854,1.874679,1.503917,2.262251,1.437628,3
195,0.846394,0.832982,0.753228,0.761799,0.682601,0.734084,0.798138,0.713589,0.710684,0.708480,...,3.388962,1.819057,1.783210,2.688159,2.718980,3.509049,2.113260,2.388466,2.251772,3
196,0.799441,0.832479,0.716002,0.716839,0.647494,0.697313,0.753961,0.733368,0.756890,0.748847,...,4.509243,2.614164,2.072276,3.154163,3.881063,4.361751,2.650688,2.747130,2.187316,3
197,0.862368,0.973360,0.861439,0.845210,0.727936,0.765795,0.778770,0.725881,0.733296,0.758743,...,4.704748,3.276289,2.261142,2.710720,3.640153,3.909919,2.856965,3.130545,2.235205,3


In [10]:
# Stack the per-class frames of each session into one frame per session.
# One pd.concat call per session replaces the grow-in-a-loop pattern, which
# re-copied the accumulated rows on every iteration and seeded the result with
# an empty DataFrame. The row index of each source frame is kept as is.
# An empty list still yields an empty DataFrame, as before.

df_sess1_tot = pd.concat(df_fea_sess1, axis = 0) if df_fea_sess1 else pd.DataFrame()
df_sess2_tot = pd.concat(df_fea_sess2, axis = 0) if df_fea_sess2 else pd.DataFrame()
df_sess3_tot = pd.concat(df_fea_sess3, axis = 0) if df_fea_sess3 else pd.DataFrame()

In [11]:
# Preview the stacked session-1 frame (the row index still restarts for every source file).
df_sess1_tot

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj6,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss,GT
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,2.824882,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346,0
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,2.945392,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633,0
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,3.144561,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579,0
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,3.027890,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008,0
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,2.818021,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,0.917898,0.919850,0.856698,0.804004,0.727550,0.679315,0.650399,0.645671,0.707827,0.656676,...,4.454451,2.749956,2.607276,3.803033,2.618672,4.155710,1.962209,4.423469,3.771152,10
195,0.993171,1.055292,0.918394,0.886796,0.840524,0.783448,0.783273,0.697142,0.768202,0.650290,...,4.439517,3.112935,2.914972,3.582024,2.502279,4.593882,2.161841,4.788501,3.268520,10
196,0.913375,1.006806,1.072936,1.082991,1.013199,0.927840,0.883479,0.827988,0.818503,0.758421,...,4.656702,2.967989,2.856366,3.668633,2.768351,4.650457,2.228464,5.654283,3.655176,10
197,0.987489,1.093021,1.092624,1.122730,1.011742,0.943294,0.923651,0.845975,0.821050,0.787015,...,4.555711,2.941429,2.859348,3.872804,2.970375,4.116094,2.294561,5.577052,3.980606,10


In [12]:
# session number
# Add a constant 'session' column (1, 2 or 3) to the matching per-session frame.

per_session_frames = (df_sess1_tot, df_sess2_tot, df_sess3_tot)
for session_no, session_frame in enumerate(per_session_frames, start=1):
    session_frame['session'] = session_no

In [13]:
# Confirm that the 'session' column was appended after 'GT'.
df_sess1_tot

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss,GT,session
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346,0,1
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633,0,1
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579,0,1
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008,0,1
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,0.917898,0.919850,0.856698,0.804004,0.727550,0.679315,0.650399,0.645671,0.707827,0.656676,...,2.749956,2.607276,3.803033,2.618672,4.155710,1.962209,4.423469,3.771152,10,1
195,0.993171,1.055292,0.918394,0.886796,0.840524,0.783448,0.783273,0.697142,0.768202,0.650290,...,3.112935,2.914972,3.582024,2.502279,4.593882,2.161841,4.788501,3.268520,10,1
196,0.913375,1.006806,1.072936,1.082991,1.013199,0.927840,0.883479,0.827988,0.818503,0.758421,...,2.967989,2.856366,3.668633,2.768351,4.650457,2.228464,5.654283,3.655176,10,1
197,0.987489,1.093021,1.092624,1.122730,1.011742,0.943294,0.923651,0.845975,0.821050,0.787015,...,2.941429,2.859348,3.872804,2.970375,4.116094,2.294561,5.577052,3.980606,10,1


In [14]:
# Combine the three per-session frames into one frame with a single concat,
# instead of growing an empty DataFrame step by step.
# The row index is left untouched here.
df_sess_tot = pd.concat([df_sess1_tot, df_sess2_tot, df_sess3_tot], axis = 0)

df_sess_tot

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss,GT,session
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346,0,1
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633,0,1
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579,0,1
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008,0,1
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,1.202281,1.290801,1.271125,1.305875,1.164755,1.178365,1.393361,1.390979,1.085079,1.281914,...,3.599491,3.233487,4.721437,3.916543,5.886878,4.088313,6.122421,3.924364,10,3
195,0.722978,0.720824,0.755536,0.760434,0.756769,0.859506,0.879017,0.809056,0.723456,0.669041,...,3.853352,3.410025,5.080214,4.169576,6.019021,3.874508,6.068462,3.071311,10,3
196,1.439172,1.433734,1.375746,1.396880,1.182754,1.100798,1.080205,0.930623,0.951749,0.980427,...,3.939145,3.469200,4.786500,3.846884,5.735223,3.777857,6.277089,3.256937,10,3
197,1.687076,1.663921,1.522277,1.425893,1.212331,1.112767,1.146947,1.055635,1.047915,0.989870,...,3.470630,3.189984,4.881475,3.785914,5.302870,3.445231,5.274994,3.496227,10,3


In [15]:
# row index reset
# Discard the repeating per-file row index and number the rows 0..n-1.

df_sess_tot = df_sess_tot.reset_index(drop=True)

df_sess_tot

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss,GT,session
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346,0,1
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633,0,1
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579,0,1
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008,0,1
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6562,1.202281,1.290801,1.271125,1.305875,1.164755,1.178365,1.393361,1.390979,1.085079,1.281914,...,3.599491,3.233487,4.721437,3.916543,5.886878,4.088313,6.122421,3.924364,10,3
6563,0.722978,0.720824,0.755536,0.760434,0.756769,0.859506,0.879017,0.809056,0.723456,0.669041,...,3.853352,3.410025,5.080214,4.169576,6.019021,3.874508,6.068462,3.071311,10,3
6564,1.439172,1.433734,1.375746,1.396880,1.182754,1.100798,1.080205,0.930623,0.951749,0.980427,...,3.939145,3.469200,4.786500,3.846884,5.735223,3.777857,6.277089,3.256937,10,3
6565,1.687076,1.663921,1.522277,1.425893,1.212331,1.112767,1.146947,1.055635,1.047915,0.989870,...,3.470630,3.189984,4.881475,3.785914,5.302870,3.445231,5.274994,3.496227,10,3


In [16]:
# data separation for leave-one-session-out

from sklearn.model_selection import LeaveOneGroupOut

# group id (session number) and regression target (GT) for every row
sessions = df_sess_tot['session']
y = df_sess_tot['GT']

# feature table = everything except the two bookkeeping columns
df_sess_tot2 = df_sess_tot.drop(columns=['session', 'GT'])

X = df_sess_tot2.copy()
X

Unnamed: 0,l1_std1,l1_min1,l1_max1,l1_qtl1,l1_qtu1,l1_avg1,l1_iqr1,l1_std2,l1_min2,l1_max2,...,l4_adj5,l4_adj6,l4_adj7,l4_adj8,l4_adj9,l4_adj10,l4_adj11,l4_adj12,l4_euc,l4_rss
0,0.508360,0.556233,0.547783,0.554438,0.538969,0.528093,0.595343,0.578696,0.637909,0.631446,...,3.255593,2.824882,1.398378,1.429693,2.379176,2.900677,2.177376,1.711222,1.581755,0.514346
1,0.485241,0.579260,0.549776,0.535239,0.512745,0.543510,0.614083,0.611247,0.662962,0.590504,...,3.329217,2.945392,1.482271,1.529019,2.439519,2.971721,2.243851,1.673655,1.701424,0.472633
2,0.523973,0.615076,0.563608,0.577708,0.536653,0.565550,0.661444,0.643379,0.719187,0.600445,...,3.390512,3.144561,1.535579,1.655584,2.537666,3.056356,2.314811,1.652796,1.740957,0.570579
3,0.521867,0.585397,0.514375,0.554391,0.509185,0.557026,0.588465,0.606894,0.623689,0.568644,...,3.327382,3.027890,1.446881,1.501696,2.350605,3.039381,2.209710,1.695750,1.661805,0.603008
4,0.520318,0.607385,0.544917,0.574134,0.548106,0.562364,0.583918,0.614920,0.654618,0.599471,...,3.273700,2.818021,1.362363,1.330560,2.080632,2.943398,2.176994,1.660822,1.575143,0.435911
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6562,1.202281,1.290801,1.271125,1.305875,1.164755,1.178365,1.393361,1.390979,1.085079,1.281914,...,3.177322,5.855425,3.599491,3.233487,4.721437,3.916543,5.886878,4.088313,6.122421,3.924364
6563,0.722978,0.720824,0.755536,0.760434,0.756769,0.859506,0.879017,0.809056,0.723456,0.669041,...,3.688230,6.062943,3.853352,3.410025,5.080214,4.169576,6.019021,3.874508,6.068462,3.071311
6564,1.439172,1.433734,1.375746,1.396880,1.182754,1.100798,1.080205,0.930623,0.951749,0.980427,...,3.616793,5.867715,3.939145,3.469200,4.786500,3.846884,5.735223,3.777857,6.277089,3.256937
6565,1.687076,1.663921,1.522277,1.425893,1.212331,1.112767,1.146947,1.055635,1.047915,0.989870,...,3.374324,5.820106,3.470630,3.189984,4.881475,3.785914,5.302870,3.445231,5.274994,3.496227


In [17]:
# from dataframe to numpy
# np.asarray returns an existing ndarray unchanged, so this cell can be re-run
# safely. Calling .values again on the converted arrays would raise AttributeError.

X = np.asarray(X)
y = np.asarray(y)

In [18]:
# Leave-one-group-out splitter; the groups are the session numbers in `sessions`.
logo = LeaveOneGroupOut()
# Show how many train/test splits the grouping produces.
logo.get_n_splits(X, y, groups = sessions)

3

In [22]:
# learning
# Leave-one-session-out evaluation: for every split produced by `logo`, each
# regressor is fitted on the training sessions and predicts the held-out one.

RANDOM_STATE = 42  # fixed seed so the stochastic regressors give repeatable scores

# per-fold targets and per-fold predictions of regressors 1..4
result_y_test=[]

result_y_pred_1=[]
result_y_pred_2=[]
result_y_pred_3=[]
result_y_pred_4=[]

count = 0

# regression models
reg_1 = LinearRegression()
reg_2 = RandomForestRegressor(random_state=RANDOM_STATE)
reg_3 = XGBRegressor(random_state=RANDOM_STATE)
reg_4 = lgb.LGBMRegressor(random_state=RANDOM_STATE)

# each regressor paired with the list that collects its per-fold predictions
regressors_and_results = [
    (reg_1, result_y_pred_1),
    (reg_2, result_y_pred_2),
    (reg_3, result_y_pred_3),
    (reg_4, result_y_pred_4),
]

print(datetime.datetime.now())

for train_index, test_index in logo.split(X, y, sessions):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Standardization: fitted on the training rows only, then applied to both parts
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # add result
    result_y_test.append(y_test)

    # fit + prediction (fit() re-trains the estimator from scratch in every fold)
    for reg, fold_predictions in regressors_and_results:
        reg.fit(X_train, y_train)
        fold_predictions.append(reg.predict(X_test))

    # progress: number of finished folds
    count += 1
    print(count)

print(datetime.datetime.now())

2022-02-14 15:16:12.730802
1
2
3
2022-02-14 15:19:21.469417


In [23]:
# result scores


def print_scores(title, y_true_folds, y_pred_folds):
    """Print median absolute error, max error and R2 pooled over all folds.

    Parameters
    ----------
    title : str
        Heading printed above the scores.
    y_true_folds : sequence of 1-D arrays
        Ground-truth values, one array per fold.
    y_pred_folds : sequence of 1-D arrays
        Predictions, one array per fold, in the same fold order.
    """
    # np.concatenate also works when the folds differ in length;
    # np.array(list_of_folds).flatten() only works for equally sized folds.
    y_true = np.concatenate(y_true_folds)
    y_pred = np.concatenate(y_pred_folds)

    print(title)
    print('###########################')
    print('Med-abs-err: ',round(median_absolute_error(y_true, y_pred),4))
    print('Max-err: ',round(max_error(y_true, y_pred),4))
    print('R2_score: ',round(r2_score(y_true, y_pred),4))
    print('###########################')


all_fold_predictions = (result_y_pred_1, result_y_pred_2, result_y_pred_3, result_y_pred_4)

for number, fold_predictions in enumerate(all_fold_predictions, start=1):
    if number > 1:
        print(' ')  # separator line between two regressors
    print_scores('Regressor %d' % number, result_y_test, fold_predictions)

Regressor 1
###########################
Med-abs-err:  0.54
Max-err:  7.7171
R2_score:  0.9043
###########################
 
Regressor 2
###########################
Med-abs-err:  0.46
Max-err:  4.19
R2_score:  0.9309
###########################
 
Regressor 3
###########################
Med-abs-err:  0.4403
Max-err:  4.198
R2_score:  0.927
###########################
 
Regressor 4
###########################
Med-abs-err:  0.4109
Max-err:  3.5168
R2_score:  0.9399
###########################
