In [1]:
# Import all necessary modules
import numpy as np
import matplotlib.pyplot as plt
import glob, json
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

In [9]:
# Load file names and labels for the processed data
data_folder_prefix = "250x250/Featured_Data_Update/Seg_Featured_"

with open("250x250/data_labels.json", 'r') as json_file:
    label_dict = json.load(json_file)

data_files = glob.glob("250x250/Featured_Data_Update/Seg_Featured_*")
avalible_files=[]
for fl in data_files:
    avalible_files.append(fl[len("250x250/Featured_Data_Update\Seg_Featured_"):-len(".npy")])

print(len(avalible_files), avalible_files[:5])

file_names = set.intersection(set(avalible_files), set(list(label_dict.keys())))
print(len(file_names))

labels = []
for fl in file_names:
    labels.append(label_dict[fl])
print(len(labels))

files = [data_folder_prefix+x+".npy" for x in file_names]

max_len = len([0]*576+[0]*576+[0]*576+[0]*250*2+[0]*1) 
flattened_test_load = np.zeros((max_len, ))
print(flattened_test_load.shape)

for fl in files:
    data = np.load(fl, None, allow_pickle=True)
    flattened_test_load = np.vstack((flattened_test_load, data))


flattened_data = flattened_test_load[1:,:]
flattened_data.shape, len(labels)

995 ['Data_20120330_01_004_0', 'Data_20120330_01_004_1', 'Data_20120330_01_004_10', 'Data_20120330_01_004_11', 'Data_20120330_01_004_12']
634
634
(2229,)


((634, 2229), 634)

In [4]:
# Load file names and labels for the processed data
data_folder_prefix = "250x250/Seg_Featured_"

with open("250x250/data_labels.json", 'r') as json_file:
    label_dict = json.load(json_file)


file_names = list(label_dict.keys())
print(len(file_names))

labels = []
for fl in file_names:
    labels.append(label_dict[fl])
print(len(labels))
files = [data_folder_prefix+x+".npy" for x in file_names]

743
743


In [5]:
# Revised 

# files = glob.glob("Seg_Featured_Data/*.npy")
max_len = 250*250+7
num_padded = 0
flattened_test_load = np.zeros((max_len, ))
print(flattened_test_load.shape)

for fl in files:
    test_load = np.load(fl, None, allow_pickle=True)
    y,x,_ = test_load.shape

    # Ignore depth 0
    # Keep all Depth 1
    #  if the x,y is less than 250x50 pad w 0s
    layers_depth = test_load[:,:,1]
    # print(layers_depth.shape)
    if x < 250:
        layers_depth = np.hstack((layers_depth, np.zeros((y, y-x))))
        num_padded +=1
    if y < 250:
        layers_depth = np.hstack((layers_depth, np.zeros((250-y, 250))))
    layers_depth = layers_depth.flatten()
    # print(layers_depth.shape)


    # Get the metrics from the other depths
    vl, ct = np.unique(test_load[:,:,5], return_counts=True)
    if len(ct)>1:
        num_bp = ct[1]
    else:
        num_bp = 0

    # print("conected components (mean, std)", np.mean(test_load[:y//10-1,:x//10-1,2]), np.std(test_load[:y//10-1,:x//10-1,2]))
    # print("orientaiton (mean, std)", np.mean(test_load[:y//10-1,:x//10-1,3]), np.std(test_load[:y//10-1,:x//10-1,3]))
    # print("mean orientaiton (mean, std)", np.mean(test_load[:250//10-1,:250//10-1,4]), np.std(test_load[:250//10-1,:250//10-1,4]))
    # print("num breakpoints",num_bp)
    # print("dist map (mean, std)", np.mean(test_load[:,:,6]), np.std(test_load[:,:,6]))

    data= np.append(layers_depth, [np.mean(test_load[:y//10-1,:x//10-1,2]), np.std(test_load[:y//10-1,:x//10-1,2]),
                                    np.mean(test_load[:y//10-1,:x//10-1,3]), np.std(test_load[:y//10-1,:x//10-1,3]),
                                    num_bp,
                                    np.mean(test_load[:,:,6]), np.std(test_load[:,:,6])])

    # print(data.shape)
    flattened_test_load = np.vstack((flattened_test_load, data))

print(flattened_test_load.shape,flattened_test_load[0].shape, "num padded:", num_padded)

flattened_data = flattened_test_load[1:,:]
flattened_data.shape, len(labels)


(62507,)
(744, 62507) (62507,) num padded: 106


((743, 62507), 743)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(flattened_data, labels, test_size=0.2)

In [11]:
print(X_train.shape)
print(X_test.shape)
print(len(y_train))
print(len(y_test))

(507, 2229)
(127, 2229)
507
127


In [12]:
def mse(y_true, y_pred):
    return np.average(np.square(np.subtract(y_true, y_pred)))

In [13]:
def rmse(y_true, y_pred):
    return np.sqrt(np.average(np.square(np.subtract(y_true, y_pred))))

In [14]:
def mae(y_true, y_pred):
    return np.average(np.absolute(np.subtract(y_true, y_pred)))

In [15]:
# Build a LinearRegression model and train it on (X_train, y_train)
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

In [16]:
# Test lin_reg on X_test
y_pred_lin_reg = lin_reg.predict(X_test)

mae_lin_reg = mean_absolute_error(y_test, y_pred_lin_reg)
print('Linear Regression MAE', mae_lin_reg)

mse_lin_reg = mean_squared_error(y_test, y_pred_lin_reg)
print('Linear Regression MSE', mse_lin_reg)

Linear Regression MAE 1.670425090479847
Linear Regression MSE 4.636249582720583


In [17]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svr = SVR(kernel='linear')
svr.fit(X_train, y_train)

In [18]:
# Test svr
y_pred_svr = svr.predict(X_test)

In [19]:
# Test lin_reg on X_test
y_pred_svr = svr.predict(X_test)

mae_srv = mean_absolute_error(y_test, y_pred_svr)
print('SRV MAE', mae_srv)

mse_srv = mean_squared_error(y_test, y_pred_svr)
print('SRV MSE', mse_srv)

SRV MAE 1.4413666063858876
SRV MSE 3.4760130068418227


In [20]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg = XGBRegressor(objective ='reg:squarederror')
xgb_reg.fit(X_train, y_train)

In [21]:
# Test xgb_reg
y_pred_xgb = xgb_reg.predict(X_test)


mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
print('SRV MAE', mae_xgb)

mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print('SRV MSE', mse_xgb)

SRV MAE 0.7363377443448765
SRV MSE 1.2575617120541245


In [22]:
y_pred_xgb10 = np.rint(y_pred_xgb)

In [23]:
y_test10 = np.rint(y_test)

In [24]:
confusion_matrix(y_test10, y_pred_xgb10)

array([[ 0,  0,  0,  2,  4],
       [ 0,  0,  0,  0,  1],
       [ 0,  0,  0,  2,  5],
       [ 0,  1,  0,  4, 11],
       [ 0,  0,  8, 21, 68]], dtype=int64)

In [25]:
balanced_acc = balanced_accuracy_score(y_test10, y_pred_xgb10)
print(balanced_acc)

0.1902061855670103


In [26]:
print(classification_report(y_test10, y_pred_xgb10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.14      0.25      0.18        16
         4.0       0.76      0.70      0.73        97

    accuracy                           0.57       127
   macro avg       0.18      0.19      0.18       127
weighted avg       0.60      0.57      0.58       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
print(y_test10[0:10])

[4. 4. 4. 4. 4. 4. 4. 4. 4. 3.]


In [28]:
print(y_pred_xgb10[0:10])

[2. 4. 4. 4. 4. 4. 2. 4. 4. 3.]


In [52]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svrL = SVR(kernel='linear')
svrL.fit(X_train1, y_train1)

# Test lin_reg on X_test
y_pred_svrL = svrL.predict(X_test1)

mae_svrL = mean_absolute_error(y_test, np.multiply(y_pred_svrL, 4))
print('SRV MAE', mae_svrL)

mse_svrL = mean_squared_error(y_test, np.multiply(y_pred_svrL, 4))
print('SRV MSE', mse_svrL)

SRV MAE 1.5986129494036398
SRV MSE 4.434646308685692


In [53]:
max_svrL = np.max(y_pred_svrL)

In [54]:
y_pred_svr10 = np.rint(np.multiply(y_pred_svrL, 4))
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_svr10))

              precision    recall  f1-score   support

        -2.0       0.00      0.00      0.00         0
        -1.0       0.00      0.00      0.00         0
         0.0       0.25      0.17      0.20         6
         1.0       0.00      0.00      0.00         1
         2.0       0.05      0.14      0.08         7
         3.0       0.08      0.12      0.10        16
         4.0       0.68      0.20      0.30        97
         5.0       0.00      0.00      0.00         0
         6.0       0.00      0.00      0.00         0
         7.0       0.00      0.00      0.00         0
         8.0       0.00      0.00      0.00         0
         9.0       0.00      0.00      0.00         0

    accuracy                           0.18       127
   macro avg       0.09      0.05      0.06       127
weighted avg       0.54      0.18      0.26       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [55]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svrR = SVR(kernel='rbf')
svrR.fit(X_train1, y_train1)

# Test lin_reg on X_test
y_pred_svrR = svrR.predict(X_test1)

mae_svrR = mean_absolute_error(y_test, np.multiply(y_pred_svrR, 4))
print('SRV MAE', mae_svrR)

mse_svrR = mean_squared_error(y_test, np.multiply(y_pred_svrR, 4))
print('SRV MSE', mse_svrR)

SRV MAE 0.7101671299775499
SRV MSE 0.9846162429865946


In [56]:
y_pred_svr10 = np.rint(np.multiply(y_pred_svrR, 4))
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_svr10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.11      0.38      0.17        16
         4.0       0.76      0.56      0.64        97

    accuracy                           0.47       127
   macro avg       0.17      0.19      0.16       127
weighted avg       0.59      0.47      0.51       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [57]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svr3 = SVR(kernel="poly", degree=3)
svr3.fit(X_train1, y_train1)

# Test lin_reg on X_test
y_pred_svr3 = svr3.predict(X_test1)

mae_svr3 = mean_absolute_error(y_test, np.multiply(y_pred_svr3, 4))
print('SRV MAE', mae_svr3)

mse_svr3 = mean_squared_error(y_test, np.multiply(y_pred_svr3, 4))
print('SRV MSE', mse_svr3)

SRV MAE 0.7115631353111643
SRV MSE 1.1067803064761499


In [58]:
y_pred_svr10 = np.rint(np.multiply(y_pred_svr3, 4))
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_svr10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.14      0.38      0.20        16
         4.0       0.78      0.64      0.70        97
         5.0       0.00      0.00      0.00         0

    accuracy                           0.54       127
   macro avg       0.15      0.17      0.15       127
weighted avg       0.61      0.54      0.56       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svr4 = SVR(kernel="poly", degree=4)
svr4.fit(X_train1, y_train1)

# Test lin_reg on X_test
y_pred_svr4 = svr4.predict(X_test1)

mae_svr4 = mean_absolute_error(y_test, np.multiply(y_pred_svr4, 4))
print('SRV MAE', mae_svr4)

mse_svr4 = mean_squared_error(y_test1, np.multiply(y_pred_svr4, 4))
print('SRV MSE', mse_svr4)

SRV MAE 0.7251805710334881
SRV MSE 7.097117704644134


In [78]:
# Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
svrS = SVR(kernel="sigmoid")
svrS.fit(X_train1, y_train1)

# Test lin_reg on X_test
y_pred_svrS = svrS.predict(X_test1)

mae_svrS = mean_absolute_error(y_test, np.multiply(y_pred_svrS, 4))
print('SRV MAE', mae_svrS)

mse_svrS = mean_squared_error(y_test, np.multiply(y_pred_svrS, 4))
print('SRV MSE', mse_svrS)

SRV MAE 1.207218292314996
SRV MSE 3.3767953557946204


In [79]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg = XGBRegressor(objective ='reg:squarederror')
xgb_reg.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb = xgb_reg.predict(X_test)


mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
print('SRV MAE', mae_xgb)

mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print('SRV MSE', mse_xgb)

SRV MAE 0.7363377443448765
SRV MSE 1.2575617120541245


In [80]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg1 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = .2)
xgb_reg1.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb1 = xgb_reg1.predict(X_test)


mae_xgb1 = mean_absolute_error(y_test, y_pred_xgb1)
print('SRV MAE', mae_xgb1)

mse_xgb1 = mean_squared_error(y_test, y_pred_xgb1)
print('SRV MSE', mse_xgb1)

SRV MAE 0.702674856336098
SRV MSE 1.1942818201705918


In [81]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg2 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = .5)
xgb_reg2.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb2 = xgb_reg2.predict(X_test)


mae_xgb2 = mean_absolute_error(y_test, y_pred_xgb2)
print('SRV MAE', mae_xgb2)

mse_xgb2 = mean_squared_error(y_test, y_pred_xgb2)
print('SRV MSE', mse_xgb2)

SRV MAE 0.6767689768723616
SRV MSE 1.1450596243620361


In [82]:
y_pred_xgb10 = np.rint(y_pred_xgb2)
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_xgb10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.16      0.38      0.23        16
         4.0       0.79      0.71      0.75        97

    accuracy                           0.59       127
   macro avg       0.19      0.22      0.20       127
weighted avg       0.63      0.59      0.60       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [83]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg3 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 5)
xgb_reg3.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb3 = xgb_reg3.predict(X_test)


mae_xgb3 = mean_absolute_error(y_test, y_pred_xgb3)
print('SRV MAE', mae_xgb3)

mse_xgb3 = mean_squared_error(y_test, y_pred_xgb3)
print('SRV MSE', mse_xgb3)

SRV MAE 0.6728765100944699
SRV MSE 1.1632487104492253


In [84]:
y_pred_xgb10 = np.rint(y_pred_xgb3)
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_xgb10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.22      0.50      0.31        16
         4.0       0.82      0.70      0.76        97

    accuracy                           0.60       127
   macro avg       0.21      0.24      0.21       127
weighted avg       0.65      0.60      0.62       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [85]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg4 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 20)
xgb_reg4.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb4 = xgb_reg4.predict(X_test)


mae_xgb4 = mean_absolute_error(y_test, y_pred_xgb4)
print('SRV MAE', mae_xgb4)

mse_xgb4 = mean_squared_error(y_test, y_pred_xgb4)
print('SRV MSE', mse_xgb4)

SRV MAE 0.7498165657670479
SRV MSE 1.3470151541214932


In [86]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg5 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 25)
xgb_reg5.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb5 = xgb_reg5.predict(X_test)


mae_xgb5 = mean_absolute_error(y_test, y_pred_xgb5)
print('SRV MAE', mae_xgb5)

mse_xgb5 = mean_squared_error(y_test, y_pred_xgb5)
print('SRV MSE', mse_xgb5)

SRV MAE 0.7625838710567144
SRV MSE 1.3939677476283798


In [87]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg4 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 30)
xgb_reg4.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb4 = xgb_reg4.predict(X_test)


mae_xgb4 = mean_absolute_error(y_test, y_pred_xgb4)
print('SRV MAE', mae_xgb4)

mse_xgb4 = mean_squared_error(y_test, y_pred_xgb4)
print('SRV MSE', mse_xgb4)

SRV MAE 0.7150369084726168
SRV MSE 1.210115178366863


In [88]:
y_pred_xgb10 = np.rint(y_pred_xgb4)
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_xgb10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.18      0.50      0.26        16
         4.0       0.81      0.63      0.71        97

    accuracy                           0.54       127
   macro avg       0.20      0.23      0.19       127
weighted avg       0.64      0.54      0.57       127



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [89]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg4 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 35)
xgb_reg4.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb4 = xgb_reg4.predict(X_test)


mae_xgb4 = mean_absolute_error(y_test, y_pred_xgb4)
print('SRV MAE', mae_xgb4)

mse_xgb4 = mean_squared_error(y_test, y_pred_xgb4)
print('SRV MSE', mse_xgb4)

SRV MAE 0.6959115848766537
SRV MSE 1.1827032717924053


In [90]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg4 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 40)
xgb_reg4.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb4 = xgb_reg4.predict(X_test)


mae_xgb4 = mean_absolute_error(y_test, y_pred_xgb4)
print('SRV MAE', mae_xgb4)

mse_xgb4 = mean_squared_error(y_test, y_pred_xgb4)
print('SRV MSE', mse_xgb4)

SRV MAE 0.7109258799102363
SRV MSE 1.2194389228256817


In [91]:
y_pred_xgb10 = np.rint(y_pred_xgb4)
y_test10 = np.rint(y_test)
print(classification_report(y_test10, y_pred_xgb10))
print(confusion_matrix(y_test10, y_pred_xgb10))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         6
         1.0       0.00      0.00      0.00         1
         2.0       0.00      0.00      0.00         7
         3.0       0.16      0.38      0.23        16
         4.0       0.78      0.66      0.72        97

    accuracy                           0.55       127
   macro avg       0.19      0.21      0.19       127
weighted avg       0.62      0.55      0.57       127

[[ 0  0  1  1  4]
 [ 0  0  0  0  1]
 [ 0  0  0  3  4]
 [ 0  0  1  6  9]
 [ 0  1  5 27 64]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [92]:
print(y_pred_xgb10)

[3. 4. 4. 3. 4. 4. 3. 3. 4. 3. 4. 4. 4. 4. 4. 4. 4. 2. 3. 4. 3. 4. 4. 4.
 4. 4. 1. 4. 4. 4. 3. 3. 3. 4. 4. 4. 4. 3. 3. 3. 4. 4. 4. 4. 4. 4. 3. 4.
 4. 4. 2. 4. 4. 4. 3. 4. 4. 2. 3. 4. 3. 3. 4. 4. 3. 4. 4. 4. 4. 4. 4. 4.
 2. 3. 2. 4. 4. 4. 3. 4. 4. 3. 4. 3. 4. 4. 4. 4. 3. 4. 4. 4. 2. 3. 3. 4.
 4. 3. 4. 4. 4. 4. 3. 4. 4. 4. 4. 4. 4. 4. 3. 3. 4. 4. 2. 3. 3. 3. 4. 4.
 3. 4. 4. 3. 4. 3. 3.]


In [93]:
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

In [94]:
print(
'\nmR',recall_score(y_test10, y_pred_xgb10, average='micro'),'\nmP', precision_score(y_test10, y_pred_xgb10, average='micro'),
'\nMR',recall_score(y_test10, y_pred_xgb10, average='macro'),'\nMP', precision_score(y_test10, y_pred_xgb10, average='macro'),
'\nmF1',f1_score(y_test10, y_pred_xgb10,average='micro'),'\nMF1', f1_score(y_test10, y_pred_xgb10,average='macro'))


mR 0.5511811023622047 
mP 0.5511811023622047 
MR 0.20695876288659792 
MP 0.1885299934080422 
mF1 0.5511811023622047 
MF1 0.18829977864446082


  _warn_prf(average, modifier, msg_start, len(result))


In [77]:
print(labels)

[4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 2, 4, 4, 3, 4, 4, 4, 4, 3, 4, 3, 3, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, 4, 4, 3, 4, 2, 4, 4, 4, 3, 4, 0, 4, 4, 4, 3, 4, 4, 4, 4, 3, 2, 4, 3, 4, 4, 3, 4, 0, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 2, 4, 3, 4, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 2, 4, 4, 4, 4, 4, 4, 0, 4, 4, 0, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 1, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 3, 4, 4, 4, 3, 4, 4, 3, 0, 4, 3, 4, 3, 4, 4, 1, 4, 4, 4, 0, 2, 4, 0, 4, 4, 4, 4, 4, 4, 3, 4, 4, 3, 3, 2, 3, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 0, 4, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 1, 2, 4, 0, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 1, 4, 0, 3, 4, 4, 4, 4, 4, 3, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 4, 3, 

In [46]:
lablesNormilized = np.divide(labels, 4)

In [50]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(flattened_data, lablesNormilized, test_size=0.2)

In [51]:
X_test1.shape

(127, 2229)

In [95]:
# Build a xgb regression model and train it on (X_train, y_train)
xgb_reg4 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 40)
xgb_reg4.fit(X_train, y_train)

# Test xgb_reg
y_pred_xgb4 = xgb_reg4.predict(X_test)


mae_xgb4 = mean_absolute_error(y_test, y_pred_xgb4)
print('SRV MAE', mae_xgb4)

mse_xgb4 = mean_squared_error(y_test, y_pred_xgb4)
print('SRV MSE', mse_xgb4)

SRV MAE 0.7109258799102363
SRV MSE 1.2194389228256817
