# Consufion Matrices

Print the confusion matrices nicely then try random sampling.

In [37]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, 
                            classification_report, multilabel_confusion_matrix, mean_squared_error, mean_absolute_error)
from sklearn.utils.class_weight import compute_sample_weight
import matplotlib.pyplot as plt
import numpy as np
import glob, json, os
from tabulate import tabulate
import pickle

In [3]:
# Load file names and labels for the processed data
data_folder_prefix = "../Seg_Featured_Data_Updated/Seg_Featured_"

with open("../data_labels.json", 'r') as json_file:
    label_dict = json.load(json_file)

data_files = glob.glob("../Seg_Featured_Data_Updated/Seg_Featured_*")
avalible_files=[]
for fl in data_files:
    avalible_files.append(fl[len("../Seg_Featured_Data_Updated\Seg_Featured_"):-len(".npy")])

print(len(avalible_files), avalible_files[:5])
# print(avalible_files[0], list(label_dict.keys())[0])


file_names = set.intersection(set(avalible_files), set(list(label_dict.keys())))
print(len(file_names))

labels = []
for fl in file_names:
    labels.append(label_dict[fl])
print(len(labels))

files = [data_folder_prefix+x+".npy" for x in file_names]

ct, values = np.unique(labels, return_counts=True)
print(ct, values, values/values.sum())

pred = [4]* len(files)
accuracy_score(labels, pred)

995 ['Data_20120330_01_004_0', 'Data_20120330_01_004_1', 'Data_20120330_01_004_10', 'Data_20120330_01_004_11', 'Data_20120330_01_004_12']
634
634
[0 1 2 3 4] [ 20   6  22 120 466] [0.03154574 0.00946372 0.03470032 0.18927445 0.73501577]


0.7350157728706624

In [4]:
max_len = len([0]*576+[0]*576+[0]*576+[0]*250*2+[0]*1) 
flattened_test_load = np.zeros((max_len, ))
print(flattened_test_load.shape)

for fl in files:
    data = np.load(fl, None, allow_pickle=True)
    flattened_test_load = np.vstack((flattened_test_load, data))

print(flattened_test_load.shape)

flattened_data = flattened_test_load[1:,:]
flattened_data.shape, len(labels)


(2229,)
(635, 2229)


((634, 2229), 634)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(flattened_data, labels, test_size=0.2, random_state=42)
print(len(X_train), len(X_test), len(y_train), len(y_test))

507 127 507 127


In [52]:
ct, values = np.unique(y_test, return_counts=True)
test_weights = values/values.sum()
print(ct, values, values/values.sum())
class_weight_dict = {}
for c in ct:
    class_weight_dict[c] = test_weights[c]

class_weight_dict

[0 1 2 3 4] [ 4  1  6 27 89] [0.03149606 0.00787402 0.04724409 0.21259843 0.7007874 ]


{0: 0.031496062992125984,
 1: 0.007874015748031496,
 2: 0.047244094488188976,
 3: 0.2125984251968504,
 4: 0.7007874015748031}

## RF

In [55]:
if(os.path.exists('../Models/RF/clf_0.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/RF/clf_0.pkl', 'rb') as f:
        clf0 = pickle.load(f)
else:
    # create Random Forest
    clf0 = RandomForestClassifier(
        n_estimators=500, 
        max_depth=None,
        min_samples_split=10, 
        class_weight='balanced'
        )

    clf0.fit(X_train, y_train)

    with open('../Models/RF/clf_0.pkl','wb') as f:
        pickle.dump(clf0,f)

pred_labels = clf0.predict(X_test)

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_labels, sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, clf0.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |        0 |   1 |        2 |       3 |       4 |
|----|----------|-----|----------|---------|---------|
|  0 | 0        |   0 | 0        | 0       | 25.4    |
|  1 | 0        |   0 | 0        | 0       | 25.4    |
|  2 | 0        |   0 | 0        | 0       | 25.4    |
|  3 | 0.940741 |   0 | 0        | 2.82222 | 21.637  |
|  4 | 0        |   0 | 0.285393 | 1.42697 | 23.6876 |


|    |   0 |   1 |   2 |   3 |   4 |
|----|-----|-----|-----|-----|-----|
|  0 |   0 |   0 |   0 |   0 |   4 |
|  1 |   0 |   0 |   0 |   0 |   1 |
|  2 |   0 |   0 |   0 |   0 |   6 |
|  3 |   1 |   0 |   0 |   3 |  23 |
|  4 |   0 |   0 |   1 |   5 |  83 |


In [60]:
# Doesnt classify anything as 0-3

# if(os.path.exists('../Models/RF/clf_0_w.pkl')):
#     # Load the random forest
#     with open('../Models/RF/clf_0_w.pkl', 'rb') as f:
#         clf_0_w = pickle.load(f)
# else:
#     # create Random Forest
#     clf_0_w = RandomForestClassifier(
#         n_estimators=500, 
#         max_depth=None,
#         min_samples_split=10, 
#         class_weight=class_weight_dict
#         )

#     clf_0_w.fit(X_train, y_train)

#     with open('../Models/RF/clf_0_w.pkl','wb') as f:
#         pickle.dump(clf_0_w,f)

# print("X= Pred, Y= True")
# print(tabulate(confusion_matrix(y_test, clf_0_w.predict(X_test), sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
# print("\n")
# print(tabulate(confusion_matrix(y_test, clf_0_w.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))


In [58]:
if(os.path.exists('../Models/RF/clf_1.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/RF/clf_1.pkl', 'rb') as f:
        clf_1 = pickle.load(f)
else:
    # Create rf model
    clf_1 = RandomForestClassifier(
        n_estimators=1000, max_depth=None,
        min_samples_split=10,
        class_weight='balanced'
        )

    clf_1.fit(X_train, y_train)

    with open('../Models/RF/clf_1.pkl','wb') as f:
        pickle.dump(clf_1,f)

pred_labels_1 = clf_1.predict(X_test)
print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_labels_1, sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, clf_1.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |        0 |   1 |        2 |       3 |       4 |
|----|----------|-----|----------|---------|---------|
|  0 | 0        |   0 | 0        | 0       | 25.4    |
|  1 | 0        |   0 | 0        | 0       | 25.4    |
|  2 | 0        |   0 | 0        | 0       | 25.4    |
|  3 | 0.940741 |   0 | 0        | 1.88148 | 22.5778 |
|  4 | 0        |   0 | 0.285393 | 1.42697 | 23.6876 |


|    |   0 |   1 |   2 |   3 |   4 |
|----|-----|-----|-----|-----|-----|
|  0 |   0 |   0 |   0 |   0 |   4 |
|  1 |   0 |   0 |   0 |   0 |   1 |
|  2 |   0 |   0 |   0 |   0 |   6 |
|  3 |   1 |   0 |   0 |   2 |  24 |
|  4 |   0 |   0 |   1 |   5 |  83 |


In [None]:
X_train, X_test, y_train, y_test = train_test_split(flattened_data, labels, test_size=0.2, random_state=42)
print(len(X_train), len(X_test), len(y_train), len(y_test))

507 127 507 127


In [59]:
# Doesnt classify anything as 0-3

# if(os.path.exists('../Models/RF/clf_1_w.pkl')):
#     # Load the random forest
#     with open('../Models/RF/clf_1_w.pkl', 'rb') as f:
#         clf_1_w = pickle.load(f)
# else:
#     # create Random Forest
#     clf_1_w = RandomForestClassifier(
#         n_estimators=500, 
#         max_depth=None,
#         min_samples_split=10, 
#         class_weight=class_weight_dict
#         )

#     clf_1_w.fit(X_train, y_train)

#     with open('../Models/RF/clf_1_w.pkl','wb') as f:
#         pickle.dump(clf_1_w,f)

# print("X= Pred, Y= True")
# print(tabulate(confusion_matrix(y_test, clf_1_w.predict(X_test), sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
# print("\n")
# print(tabulate(confusion_matrix(y_test, clf_1_w.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))


In [62]:
if(os.path.exists('../Models/RF/clf_2.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/RF/clf_2.pkl', 'rb') as f:
        clf_2 = pickle.load(f)
else:

    # create Random Forest
    clf_2 = RandomForestClassifier(
        n_estimators=1000, max_depth=None,
        min_samples_split=20,
        class_weight='balanced'
        )

    clf_2.fit(X_train, y_train)

    with open('../Models/RF/clf_2.pkl','wb') as f:
        pickle.dump(clf_2,f)

pred_labels_2 = clf_2.predict(X_test)

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_labels_2, sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, clf_1.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |        0 |   1 |        2 |       3 |       4 |
|----|----------|-----|----------|---------|---------|
|  0 | 0        |   0 | 0        | 0       | 25.4    |
|  1 | 0        |   0 | 0        | 0       | 25.4    |
|  2 | 0        |   0 | 0        | 0       | 25.4    |
|  3 | 0.940741 |   0 | 0        | 2.82222 | 21.637  |
|  4 | 0.285393 |   0 | 0.285393 | 1.99775 | 22.8315 |


|    |   0 |   1 |   2 |   3 |   4 |
|----|-----|-----|-----|-----|-----|
|  0 |   0 |   0 |   0 |   0 |   4 |
|  1 |   0 |   0 |   0 |   0 |   1 |
|  2 |   0 |   0 |   0 |   0 |   6 |
|  3 |   1 |   0 |   0 |   2 |  24 |
|  4 |   0 |   0 |   1 |   5 |  83 |


In [63]:
if(os.path.exists('../Models/RF/clf_3.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/RF/clf_3.pkl', 'rb') as f:
        clf_3 = pickle.load(f)
else:

    # create Random Forest
    clf_3 = RandomForestClassifier(
        n_estimators=10000, max_depth=5000,
        min_samples_split=10,
        class_weight='balanced'
        )

    clf_3.fit(X_train, y_train)

    with open('../Models/RF/clf_3.pkl','wb') as f:
        pickle.dump(clf_3,f)


pred_labels_3 = clf_3.predict(X_test)

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_labels_3, sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, clf_1.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |        0 |   1 |        2 |       3 |      4 |
|----|----------|-----|----------|---------|--------|
|  0 | 0        |   0 | 0        | 0       | 25.4   |
|  1 | 0        |   0 | 0        | 0       | 25.4   |
|  2 | 0        |   0 | 0        | 0       | 25.4   |
|  3 | 0.940741 |   0 | 0        | 2.82222 | 21.637 |
|  4 | 0.285393 |   0 | 0.285393 | 0.85618 | 23.973 |


|    |   0 |   1 |   2 |   3 |   4 |
|----|-----|-----|-----|-----|-----|
|  0 |   0 |   0 |   0 |   0 |   4 |
|  1 |   0 |   0 |   0 |   0 |   1 |
|  2 |   0 |   0 |   0 |   0 |   6 |
|  3 |   1 |   0 |   0 |   2 |  24 |
|  4 |   0 |   0 |   1 |   5 |  83 |


In [64]:
if(os.path.exists('../Models/RF/clf_4.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/RF/clf_4.pkl', 'rb') as f:
        clf_4 = pickle.load(f)
else:
    # create Random Forest
    clf_4 = RandomForestClassifier(
        n_estimators=100, max_depth=None,
        min_samples_split=5,
        class_weight='balanced'
        )

    clf_4.fit(X_train, y_train)

    with open('../Models/RF/clf_4.pkl','wb') as f:
        pickle.dump(clf_4,f)

pred_labels_4 = clf_4.predict(X_test)

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_labels_4, sample_weight=compute_sample_weight('balanced', y_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, clf_1.predict(X_test)), headers=[0,1,2,3,4],showindex="always", tablefmt="github"))


X= Pred, Y= True
|    |        0 |   1 |   2 |       3 |       4 |
|----|----------|-----|-----|---------|---------|
|  0 | 0        |   0 |   0 | 0       | 25.4    |
|  1 | 0        |   0 |   0 | 0       | 25.4    |
|  2 | 4.23333  |   0 |   0 | 0       | 21.1667 |
|  3 | 0.940741 |   0 |   0 | 1.88148 | 22.5778 |
|  4 | 0        |   0 |   0 | 1.14157 | 24.2584 |


|    |   0 |   1 |   2 |   3 |   4 |
|----|-----|-----|-----|-----|-----|
|  0 |   0 |   0 |   0 |   0 |   4 |
|  1 |   0 |   0 |   0 |   0 |   1 |
|  2 |   0 |   0 |   0 |   0 |   6 |
|  3 |   1 |   0 |   0 |   2 |  24 |
|  4 |   0 |   0 |   1 |   5 |  83 |


## Neural Net

In [71]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [89]:
if(os.path.exists('../Models/NN/model_0')):
    print("Loaded")
    # Load the random forest
    model_0 = tf.keras.saving.load_model('../Models/NN/model_0')
else:
    # create Random Forest
    model_0 = Sequential()
    model_0.add(Dense(64, input_dim=2229, activation='relu'))
    model_0.add(Dense(32, activation='relu'))
    model_0.add(Dense(1, activation='linear'))

    model_0.compile(loss='mean_squared_error', optimizer='adam',metrics=['accuracy'])

    model_0.fit(X_train, y_train, epochs=10, batch_size=32)

    tf.keras.saving.save_model(model_0, "../Models/NN/model_0", overwrite=True, save_format="tf")

pred_0 = np.rint(model_0.predict(X_test))
headers = list(np.arange(np.unique(pred_0).max()+1))


print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_0, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_0), headers=headers,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |     2.0 |      3.0 |     4.0 |      5.0 |      6.0 |
|----|-------|-------|---------|----------|---------|----------|----------|
|  0 |     0 |     0 | 0       | 19.05    |  6.35   | 0        |  0       |
|  1 |     0 |     0 | 0       |  0       |  0      | 0        | 25.4     |
|  2 |     0 |     0 | 0       |  8.46667 | 12.7    | 4.23333  |  0       |
|  3 |     0 |     0 | 0       | 10.3481  | 12.2296 | 0.940741 |  1.88148 |
|  4 |     0 |     0 | 0.85618 |  7.99101 | 12.5573 | 3.13933  |  0.85618 |
|  5 |     0 |     0 | 0       |  0       |  0      | 0        |  0       |
|  6 |     0 |     0 | 0       |  0       |  0      | 0        |  0       |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |   5.0 |   6.0 |
|----|-------|-------|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     3 |     1 |     0 |     0 |
|  1 |     0 |     0 |     0 |     0 |     0 |     0 |     1 |
|  2 |     0 |     0 |     0 |     2 |     3 

In [87]:
if(os.path.exists('../Models/NN/model_1')):
    print("Loaded")
    # Load the random forest
    model_1 = tf.keras.saving.load_model('../Models/NN/model_1')
else:
    # create Random Forest
    model_1 = Sequential()
    model_1.add(Dense(128,input_dim=2229,activation='relu'))
    model_1.add(Dense(64,activation='relu'))
    model_1.add(Dense(32,activation='relu'))
    model_1.add(Dense(1,activation='linear'))

    model_1.compile(loss='mean_squared_error', optimizer='adam')

    model_1.fit(X_train, y_train, epochs=10, batch_size=32)

    tf.keras.saving.save_model(model_1, "../Models/NN/model_1", overwrite=True, save_format="tf")

pred_1 = np.rint(model_1.predict(X_test))
headers = list(np.arange(np.unique(pred_1).max()+1))


print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_1, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_1), headers=headers,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |      2.0 |      3.0 |     4.0 |       5.0 |      6.0 |      7.0 |
|----|-------|-------|----------|----------|---------|-----------|----------|----------|
|  0 |     0 |     0 | 0        | 25.4     |  0      |  0        | 0        | 0        |
|  1 |     0 |     0 | 0        |  0       |  0      | 25.4      | 0        | 0        |
|  2 |     0 |     0 | 0        | 12.7     | 12.7    |  0        | 0        | 0        |
|  3 |     0 |     0 | 0        | 10.3481  | 12.2296 |  0.940741 | 0.940741 | 0.940741 |
|  4 |     0 |     0 | 0.285393 |  7.99101 | 12.5573 |  3.99551  | 0.285393 | 0.285393 |
|  5 |     0 |     0 | 0        |  0       |  0      |  0        | 0        | 0        |
|  6 |     0 |     0 | 0        |  0       |  0      |  0        | 0        | 0        |
|  7 |     0 |     0 | 0        |  0       |  0      |  0        | 0        | 0        |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |   5.0 |   6.0 |   7.0 |
|----|-------|------

In [86]:
if(os.path.exists('../Models/NN/model_2')):
    print("Loaded")
    # Load the random forest
    model_2 = tf.keras.saving.load_model('../Models/NN/model_2')
else:
    # create Random Forest
    model_2 = Sequential()
    model_2.add(Dense(256,input_dim=2229,activation='relu'))
    model_2.add(Dense(128,activation='relu'))
    model_2.add(Dense(64,activation='relu'))
    model_2.add(Dense(32,activation='relu'))
    model_2.add(Dense(1,activation='linear'))

    model_2.compile(loss='mean_squared_error', optimizer='adam')

    model_2.fit(X_train, y_train, epochs=10, batch_size=64)

    tf.keras.saving.save_model(model_2, "../Models/NN/model_2", overwrite=True, save_format="tf")

pred_2 = np.rint(model_2.predict(X_test))
headers = list(np.arange(np.unique(pred_2).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_2, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_2), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |     2.0 |      3.0 |      4.0 |      5.0 |     6.0 |
|----|-------|-------|---------|----------|----------|----------|---------|
|  0 |     0 |     0 | 0       | 25.4     |  0       |  0       | 0       |
|  1 |     0 |     0 | 0       |  0       |  0       | 25.4     | 0       |
|  2 |     0 |     0 | 0       | 12.7     |  8.46667 |  4.23333 | 0       |
|  3 |     0 |     0 | 0       |  8.46667 | 12.2296  |  2.82222 | 1.88148 |
|  4 |     0 |     0 | 0.85618 |  5.99326 | 14.5551  |  2.85393 | 1.14157 |
|  5 |     0 |     0 | 0       |  0       |  0       |  0       | 0       |
|  6 |     0 |     0 | 0       |  0       |  0       |  0       | 0       |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |   5.0 |   6.0 |
|----|-------|-------|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     4 |     0 |     0 |     0 |
|  1 |     0 |     0 |     0 |     0 |     0 |     1 |     0 |
|  2 |     0 |     0 |     0 |     3 |     2 

## SVM

In [91]:
from sklearn.svm import SVR

In [93]:
if (os.path.exists('../Models/SVM/linear.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/SVM/linear.pkl', 'rb') as f:
        svrL = pickle.load(f)
else:

    # Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
    svrL = SVR(kernel='linear')
    svrL.fit(X_train, y_train)

    with open('../Models/SVM/linear.pkl','wb') as f:
        pickle.dump(svrL,f)

# Test lin_reg on X_test
pred_svrL = np.rint(svrL.predict(X_test))
headers = list(np.arange(np.unique(pred_svrL).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_svrL, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_svrL), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|   0.0 |     1.0 |      2.0 |      3.0 |     4.0 |     5.0 |      6.0 |     7.0 |     8.0 |      9.0 |
|-------|---------|----------|----------|---------|---------|----------|---------|---------|----------|
|     0 | 0       | 0        | 12.7     | 6.35    | 6.35    |  0       | 0       | 0       | 0        |
|     1 | 0       | 0        |  0       | 0       | 0       | 25.4     | 0       | 0       | 0        |
|     2 | 0       | 4.23333  |  4.23333 | 8.46667 | 0       |  4.23333 | 4.23333 | 0       | 0        |
|     3 | 0       | 0.940741 |  3.76296 | 4.7037  | 8.46667 |  2.82222 | 1.88148 | 1.88148 | 0.940741 |
|     4 | 1.42697 | 1.14157  |  4.2809  | 6.84944 | 4.2809  |  3.42472 | 2.56854 | 1.42697 | 0        |
|     5 | 0       | 0        |  0       | 0       | 0       |  0       | 0       | 0       | 0        |
|     6 | 0       | 0        |  0       | 0       | 0       |  0       | 0       | 0       | 0        |
|     7 | 0       | 0        |  0       | 0    

In [96]:
if (os.path.exists('../Models/SVM/rbf.pkl')):
    # Load the random forest
    print("Loaded")
    with open('../Models/SVM/rbf.pkl', 'rb') as f:
        svrrbf = pickle.load(f)
else:

    # Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
    svrrbf = SVR(kernel='rbf')
    svrrbf.fit(X_train, y_train)

    with open('../Models/SVM/rbf.pkl','wb') as f:
        pickle.dump(svrrbf,f)

# Test lin_reg on X_test
pred_svrrbf = np.rint(svrrbf.predict(X_test))
headers = list(np.arange(np.unique(pred_svrrbf).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_svrrbf, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_svrrbf), headers=headers ,showindex="always", tablefmt="github"))

Loaded
X= Pred, Y= True
|    |   0.0 |   1.0 |   2.0 |     3.0 |     4.0 |
|----|-------|-------|-------|---------|---------|
|  0 |     0 |     0 |     0 | 0       | 25.4    |
|  1 |     0 |     0 |     0 | 0       | 25.4    |
|  2 |     0 |     0 |     0 | 0       | 25.4    |
|  3 |     0 |     0 |     0 | 2.82222 | 22.5778 |
|  4 |     0 |     0 |     0 | 0       | 25.4    |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |
|----|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     0 |     4 |
|  1 |     0 |     0 |     0 |     0 |     1 |
|  2 |     0 |     0 |     0 |     0 |     6 |
|  3 |     0 |     0 |     0 |     3 |    24 |
|  4 |     0 |     0 |     0 |     0 |    89 |


In [98]:
if (os.path.exists('../Models/SVM/poly_3.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/SVM/poly_3.pkl', 'rb') as f:
        svrpoly_3 = pickle.load(f)
else:

    # Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
    svrpoly_3 = SVR(kernel="poly", degree=3)
    svrpoly_3.fit(X_train, y_train)

    with open('../Models/SVM/poly_3.pkl','wb') as f:
        pickle.dump(svrpoly_3,f)

# Test lin_reg on X_test
pred_svrpoly_3 = np.rint(svrpoly_3.predict(X_test))
headers = list(np.arange(np.unique(pred_svrpoly_3).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_svrpoly_3, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_svrpoly_3), headers=headers ,showindex="always", tablefmt="github"))

Loaded
X= Pred, Y= True
|    |   0.0 |   1.0 |   2.0 |     3.0 |     4.0 |      5.0 |
|----|-------|-------|-------|---------|---------|----------|
|  0 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  1 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  2 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  3 |     0 |     0 |     0 | 3.76296 | 21.637  | 0        |
|  4 |     0 |     0 |     0 | 1.99775 | 23.1169 | 0.285393 |
|  5 |     0 |     0 |     0 | 0       |  0      | 0        |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |   5.0 |
|----|-------|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     0 |     4 |     0 |
|  1 |     0 |     0 |     0 |     0 |     1 |     0 |
|  2 |     0 |     0 |     0 |     0 |     6 |     0 |
|  3 |     0 |     0 |     0 |     4 |    23 |     0 |
|  4 |     0 |     0 |     0 |     7 |    81 |     1 |
|  5 |     0 |     0 |     0 |     0 |     0 |     0 |


In [99]:
if (os.path.exists('../Models/SVM/poly_4.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/SVM/poly_4.pkl', 'rb') as f:
        svrpoly_4 = pickle.load(f)
else:

    # Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
    svrpoly_4 = SVR(kernel="poly", degree=3)
    svrpoly_4.fit(X_train, y_train)

    with open('../Models/SVM/poly_4.pkl','wb') as f:
        pickle.dump(svrpoly_4,f)

# Test lin_reg on X_test
pred_svrpoly_4 = np.rint(svrpoly_4.predict(X_test))
headers = list(np.arange(np.unique(pred_svrpoly_4).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_svrpoly_4, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_svrpoly_4), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |   2.0 |     3.0 |     4.0 |      5.0 |
|----|-------|-------|-------|---------|---------|----------|
|  0 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  1 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  2 |     0 |     0 |     0 | 0       | 25.4    | 0        |
|  3 |     0 |     0 |     0 | 3.76296 | 21.637  | 0        |
|  4 |     0 |     0 |     0 | 1.99775 | 23.1169 | 0.285393 |
|  5 |     0 |     0 |     0 | 0       |  0      | 0        |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |   5.0 |
|----|-------|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     0 |     4 |     0 |
|  1 |     0 |     0 |     0 |     0 |     1 |     0 |
|  2 |     0 |     0 |     0 |     0 |     6 |     0 |
|  3 |     0 |     0 |     0 |     4 |    23 |     0 |
|  4 |     0 |     0 |     0 |     7 |    81 |     1 |
|  5 |     0 |     0 |     0 |     0 |     0 |     0 |


In [101]:
if (os.path.exists('../Models/SVM/sigmoid.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/SVM/sigmoid.pkl', 'rb') as f:
        sigmoid = pickle.load(f)
else:

    # Build a SVR (SVM Regression) model and train it on (X_train, y_train), kernel should be 'linear'
    sigmoid = SVR(kernel="sigmoid")
    sigmoid.fit(X_train, y_train)

    with open('../Models/SVM/sigmoid.pkl','wb') as f:
        pickle.dump(sigmoid,f)

# Test lin_reg on X_test
pred_sigmoid = np.rint(sigmoid.predict(X_test))
headers = list(np.arange(np.unique(pred_sigmoid).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_sigmoid, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_sigmoid), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |      2.0 |     3.0 |     4.0 |
|----|-------|-------|----------|---------|---------|
|  0 |     0 |     0 | 0        | 0       | 25.4    |
|  1 |     0 |     0 | 0        | 0       | 25.4    |
|  2 |     0 |     0 | 0        | 0       | 25.4    |
|  3 |     0 |     0 | 0        | 1.88148 | 23.5185 |
|  4 |     0 |     0 | 0.285393 | 1.42697 | 23.6876 |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |
|----|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     0 |     4 |
|  1 |     0 |     0 |     0 |     0 |     1 |
|  2 |     0 |     0 |     0 |     0 |     6 |
|  3 |     0 |     0 |     0 |     2 |    25 |
|  4 |     0 |     0 |     1 |     5 |    83 |


## XGBoost


these are all regesors, maybe try classification?


https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBClassifier

In [103]:
%pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
     ---------------------------------------- 99.8/99.8 MB 1.9 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-2.0.3
Note: you may need to restart the kernel to use updated packages.


In [104]:
from xgboost import XGBRegressor

In [105]:
if (os.path.exists('../Models/XGB/xgb_0.5.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/XGB/xgb_0.5.pkl', 'rb') as f:
        xgb_reg2 = pickle.load(f)
else:

    # Build a xgb regression model and train it on (X_train, y_train)
    xgb_reg2 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = .5)
    xgb_reg2.fit(X_train, y_train)

    with open('../Models/XGB/xgb_0.5.pkl','wb') as f:
        pickle.dump(xgb_reg2,f)

# Test lin_reg on X_test
pred_xgb_reg2 = np.rint(xgb_reg2.predict(X_test))
headers = list(np.arange(np.unique(pred_xgb_reg2).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |     2.0 |      3.0 |     4.0 |
|----|-------|-------|---------|----------|---------|
|  0 |     0 |     0 | 0       | 12.7     | 12.7    |
|  1 |     0 |     0 | 0       | 25.4     |  0      |
|  2 |     0 |     0 | 0       |  8.46667 | 16.9333 |
|  3 |     0 |     0 | 1.88148 | 10.3481  | 13.1704 |
|  4 |     0 |     0 | 1.14157 |  9.98876 | 14.2697 |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |
|----|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     2 |     2 |
|  1 |     0 |     0 |     0 |     1 |     0 |
|  2 |     0 |     0 |     0 |     2 |     4 |
|  3 |     0 |     0 |     2 |    11 |    14 |
|  4 |     0 |     0 |     4 |    35 |    50 |


In [106]:
if (os.path.exists('../Models/XGB/xgb_5.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/XGB/xgb_5.pkl', 'rb') as f:
        xgb_reg2 = pickle.load(f)
else:

    # Build a xgb regression model and train it on (X_train, y_train)
    xgb_reg2 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 5)
    xgb_reg2.fit(X_train, y_train)

    with open('../Models/XGB/xgb_5.pkl','wb') as f:
        pickle.dump(xgb_reg2,f)

# Test lin_reg on X_test
pred_xgb_reg2 = np.rint(xgb_reg2.predict(X_test))
headers = list(np.arange(np.unique(pred_xgb_reg2).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |     2.0 |      3.0 |      4.0 |
|----|-------|-------|---------|----------|----------|
|  0 |     0 |     0 | 6.35    |  0       | 19.05    |
|  1 |     0 |     0 | 0       | 25.4     |  0       |
|  2 |     0 |     0 | 4.23333 | 12.7     |  8.46667 |
|  3 |     0 |     0 | 0       |  9.40741 | 15.9926  |
|  4 |     0 |     0 | 0.85618 |  7.42022 | 17.1236  |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |
|----|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     1 |     0 |     3 |
|  1 |     0 |     0 |     0 |     1 |     0 |
|  2 |     0 |     0 |     1 |     3 |     2 |
|  3 |     0 |     0 |     0 |    10 |    17 |
|  4 |     0 |     0 |     3 |    26 |    60 |


In [107]:
if (os.path.exists('../Models/XGB/xgb_30.pkl')):
    print("Loaded")
    # Load the random forest
    with open('../Models/XGB/xgb_30.pkl', 'rb') as f:
        xgb_reg2 = pickle.load(f)
else:

    # Build a xgb regression model and train it on (X_train, y_train)
    xgb_reg2 = XGBRegressor(objective ='reg:squarederror', scale_pos_weight = 30)
    xgb_reg2.fit(X_train, y_train)

    with open('../Models/XGB/xgb_30.pkl','wb') as f:
        pickle.dump(xgb_reg2,f)

# Test lin_reg on X_test
pred_xgb_reg2 = np.rint(xgb_reg2.predict(X_test))
headers = list(np.arange(np.unique(pred_xgb_reg2).max()+1))

print("X= Pred, Y= True")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2, sample_weight=compute_sample_weight('balanced', y_test)), headers=headers ,showindex="always", tablefmt="github"))
print("\n")
print(tabulate(confusion_matrix(y_test, pred_xgb_reg2), headers=headers ,showindex="always", tablefmt="github"))

X= Pred, Y= True
|    |   0.0 |   1.0 |      2.0 |      3.0 |     4.0 |
|----|-------|-------|----------|----------|---------|
|  0 |     0 |     0 | 0        |  6.35    | 19.05   |
|  1 |     0 |     0 | 0        |  0       | 25.4    |
|  2 |     0 |     0 | 0        | 12.7     | 12.7    |
|  3 |     0 |     0 | 0.940741 |  8.46667 | 15.9926 |
|  4 |     0 |     0 | 1.71236  |  6.84944 | 16.8382 |


|    |   0.0 |   1.0 |   2.0 |   3.0 |   4.0 |
|----|-------|-------|-------|-------|-------|
|  0 |     0 |     0 |     0 |     1 |     3 |
|  1 |     0 |     0 |     0 |     0 |     1 |
|  2 |     0 |     0 |     0 |     3 |     3 |
|  3 |     0 |     0 |     1 |     9 |    17 |
|  4 |     0 |     0 |     6 |    24 |    59 |
