In [1]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn import metrics
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn import linear_model
from sklearn.feature_extraction.text import CountVectorizer

import pickle
import numpy as np # linear algebra
import pandas as pd

In [2]:
#read csv
dict_label = {
    'Datetime':0, 
    'Sentence':1, 
    'Custom Object': 2, 
    'URL': 3, 
    'Numbers': 4, 
    'List': 5}
data = pd.read_csv('data/needs_extraction_data/labelled_data.csv')

data['y_act'] = [dict_label[i] for i in data['y_act']]
y = data.loc[:,['y_act']]

In [3]:
data1 = data[['%_nans', 'mean_word_count', 'std_dev_word_count', 'has_delimiters']]
data1 = data1.fillna(0)

data1 = data1.rename(columns={'mean_word_count': 'scaled_mean_token_count', 'std_dev_word_count': 'scaled_std_dev_token_count', '%_nans': 'scaled_perc_nans'})
data1.loc[data1['scaled_mean_token_count'] > 10000, 'scaled_mean_token_count'] = 10000
data1.loc[data1['scaled_mean_token_count'] < -10000, 'scaled_mean_token_count'] = -10000
data1.loc[data1['scaled_std_dev_token_count'] > 10000, 'scaled_std_dev_token_count'] = 10000
data1.loc[data1['scaled_std_dev_token_count'] < -10000, 'scaled_std_dev_token_count'] = -10000
data1.loc[data1['scaled_perc_nans'] > 10000, 'scaled_perc_nans'] = 10000
data1.loc[data1['scaled_perc_nans'] < -10000, 'scaled_perc_nans'] = -10000
column_names_to_normalize = ['scaled_mean_token_count', 'scaled_std_dev_token_count','scaled_perc_nans']
x = data1[column_names_to_normalize].values
x = np.nan_to_num(x)
x_scaled = StandardScaler().fit_transform(x)
df_temp = pd.DataFrame(x_scaled, columns=column_names_to_normalize, index = data1.index)
data1[column_names_to_normalize] = df_temp

y.y_act = y.y_act.astype(float)

print(f"> Data mean: \n{data1.mean()}")
print(f"> Data median: \n{data1.median()}")
print(f"> Data stdev: \n{data1.std()}")

# data1.to_csv('before.csv')
# f = open('current.txt','w')
# d = enchant.Dict("en_US")

# for i in data.index:
#     ival = data.at[i,'Attribute_name']
#     if ival != 'id' and d.check(ivadf_tempdata1)
#         print(f,ival)
#         print(f,y.at[i,'y_act'])
#         data1.at[i,'dictionary_item'] = 1
#     else:
#         data1.at[i,'dictionary_item'] = 0

# data1.to_csv('after.csv')
# f.close()
# print(data1.columns)

> Data mean: 
scaled_perc_nans             -2.745801e-16
scaled_mean_token_count      -1.117919e-16
scaled_std_dev_token_count   -2.236863e-17
has_delimiters                3.105360e-01
dtype: float64
> Data median: 
scaled_perc_nans             -0.653046
scaled_mean_token_count      -0.144106
scaled_std_dev_token_count   -0.171320
has_delimiters                0.000000
dtype: float64
> Data stdev: 
scaled_perc_nans              1.000925
scaled_mean_token_count       1.000925
scaled_std_dev_token_count    1.000925
has_delimiters                0.463141
dtype: float64


In [4]:
print("===[VECTORIZATION]===")
arr = data['Attribute_name'].values
data = data.fillna(0)
arr1 = data['sample_1'].values
arr1 = [str(x) for x in arr1]
arr2 = data['sample_2'].values
arr2 = [str(x) for x in arr2]

vectorizer = CountVectorizer(ngram_range=(3, 3), analyzer='char')
X = vectorizer.fit_transform(arr)
X1 = vectorizer.fit_transform(arr1)
X2 = vectorizer.fit_transform(arr2)

print(f"> Length of vectorized feature_names: {len(vectorizer.get_feature_names())}")

data1.to_csv('data/preprocessing/before.csv')
attr_df = pd.DataFrame(X.toarray())
sample1_df = pd.DataFrame(X1.toarray())
sample2_df = pd.DataFrame(X2.toarray())

data2 = pd.concat([data1, attr_df, sample1_df, sample2_df], axis=1, sort=False)
data2.to_csv('data/preprocessing/after.csv')
data2.head()

X_train, X_test, y_train, y_test = train_test_split(
    data2, y, test_size=0.2, random_state=100)
atr_train,atr_test = train_test_split(data2, test_size=0.2,random_state=100)

# X_train_train, X_test_train,y_train_train,y_test_train = train_test_split(X_train,y_train, test_size=0.25)
# print(X_train.head())
# print(y_train.head())

X_train_new = X_train.reset_index(drop=True)
y_train_new = y_train.reset_index(drop=True)
print(f"X_train preview: {X_train.head()}")
print(f"y_train preview: {y_train.head()}")

X_train_new = X_train_new.values
y_train_new = y_train_new.values

===[VECTORIZATION]===
> Length of vectorized feature_names: 8528
X_train preview:      scaled_perc_nans  scaled_mean_token_count  scaled_std_dev_token_count  \
453         -0.653097                 0.686283                    3.364514   
43          -0.653120                 0.162079                   -0.054513   
133          1.978459                -0.148544                   -0.167108   
205         -0.653120                -0.141062                   -0.175870   
282         -0.653120                -0.148960                   -0.175870   

     has_delimiters  0  1  2  3  4  5  ...   8518  8519  8520  8521  8522  \
453            True  0  0  0  0  0  0  ...      0     0     0     0     0   
43             True  0  0  0  0  0  0  ...      0     0     0     0     0   
133            True  0  0  0  0  0  0  ...      0     0     0     0     0   
205           False  0  0  0  0  0  0  ...      0     0     0     0     0   
282           False  0  0  0  0  0  0  ...      0     0     0   

In [5]:
try:
    acc_df = pd.read_csv('data/model_data.csv')
    index = len(acc_df)
except FileNotFoundError:
    acc_df = pd.DataFrame(columns=['Model', 'Params', 'Feats', 'Train', 'Validation', 'Test', 'Precision'])
    index = 0

In [None]:
k = 5
kf = KFold(n_splits=k)
avg_train_acc, avg_test_acc = 0, 0

cvals = [0.1, 1, 10, 100, 1000]
gamavals = [0.0001, 0.001, 0.01, 0.1, 1, 10]


avgsc_lst, avgsc_train_lst, avgsc_hld_lst = [], [], []
avgsc, avgsc_train, avgsc_hld = 0, 0, 0

best_param_count = {'C': {}, 'gamma': {}}
for train_index, test_index in kf.split(X_train_new):
    X_train_cur, X_test_cur = X_train_new[train_index], X_train_new[test_index]
    y_train_cur, y_test_cur = y_train_new[train_index], y_train_new[test_index]
    X_train_train, X_val, y_train_train, y_val = train_test_split(
        X_train_cur, y_train_cur, test_size=0.25, random_state=100)

    bestPerformingModel = svm.SVC(
        C=100, decision_function_shape="ovo", gamma=0.001, probability=True)
    bestscore = 0
    print('='*10)
    for cval in cvals:
        for gval in gamavals:
            clf = svm.SVC(C=cval, decision_function_shape="ovo", gamma=gval, probability=True)
            clf.fit(X_train_train, y_train_train)
            sc = clf.score(X_val, y_val)
            print(f"[C: {cval}, gamma: {gval}, accuracy: {sc}]")
            if bestscore < sc:
                bestcval = cval
                bestgval = gval
                bestscore = sc
                bestPerformingModel = clf

    if str(bestcval) in best_param_count['C']:
        best_param_count['C'][str(bestcval)] += 1
    else:
        best_param_count['C'][str(bestcval)] = 1
        
    if str(bestgval) in best_param_count['gamma']:
        best_param_count['gamma'][str(bestgval)] += 1
    else:
        best_param_count['gamma'][str(bestgval)] = 1
        
    bscr_train = bestPerformingModel.score(X_train_cur, y_train_cur)
    bscr = bestPerformingModel.score(X_test_cur, y_test_cur)
    bscr_hld = bestPerformingModel.score(X_test, y_test)

    avgsc_train_lst.append(bscr_train)
    avgsc_lst.append(bscr)
    avgsc_hld_lst.append(bscr_hld)

    avgsc_train = avgsc_train + bscr_train
    avgsc = avgsc + bscr
    avgsc_hld = avgsc_hld + bscr_hld
    print()
    print(f"> Best C: {bestcval} || Best gamma: {bestgval}")
    print(f"> Best training score: {bscr_train}")
    print(f"> Best test score: {bscr}")
    print(f"> Best held score: {bscr_hld}")
    print('='*10)



  y = column_or_1d(y, warn=True)


[C: 0.1, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.01, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.1, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 1, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 10, accuracy: 0.4367816091954023]
[C: 1, gamma: 0.0001, accuracy: 0.45977011494252873]
[C: 1, gamma: 0.001, accuracy: 0.47126436781609193]
[C: 1, gamma: 0.01, accuracy: 0.7701149425287356]
[C: 1, gamma: 0.1, accuracy: 0.5287356321839081]
[C: 1, gamma: 1, accuracy: 0.4367816091954023]
[C: 1, gamma: 10, accuracy: 0.4367816091954023]
[C: 10, gamma: 0.0001, accuracy: 0.47126436781609193]
[C: 10, gamma: 0.001, accuracy: 0.7471264367816092]
[C: 10, gamma: 0.01, accuracy: 0.8160919540229885]
[C: 10, gamma: 0.1, accuracy: 0.5287356321839081]
[C: 10, gamma: 1, accuracy: 0.4367816091954023]
[C: 10, gamma: 10, accuracy: 0.4367816091954023]
[C: 100, gamma: 0.0001, accuracy: 0.7471264367816092]
[C: 100, gamma: 0.001, accuracy: 0

In [7]:
y_pred = bestPerformingModel.predict(X_test)
prec = metrics.precision_score(y_test, y_pred, average=None)
cat_prec = {
    'Datetime': prec[0],
    'Sentence': prec[1],
    'Custom Object': prec[2],
    'URL': prec[3],
    'Numbers': prec[4],
    'List': prec[5],
}

  'precision', 'predicted', average, warn_for)


In [8]:
bestcval = max(best_param_count['C'], key=lambda i: best_param_count['C'][i])
bestgval = max(best_param_count['gamma'], key=lambda i: best_param_count['gamma'][i])
bestparams = {'C': bestcval, 'gamma': bestgval}
print(f"> Best n_estimator : {bestcval} || Best max_depth : {bestgval}")
print(f"> Average training score list: {avgsc_train_lst}")
print(f"> Average testing score list: {avgsc_lst}")
print(f"> Average held score list: {avgsc_hld_lst}")
print()
avgsc_train = avgsc_train/k
avgsc = avgsc/k
avgsc_hld = avgsc_hld/k
print(f"> Average training score list: {avgsc_train}")
print(f"> Average testing score list: {avgsc}")
print(f"> Average held score list: {avgsc_hld}")
acc_df.loc[index] = ['rbf_svm', str(bestparams),"X_stats, X_name, X_sample1, X_sample2", avgsc_train, avgsc, avgsc_hld, str(cat_prec)]
index += 1
print()

y_pred = bestPerformingModel.predict(X_test)
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print('Confusion Matrix: Actual (Row) vs Predicted (Column)')
print(cnf_matrix)

> Best n_estimator : 10 || Best max_depth : 0.01
> Average training score list: [0.9623188405797102, 0.9420289855072463, 0.9364161849710982, 0.9479768786127167, 0.9479768786127167]
> Average testing score list: [0.7701149425287356, 0.7931034482758621, 0.7906976744186046, 0.7906976744186046, 0.8023255813953488]
> Average held score list: [0.8165137614678899, 0.8532110091743119, 0.8532110091743119, 0.8532110091743119, 0.8165137614678899]

> Average training score list: 0.9473435536566976
> Average testing score list: 0.7893878642074312
> Average held score list: 0.8385321100917432

Confusion Matrix: Actual (Row) vs Predicted (Column)
[[24  0  3  0  0  0]
 [ 0 15  7  0  0  0]
 [ 0  4 48  0  0  0]
 [ 0  0  0  2  0  0]
 [ 0  0  1  0  0  0]
 [ 0  2  3  0  0  0]]


In [9]:
# save the model to disk
filename = 'data/pretrained/svm_finalized_model.pickle'
pickle.dump(bestPerformingModel, open(filename, 'wb'))

# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test, y_test)
y_prob = bestPerformingModel.predict_proba(X_test)

df = pd.DataFrame.from_records(y_prob)
print(df)
df.to_csv('data/model_predictions/svm_predictions.csv', index=False)

            0         1         2         3         4         5
0    0.075341  0.197462  0.569570  0.019212  0.053135  0.085279
1    0.000665  0.433059  0.062211  0.455819  0.000522  0.047724
2    0.973558  0.009671  0.003656  0.004775  0.003333  0.005007
3    0.957586  0.011027  0.011226  0.005045  0.008624  0.006492
4    0.015018  0.235220  0.569255  0.023989  0.007546  0.148972
5    0.218382  0.066021  0.688526  0.004707  0.011074  0.011290
6    0.022391  0.038373  0.889596  0.004936  0.030617  0.014087
7    0.964575  0.013729  0.006302  0.005389  0.003805  0.006200
8    0.705144  0.067835  0.061431  0.015548  0.077598  0.072444
9    0.013207  0.592731  0.279037  0.016983  0.008525  0.089516
10   0.186476  0.072395  0.545799  0.013224  0.066994  0.115113
11   0.004370  0.772350  0.126692  0.014399  0.002782  0.079407
12   0.994043  0.001324  0.000507  0.002901  0.000450  0.000774
13   0.958287  0.011069  0.008303  0.006499  0.006888  0.008955
14   0.114588  0.140891  0.541398  0.017

In [10]:
def test_feat_combos(index):
    combos = {
        "X_stats": data1,
        "X_name": attr_df,
        "X_stats, X_name": pd.concat([data1, attr_df], axis=1, sort=False),
        "X_sample1":  pd.concat([sample1_df], axis=1, sort=False),
        "X_name, X_sample1":  pd.concat([attr_df, sample1_df], axis=1, sort=False),
        "X_stats, X_sample1":  pd.concat([data1, sample1_df], axis=1, sort=False),
        "X_stats, X_name, X_sample1":  pd.concat([data1, attr_df, sample1_df], axis=1, sort=False)
    }
    

    for combo in combos:
        print("="*50, combo, "="*50)
        X_train, X_test, y_train, y_test = train_test_split(
            combos[combo], y, test_size=0.2, random_state=100)

        X_train_new = X_train.reset_index(drop=True)
        y_train_new = y_train.reset_index(drop=True)
        X_train_new = X_train_new.values
        y_train_new = y_train_new.values
        k = 5
        kf = KFold(n_splits=k)
        avg_train_acc, avg_test_acc = 0, 0

        cvals = [0.1, 1, 10, 100, 1000]
        gamavals = [0.0001, 0.001, 0.01, 0.1, 1, 10]


        avgsc_lst, avgsc_train_lst, avgsc_hld_lst = [], [], []
        avgsc, avgsc_train, avgsc_hld = 0, 0, 0

        best_param_count = {'C': {}, 'gamma': {}}
        for train_index, test_index in kf.split(X_train_new):
            X_train_cur, X_test_cur = X_train_new[train_index], X_train_new[test_index]
            y_train_cur, y_test_cur = y_train_new[train_index], y_train_new[test_index]
            X_train_train, X_val, y_train_train, y_val = train_test_split(
                X_train_cur, y_train_cur, test_size=0.25, random_state=100)

            bestPerformingModel = svm.SVC(
                C=100, decision_function_shape="ovo", gamma=0.001, probability=True)
            bestscore = 0
            print('-'*10)
            for cval in cvals:
                for gval in gamavals:
                    clf = svm.SVC(C=cval, decision_function_shape="ovo", gamma=gval, probability=True)
                    clf.fit(X_train_train, y_train_train)
                    sc = clf.score(X_val, y_val)
                    print(f"[C: {cval}, gamma: {gval}, accuracy: {sc}]")
                    if bestscore < sc:
                        bestcval = cval
                        bestgval = gval
                        bestscore = sc
                        bestPerformingModel = clf

            if str(bestcval) in best_param_count['C']:
                best_param_count['C'][str(bestcval)] += 1
            else:
                best_param_count['C'][str(bestcval)] = 1

            if str(bestgval) in best_param_count['gamma']:
                best_param_count['gamma'][str(bestgval)] += 1
            else:
                best_param_count['gamma'][str(bestgval)] = 1

            bscr_train = bestPerformingModel.score(X_train_cur, y_train_cur)
            bscr = bestPerformingModel.score(X_test_cur, y_test_cur)
            bscr_hld = bestPerformingModel.score(X_test, y_test)

            avgsc_train_lst.append(bscr_train)
            avgsc_lst.append(bscr)
            avgsc_hld_lst.append(bscr_hld)

            avgsc_train = avgsc_train + bscr_train
            avgsc = avgsc + bscr
            avgsc_hld = avgsc_hld + bscr_hld
            print()
            print(f"\t> Best C: {bestcval} || Best gamma: {bestgval}")
            print(f"\t> Best training score: {bscr_train}")
            print(f"\t> Best test score: {bscr}")
            print(f"\t> Best held score: {bscr_hld}")
        print('\t', '-'*10)
        
        y_pred = bestPerformingModel.predict(X_test)
        prec = metrics.precision_score(y_test, y_pred, average=None)
        cat_prec = {
            'Datetime': prec[0],
            'Sentence': prec[1],
            'Custom Object': prec[2],
            'URL': prec[3],
            'Numbers': prec[4],
            'List': prec[5],
        }    
        bestcval = max(best_param_count['C'], key=lambda i: best_param_count['C'][i])
        bestgval = max(best_param_count['gamma'], key=lambda i: best_param_count['gamma'][i])
        bestparams = {'C': bestcval, 'gamma': bestgval}
        print(f"\t> Best n_estimator : {bestcval} || Best max_depth : {bestgval}")
        print(f"\t> Average training score list: {avgsc_train_lst}")
        print(f"\t> Average testing score list: {avgsc_lst}")
        print(f"\t> Average held score list: {avgsc_hld_lst}")
        print()
        avgsc_train = avgsc_train/k
        avgsc = avgsc/k
        avgsc_hld = avgsc_hld/k
        print(f"\t> Average training score list: {avgsc_train}")
        print(f"\t> Average testing score list: {avgsc}")
        print(f"\t> Average held score list: {avgsc_hld}")
        acc_df.loc[index] = ['rbf_svm', str(bestparams), combo, avgsc_train, avgsc, avgsc_hld, str(cat_prec)]
        index += 1
        print()

        y_pred = bestPerformingModel.predict(X_test)
        cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
        print('\tConfusion Matrix: Actual (Row) vs Predicted (Column)')
        print('\t', cnf_matrix)

In [11]:
test_feat_combos(index)
acc_df.to_csv('data/model_data.csv', index=False)

----------
[C: 0.1, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.01, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.1, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 1, accuracy: 0.5057471264367817]
[C: 0.1, gamma: 10, accuracy: 0.5172413793103449]
[C: 1, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 1, gamma: 0.001, accuracy: 0.4482758620689655]
[C: 1, gamma: 0.01, accuracy: 0.45977011494252873]
[C: 1, gamma: 0.1, accuracy: 0.5402298850574713]
[C: 1, gamma: 1, accuracy: 0.5287356321839081]
[C: 1, gamma: 10, accuracy: 0.5402298850574713]
[C: 10, gamma: 0.0001, accuracy: 0.4482758620689655]


  y = column_or_1d(y, warn=True)


[C: 10, gamma: 0.001, accuracy: 0.45977011494252873]
[C: 10, gamma: 0.01, accuracy: 0.5402298850574713]
[C: 10, gamma: 0.1, accuracy: 0.5287356321839081]
[C: 10, gamma: 1, accuracy: 0.5632183908045977]
[C: 10, gamma: 10, accuracy: 0.5977011494252874]
[C: 100, gamma: 0.0001, accuracy: 0.45977011494252873]
[C: 100, gamma: 0.001, accuracy: 0.5402298850574713]
[C: 100, gamma: 0.01, accuracy: 0.5402298850574713]
[C: 100, gamma: 0.1, accuracy: 0.5632183908045977]
[C: 100, gamma: 1, accuracy: 0.6551724137931034]
[C: 100, gamma: 10, accuracy: 0.7241379310344828]
[C: 1000, gamma: 0.0001, accuracy: 0.5287356321839081]
[C: 1000, gamma: 0.001, accuracy: 0.5402298850574713]
[C: 1000, gamma: 0.01, accuracy: 0.5517241379310345]
[C: 1000, gamma: 0.1, accuracy: 0.7011494252873564]
[C: 1000, gamma: 1, accuracy: 0.735632183908046]
[C: 1000, gamma: 10, accuracy: 0.7241379310344828]

	> Best C: 1000 || Best gamma: 1
	> Best training score: 0.7710144927536232
	> Best test score: 0.6091954022988506
	> Best h

  'precision', 'predicted', average, warn_for)


----------
[C: 0.1, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.001, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.01, accuracy: 0.4367816091954023]
[C: 0.1, gamma: 0.1, accuracy: 0.47126436781609193]
[C: 0.1, gamma: 1, accuracy: 0.45977011494252873]
[C: 0.1, gamma: 10, accuracy: 0.45977011494252873]
[C: 1, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 1, gamma: 0.001, accuracy: 0.4367816091954023]
[C: 1, gamma: 0.01, accuracy: 0.6666666666666666]
[C: 1, gamma: 0.1, accuracy: 0.7471264367816092]
[C: 1, gamma: 1, accuracy: 0.6436781609195402]
[C: 1, gamma: 10, accuracy: 0.6436781609195402]
[C: 10, gamma: 0.0001, accuracy: 0.4367816091954023]
[C: 10, gamma: 0.001, accuracy: 0.6896551724137931]
[C: 10, gamma: 0.01, accuracy: 0.7701149425287356]
[C: 10, gamma: 0.1, accuracy: 0.7586206896551724]
[C: 10, gamma: 1, accuracy: 0.6436781609195402]
[C: 10, gamma: 10, accuracy: 0.6436781609195402]
[C: 100, gamma: 0.0001, accuracy: 0.6896551724137931]
[C: 100, gamma: 0.001, 

[C: 1000, gamma: 0.1, accuracy: 0.7471264367816092]
[C: 1000, gamma: 1, accuracy: 0.6896551724137931]
[C: 1000, gamma: 10, accuracy: 0.6896551724137931]

	> Best C: 10 || Best gamma: 0.01
	> Best training score: 0.9104046242774566
	> Best test score: 0.7441860465116279
	> Best held score: 0.7981651376146789
	 ----------
	> Best n_estimator : 100 || Best max_depth : 0.01
	> Average training score list: [0.8724637681159421, 0.9246376811594202, 0.9277456647398844, 0.930635838150289, 0.9104046242774566]
	> Average testing score list: [0.8390804597701149, 0.7701149425287356, 0.8023255813953488, 0.7906976744186046, 0.7441860465116279]
	> Average held score list: [0.8073394495412844, 0.7798165137614679, 0.7798165137614679, 0.7889908256880734, 0.7981651376146789]

	> Average training score list: 0.9131775152885986
	> Average testing score list: 0.7892809409248864
	> Average held score list: 0.7908256880733945

	Confusion Matrix: Actual (Row) vs Predicted (Column)
	 [[24  0  3  0  0  0]
 [ 2 13

[C: 0.1, gamma: 1, accuracy: 0.5287356321839081]
[C: 0.1, gamma: 10, accuracy: 0.5287356321839081]
[C: 1, gamma: 0.0001, accuracy: 0.5287356321839081]
[C: 1, gamma: 0.001, accuracy: 0.5402298850574713]
[C: 1, gamma: 0.01, accuracy: 0.7126436781609196]
[C: 1, gamma: 0.1, accuracy: 0.735632183908046]
[C: 1, gamma: 1, accuracy: 0.6781609195402298]
[C: 1, gamma: 10, accuracy: 0.6781609195402298]
[C: 10, gamma: 0.0001, accuracy: 0.5402298850574713]
[C: 10, gamma: 0.001, accuracy: 0.7011494252873564]
[C: 10, gamma: 0.01, accuracy: 0.8160919540229885]
[C: 10, gamma: 0.1, accuracy: 0.7701149425287356]
[C: 10, gamma: 1, accuracy: 0.6781609195402298]
[C: 10, gamma: 10, accuracy: 0.6781609195402298]
[C: 100, gamma: 0.0001, accuracy: 0.7011494252873564]
[C: 100, gamma: 0.001, accuracy: 0.8160919540229885]
[C: 100, gamma: 0.01, accuracy: 0.8045977011494253]
[C: 100, gamma: 0.1, accuracy: 0.7586206896551724]
[C: 100, gamma: 1, accuracy: 0.6781609195402298]
[C: 100, gamma: 10, accuracy: 0.68965517241

[C: 10, gamma: 0.01, accuracy: 0.735632183908046]
[C: 10, gamma: 0.1, accuracy: 0.632183908045977]
[C: 10, gamma: 1, accuracy: 0.39080459770114945]
[C: 10, gamma: 10, accuracy: 0.4942528735632184]
[C: 100, gamma: 0.0001, accuracy: 0.6206896551724138]
[C: 100, gamma: 0.001, accuracy: 0.7241379310344828]
[C: 100, gamma: 0.01, accuracy: 0.735632183908046]
[C: 100, gamma: 0.1, accuracy: 0.632183908045977]
[C: 100, gamma: 1, accuracy: 0.39080459770114945]
[C: 100, gamma: 10, accuracy: 0.4942528735632184]
[C: 1000, gamma: 0.0001, accuracy: 0.7241379310344828]
[C: 1000, gamma: 0.001, accuracy: 0.735632183908046]
[C: 1000, gamma: 0.01, accuracy: 0.735632183908046]
[C: 1000, gamma: 0.1, accuracy: 0.632183908045977]
[C: 1000, gamma: 1, accuracy: 0.39080459770114945]
[C: 1000, gamma: 10, accuracy: 0.4942528735632184]

	> Best C: 10 || Best gamma: 0.01
	> Best training score: 0.8497109826589595
	> Best test score: 0.7093023255813954
	> Best held score: 0.7981651376146789
----------
[C: 0.1, gamma:

[C: 1000, gamma: 0.0001, accuracy: 0.735632183908046]
[C: 1000, gamma: 0.001, accuracy: 0.7241379310344828]
[C: 1000, gamma: 0.01, accuracy: 0.7471264367816092]
[C: 1000, gamma: 0.1, accuracy: 0.5977011494252874]
[C: 1000, gamma: 1, accuracy: 0.4482758620689655]
[C: 1000, gamma: 10, accuracy: 0.4482758620689655]

	> Best C: 100 || Best gamma: 0.001
	> Best training score: 0.9277456647398844
	> Best test score: 0.813953488372093
	> Best held score: 0.8256880733944955
----------
[C: 0.1, gamma: 0.0001, accuracy: 0.4827586206896552]
[C: 0.1, gamma: 0.001, accuracy: 0.4827586206896552]
[C: 0.1, gamma: 0.01, accuracy: 0.4827586206896552]
[C: 0.1, gamma: 0.1, accuracy: 0.4827586206896552]
[C: 0.1, gamma: 1, accuracy: 0.4827586206896552]
[C: 0.1, gamma: 10, accuracy: 0.4827586206896552]
[C: 1, gamma: 0.0001, accuracy: 0.5172413793103449]
[C: 1, gamma: 0.001, accuracy: 0.5172413793103449]
[C: 1, gamma: 0.01, accuracy: 0.735632183908046]
[C: 1, gamma: 0.1, accuracy: 0.6206896551724138]
[C: 1, g

----------
[C: 0.1, gamma: 0.0001, accuracy: 0.4482758620689655]
[C: 0.1, gamma: 0.001, accuracy: 0.4482758620689655]
[C: 0.1, gamma: 0.01, accuracy: 0.4482758620689655]
[C: 0.1, gamma: 0.1, accuracy: 0.4482758620689655]
[C: 0.1, gamma: 1, accuracy: 0.4482758620689655]
[C: 0.1, gamma: 10, accuracy: 0.4482758620689655]
[C: 1, gamma: 0.0001, accuracy: 0.45977011494252873]
[C: 1, gamma: 0.001, accuracy: 0.45977011494252873]
[C: 1, gamma: 0.01, accuracy: 0.6781609195402298]
[C: 1, gamma: 0.1, accuracy: 0.632183908045977]
[C: 1, gamma: 1, accuracy: 0.4942528735632184]
[C: 1, gamma: 10, accuracy: 0.47126436781609193]
[C: 10, gamma: 0.0001, accuracy: 0.47126436781609193]
[C: 10, gamma: 0.001, accuracy: 0.6551724137931034]
[C: 10, gamma: 0.01, accuracy: 0.7126436781609196]
[C: 10, gamma: 0.1, accuracy: 0.6436781609195402]
[C: 10, gamma: 1, accuracy: 0.4827586206896552]
[C: 10, gamma: 10, accuracy: 0.47126436781609193]
[C: 100, gamma: 0.0001, accuracy: 0.632183908045977]
[C: 100, gamma: 0.001, 

[C: 1, gamma: 1, accuracy: 0.41379310344827586]
[C: 1, gamma: 10, accuracy: 0.41379310344827586]
[C: 10, gamma: 0.0001, accuracy: 0.45977011494252873]
[C: 10, gamma: 0.001, accuracy: 0.6551724137931034]
[C: 10, gamma: 0.01, accuracy: 0.7701149425287356]
[C: 10, gamma: 0.1, accuracy: 0.5632183908045977]
[C: 10, gamma: 1, accuracy: 0.41379310344827586]
[C: 10, gamma: 10, accuracy: 0.41379310344827586]
[C: 100, gamma: 0.0001, accuracy: 0.6666666666666666]
[C: 100, gamma: 0.001, accuracy: 0.7471264367816092]
[C: 100, gamma: 0.01, accuracy: 0.7471264367816092]
[C: 100, gamma: 0.1, accuracy: 0.5632183908045977]
[C: 100, gamma: 1, accuracy: 0.41379310344827586]
[C: 100, gamma: 10, accuracy: 0.41379310344827586]
[C: 1000, gamma: 0.0001, accuracy: 0.7471264367816092]
[C: 1000, gamma: 0.001, accuracy: 0.7241379310344828]
[C: 1000, gamma: 0.01, accuracy: 0.7471264367816092]
[C: 1000, gamma: 0.1, accuracy: 0.5517241379310345]
[C: 1000, gamma: 1, accuracy: 0.41379310344827586]
[C: 1000, gamma: 10, 