In [37]:
# Support-Vector Machine (SVM)
#
# Created by ravissement 2021-12-28.
# SVM model using i-vector Features.

import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# DataSet-2000 Set

In [38]:
path = "/data/dataset-2284/ivectors8k/"
files = os.listdir(path)

# All : 2000

# label ratio		0(1166)		1(834)
# train ratio train(8) : test(1) : valid(1)

# ---------------------

# 8			934		668


# 1			116		83


# 1			116		83

# X ratio Set
xtrain_ratio = 934
xtest_ratio = 1050
xvalid_ratio = 1166

# Y ratio Set
ytrain_ratio = 668
ytest_ratio = 751
yvalid_ratio = 834

X_train = []
X_test = []
y_train = []
y_test = []
X_valid = []
y_valid = []

pos0_trainCount = 0
pos0_testCount = 0
pos0_validCount = 0

pos1_trainCount = 0
pos1_testCount = 0
pos1_validCount = 0

ycount = 0
x1count = 0
xcount = 0
for file in files:
    ivt = np.load(path+file,allow_pickle=True)
    if(int(file[7]) == 0):
        x1count+=1
        if(x1count <= 1166):
            xcount+=1
            if(xcount <= xtrain_ratio):
                X_train.append(ivt)
                y_train.append(int(file[7]))
                pos0_trainCount += 1
            elif(xcount <= xtest_ratio):
                X_test.append(ivt)
                y_test.append(int(file[7]))
                pos0_testCount += 1
            elif(xcount <= xvalid_ratio):
                X_valid.append(ivt)
                y_valid.append(int(file[7]))
                pos0_validCount += 1
    else:
        ycount+=1
        if(ycount <= ytrain_ratio):
            X_train.append(ivt)
            y_train.append(int(file[7]))
            pos1_trainCount += 1
        elif(ycount <= ytest_ratio):
            X_test.append(ivt)
            y_test.append(int(file[7]))
            pos1_testCount += 1
        elif(ycount <= yvalid_ratio):
            X_valid.append(ivt)
            y_valid.append(int(file[7]))
            pos1_validCount += 1
        
print("0 : ",xcount, "1 : ",ycount , "\t", "total : ", xcount + ycount)
print("\n")
print("train Set : ", len(X_train), len(y_train)) 
print("test Set : ", len(X_test), len(y_test)) 
print("Valid Set : ", len(X_valid), len(y_valid))
print("\n")
print("train Label 0 : ", pos0_trainCount, "\t", "train Label 1 : ", pos1_trainCount)
print("test Label 0 : ", pos0_testCount, "\t", "test Label 1 : ", pos1_testCount)
print("valid Label 0 : ", pos0_validCount, "\t", "valid Label 1 : ", pos1_validCount)

0 :  1166 1 :  834 	 total :  2000


train Set :  1602 1602
test Set :  199 199
Valid Set :  199 199


train Label 0 :  934 	 train Label 1 :  668
test Label 0 :  116 	 test Label 1 :  83
valid Label 0 :  116 	 valid Label 1 :  83


In [39]:
# grid search

best_score = 0
values = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]

for g in values:

    for c in values:
        svc = SVC(gamma=g, C=c).fit(X_train, y_train)
        score = svc.score(X_valid, y_valid)

        if score > best_score:
            best_score = score
            best_param = {'C':c, 'gamma':g}


print('best score ==> {:.3f}'.format(best_score))
print('best parameter ==> {}'.format(best_param))

best score ==> 0.709
best parameter ==> {'C': 1, 'gamma': 0.001}


In [40]:
svc = SVC(gamma=best_param['gamma'], C=best_param['C']).fit(X_train, y_train)

In [41]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
y_pred = svc.predict(X_valid)
y_pred2 = svc.predict(X_test)
#print(y_pred)

In [42]:
from sklearn.metrics import confusion_matrix
print("Valid")
print(confusion_matrix(y_valid, y_pred))
print("\n")
print("Test")
print(confusion_matrix(y_test, y_pred2))

Valid
[[97 19]
 [39 44]]


Test
[[102  14]
 [ 66  17]]


In [43]:
from sklearn.metrics import  classification_report
print("Valid")
print(classification_report(y_valid, y_pred, digits=4))
print("\n")
print("Test")
print(classification_report(y_test, y_pred2, digits=4))

Valid
              precision    recall  f1-score   support

           0     0.7132    0.8362    0.7698       116
           1     0.6984    0.5301    0.6027        83

    accuracy                         0.7085       199
   macro avg     0.7058    0.6832    0.6863       199
weighted avg     0.7071    0.7085    0.7001       199



Test
              precision    recall  f1-score   support

           0     0.6071    0.8793    0.7183       116
           1     0.5484    0.2048    0.2982        83

    accuracy                         0.5980       199
   macro avg     0.5778    0.5421    0.5083       199
weighted avg     0.5826    0.5980    0.5431       199



In [46]:
#valid
print("VALID pos_label : 1")
print("f1 : ",f1_score(y_valid, y_pred))
print("recall : ",recall_score(y_valid, y_pred)) 
print("precision : ",precision_score(y_valid, y_pred))
print("\n")
#test
print("TEST pos_label : 1")
print("f1 : ",f1_score(y_test, y_pred2))
print("recall : ",recall_score(y_test, y_pred2)) 
print("precision : ",precision_score(y_test, y_pred2))

VALID pos_label : 1
f1 :  0.6027397260273972
recall :  0.5301204819277109
precision :  0.6984126984126984


TEST pos_label : 1
f1 :  0.2982456140350877
recall :  0.20481927710843373
precision :  0.5483870967741935


In [47]:
#valid
print("VALID pos_label : 0")
print("f1 : ",f1_score(y_valid, y_pred, pos_label=0))
print("recall : ",recall_score(y_valid, y_pred,  pos_label=0)) 
print("precision : ",precision_score(y_valid, y_pred,  pos_label=0))
print("\n")
#test
print("TEST pos_label : 0")
print("f1 : ",f1_score(y_test, y_pred2, pos_label=0))
print("recall : ",recall_score(y_test, y_pred2,  pos_label=0)) 
print("precision : ",precision_score(y_test, y_pred2,  pos_label=0))

VALID pos_label : 0
f1 :  0.7698412698412698
recall :  0.8362068965517241
precision :  0.7132352941176471


TEST pos_label : 0
f1 :  0.7183098591549295
recall :  0.8793103448275862
precision :  0.6071428571428571
