In [13]:
# Colab-only: mount Google Drive so the vector CSV files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
import pandas as pd
from sklearn.metrics import  confusion_matrix
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier


In [15]:
def report_classifer(y_test,predicts):
    """Print the confusion matrix, then the per-class classification report.

    Args:
        y_test: True labels.
        predicts: Predicted labels, aligned element-wise with `y_test`.
    """
    # Both scikit-learn helpers take (true, predicted); print each result in turn.
    for summarise in (confusion_matrix, classification_report):
        print(summarise(y_test, predicts))


In [16]:
def read_data_set(csv_vectors):
    """Load a header-less CSV and split it into feature columns and a label column.

    Args:
        csv_vectors: Path (or file-like object) handed straight to `pd.read_csv`.

    Returns:
        A `(features, labels)` tuple: every column except the last as a
        DataFrame, and the last column as a Series.
    """
    table = pd.read_csv(csv_vectors, header=None)
    # The last column carries the class label; everything before it is the vector.
    return table.iloc[:, :-1], table.iloc[:, -1]

In [17]:
def run_svm_with_cross_val(path, filename, vec_type = 'i'):
  """Evaluate an SVM with 5-fold cross-validation and print the resulting report.

  Every sample is predicted by a model fitted on the other folds, so the
  printed confusion matrix and classification report cover the whole data set.

  Args:
    path: Directory prefix of the CSV; concatenated as-is, so it must already
      end with a path separator.
    filename: CSV file name without its leading vector-type letter.
    vec_type: Prefix prepended to `filename`; also shown in the report title.
  """
  csv_path = path + vec_type + filename
  vectors, targets = read_data_set(csv_path)

  # Note: no explicit kernel here (unlike run_svm, which uses kernel='linear').
  classifier = SVC(gamma='auto', random_state=42)
  out_of_fold = cross_val_predict(classifier, vectors, targets, cv=5, verbose=1)

  print("-----------------------------")
  print(f"Report Classifier: {vec_type}-vectors/SVM with cross validation")
  report_classifer(targets, out_of_fold)


In [18]:
def run_svm(path, filename, vec_type = 'i'):
  """Fit a linear-kernel SVM on a 70/30 hold-out split and print its test metrics.

  Args:
    path: Directory prefix of the CSV; concatenated as-is, so it must already
      end with a path separator.
    filename: CSV file name without its leading vector-type letter.
    vec_type: Prefix prepended to `filename`; also shown in the report title.
  """
  vectors, targets = read_data_set(path + vec_type + filename)

  # Fixed seed keeps the 30% test partition identical from run to run.
  split = train_test_split(vectors, targets, test_size=0.3, random_state=42)
  train_X, test_X, train_y, test_y = split

  model = SVC(kernel='linear')
  model.fit(train_X, train_y)
  predicted = model.predict(test_X)

  accuracy = metrics.accuracy_score(test_y, predicted)
  print("Accuracy on normal SVM without cross validation:", accuracy)
  print(f"Report Classifier: {vec_type}-vectors/SVM without cross validation")
  print(classification_report(test_y, predicted))


In [25]:
def run_mlp(path, filename, vec_type = 'i', random_state = 42):
  """Train an MLP on a 70/30 hold-out split and print its test metrics.

  Args:
    path: Directory prefix of the CSV; concatenated as-is, so it must already
      end with a path separator.
    filename: CSV file name without its leading vector-type letter.
    vec_type: Prefix prepended to `filename`; also shown in the report title.
    random_state: Seed passed to `MLPClassifier`. Without it the weight
      initialisation and mini-batch order differ on every run, so the
      reported scores could not be reproduced. Defaults to 42 to match the
      seed used elsewhere in this notebook.
  """
  features,labels=read_data_set(path+vec_type+filename)
  # The split seed stays fixed at 42 regardless of `random_state`; run_svm uses
  # the same train_test_split arguments.
  X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3,random_state=42)
  mlp = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam',
                      max_iter=500, random_state=random_state)
  mlp.fit(X_train,y_train)
  # Only test-set predictions are reported; the unused training-set
  # prediction pass was removed.
  predict_test = mlp.predict(X_test)
  # Model Accuracy
  print("Accuracy on MLP:",metrics.accuracy_score(y_test, predict_test))
  print(f"Report Classifier: {vec_type}-vectors/MLP")
  print(classification_report(y_test, predict_test))



In [19]:
# Directory prefix for the vector CSVs; the run_* functions concatenate it directly, so keep the trailing '/'.
path = '/content/drive/MyDrive/xivectors/'
filename = 'vectors_ArchiMob_5000into5.csv' # base name WITHOUT the leading 'i'/'x'; the run_* functions prepend vec_type to it

In [20]:
# Train a linear-kernel SVM on the i-vectors and score it on a single 70/30 hold-out split.
run_svm(path, filename, 'i')

Accuracy on normal SVM without cross validation: 0.8570666666666666
Report Classifier: i-vectors/SVM without cross validation
              precision    recall  f1-score   support

          AG       0.82      0.87      0.84      1513
          BE       0.87      0.89      0.88      1482
          BS       0.87      0.86      0.87      1507
          LU       0.88      0.86      0.87      1507
          ZH       0.84      0.81      0.82      1491

    accuracy                           0.86      7500
   macro avg       0.86      0.86      0.86      7500
weighted avg       0.86      0.86      0.86      7500



In [21]:
# Re-evaluate an SVM on the i-vectors with 5-fold cross-validation, as a check that the
# hold-out score above is not inflated by data leaking into the test set.
# NOTE(review): this run uses SVC(gamma='auto') while run_svm uses SVC(kernel='linear'),
# so the two scores are not a like-for-like comparison.
run_svm_with_cross_val(path, filename, 'i')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.1min finished


-----------------------------
Report Classifier: i-vectors/SVM with cross validation
[[4292  146  229  151  182]
 [ 174 4542   90   91  103]
 [ 241   92 4329   60  278]
 [ 218  104   64 4301  313]
 [ 378  190  359  227 3846]]
              precision    recall  f1-score   support

          AG       0.81      0.86      0.83      5000
          BE       0.90      0.91      0.90      5000
          BS       0.85      0.87      0.86      5000
          LU       0.89      0.86      0.88      5000
          ZH       0.81      0.77      0.79      5000

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



In [27]:
# Train the MLP (three hidden layers of 8 units) on the i-vectors with a 70/30 hold-out split.
run_mlp(path, filename, 'i')

Accuracy on MLP: 0.8468
Report Classifier: i-vectors/MLP
              precision    recall  f1-score   support

          AG       0.82      0.83      0.82      1513
          BE       0.87      0.89      0.88      1482
          BS       0.86      0.83      0.85      1507
          LU       0.86      0.87      0.86      1507
          ZH       0.83      0.81      0.82      1491

    accuracy                           0.85      7500
   macro avg       0.85      0.85      0.85      7500
weighted avg       0.85      0.85      0.85      7500



In [22]:
# Train a linear-kernel SVM on the x-vectors and score it on a single 70/30 hold-out split.
run_svm(path, filename, 'x')

Accuracy on normal SVM without cross validation: 0.9670666666666666
Report Classifier: x-vectors/SVM without cross validation
              precision    recall  f1-score   support

          AG       0.95      0.97      0.96      1513
          BE       0.97      0.98      0.98      1482
          BS       0.98      0.97      0.97      1507
          LU       0.98      0.96      0.97      1507
          ZH       0.96      0.96      0.96      1491

    accuracy                           0.97      7500
   macro avg       0.97      0.97      0.97      7500
weighted avg       0.97      0.97      0.97      7500



In [23]:
# Re-evaluate an SVM on the x-vectors with 5-fold cross-validation, as a check that the
# hold-out score above is not inflated by data leaking into the test set.
# NOTE(review): this run uses SVC(gamma='auto') while run_svm uses SVC(kernel='linear'),
# so the two scores are not a like-for-like comparison.
run_svm_with_cross_val(path, filename, 'x')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.0min finished


-----------------------------
Report Classifier: x-vectors/SVM with cross validation
[[4959   12    6   16    7]
 [  60 4881    4   28   27]
 [  62    2 4901    8   27]
 [ 132   23   21 4400  424]
 [ 192  139   67  101 4501]]
              precision    recall  f1-score   support

          AG       0.92      0.99      0.95      5000
          BE       0.97      0.98      0.97      5000
          BS       0.98      0.98      0.98      5000
          LU       0.97      0.88      0.92      5000
          ZH       0.90      0.90      0.90      5000

    accuracy                           0.95     25000
   macro avg       0.95      0.95      0.95     25000
weighted avg       0.95      0.95      0.95     25000



In [28]:
# Train the MLP (three hidden layers of 8 units) on the x-vectors with a 70/30 hold-out split.
run_mlp(path, filename, 'x')

Accuracy on MLP: 0.9734666666666667
Report Classifier: x-vectors/MLP
              precision    recall  f1-score   support

          AG       0.97      0.97      0.97      1513
          BE       0.97      0.98      0.98      1482
          BS       0.98      0.98      0.98      1507
          LU       0.97      0.97      0.97      1507
          ZH       0.97      0.96      0.97      1491

    accuracy                           0.97      7500
   macro avg       0.97      0.97      0.97      7500
weighted avg       0.97      0.97      0.97      7500



---------------------