In [1]:
#Importing the required libraries to build the SVM Machine learning model
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [3]:
#Load the input data
data = np.load('./data/data_pca_target_50.npz')
data.allow_pickle = True

In [4]:
#input data with 50 pca components
X = data['arr_0']  
# target data
y = data['arr_1']

In [5]:
#Split data into train and test data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,stratify=y)

In [6]:
#Load the SVM model and specify the model feature values
model_svc = SVC(probability=True)

param_grid = {'C':[0.5,1,10,20,30,50],
             'kernel':['rbf','poly'],
             'gamma':[0.1,0.05,0.01,0.001,0.002,0.005],
             'coef0':[0,1]}

In [8]:
# Specify the feature values for the gridsearchCV
# Gridsearch is used for finding the optimal combination of hyperparameters for a specific model
model_grid = GridSearchCV(model_svc,
                          param_grid=param_grid,
                          scoring='accuracy',cv=3,verbose=2)

In [9]:
# Fit the model
model_grid.fit(X_train,y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.8s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.7s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.1s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.4s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.3s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.5s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   1.7s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   1.7s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   1.8s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.2s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.2s
[CV] END ............C=0.5, coef0=0, gamma=0.0

In [12]:
# Utilize the built gridsearchCV to find the best parameters
model_final = model_grid.best_estimator_

In [13]:
#Make the predictions using the model
y_pred = model_final.predict(X_test)

In [14]:
#Now to view the classification report of our model
#Classification Report
cr=metrics.classification_report(y_test,y_pred,output_dict=True)

In [15]:
cr

{'Female': {'precision': 0.7855787476280834,
  'recall': 0.8363636363636363,
  'f1-score': 0.8101761252446184,
  'support': 495.0},
 'Male': {'precision': 0.7890625,
  'recall': 0.7283653846153846,
  'f1-score': 0.7575,
  'support': 416.0},
 'accuracy': 0.7870472008781558,
 'macro avg': {'precision': 0.7873206238140418,
  'recall': 0.7823645104895105,
  'f1-score': 0.7838380626223092,
  'support': 911.0},
 'weighted avg': {'precision': 0.7871695719823285,
  'recall': 0.7870472008781558,
  'f1-score': 0.7861220439034974,
  'support': 911.0}}