# MODEL TRAINING 
## Using 5 cross validation

In [1]:
# Importing modules
import joblib
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [2]:
# Importing Data
tr_feature = pd.read_csv('train_features.csv')
tr_label = pd.read_csv('train_label.csv')
vl_feature = pd.read_csv('val_features.csv')
vl_label = pd.read_csv('val_label.csv')
ts_feature = pd.read_csv('test_features.csv')
ts_label = pd.read_csv('test_label.csv')

In [3]:
tr_feature.shape

(3898, 12)

In [4]:
# Import modules for preprocessing data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [5]:
tr_feature = scaler.fit_transform(tr_feature)
vl_feature = scaler.fit_transform(vl_feature)
ts_feature = scaler.fit_transform(ts_feature)

In [6]:
# Find the best parameter
def print_result(results):
    print(f'BEST PARAMS :{results.best_params_}')
    mean = results.cv_results_['mean_test_score']
    stds = results.cv_results_['std_test_score']
    params = results.cv_results_['params']
    
    for mean,std,param in zip(mean,stds,params):
        print(f'{round(mean,3)} +/-{round(std,3)} for{param}')

check the last commit in which multiple parameters are used
now i'am working on the best parameter c= 30 and kernel = 'linear'
in my older commit the model take more time to fit the model

In [7]:
svm_model = SVC()
parameters = {
    'C' :[30],
    'kernel' :['linear','rbf']
}

In [8]:
cv = GridSearchCV(svm_model,parameters,cv=5)

In [9]:
cv.fit(tr_feature,tr_label.values.ravel())

In [10]:
print_result(cv)

BEST PARAMS :{'C': 30, 'kernel': 'linear'}
0.996 +/-0.002 for{'C': 30, 'kernel': 'linear'}
0.995 +/-0.002 for{'C': 30, 'kernel': 'rbf'}


In [11]:
# joblib.dump(cv.best_estimator_,'SVM_MDL.pkl')

joblib.dump(cv,'SVM_MDL.pkl')

['SVM_MDL.pkl']

In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
from time import time

In [13]:
model = {'SVM_MDL.pkl':joblib.load('SVM_MDL.pkl')}

In [14]:
def evaluuate_model(name,model,features,labels):
    start = time()
    pred = model.predict(features)
    end = time()
    accuracy = round(accuracy_score(labels,pred),3)
    precision = round(precision_score(labels,pred),3)
    recall = round(recall_score(labels,pred),3)
    print(f'{name}--Accuracy: {accuracy}/ Precision: {precision} / Recall: {recall} / Latency: {round((end-start)*1000,1)}ms')

### Geting accuracy for validation data

In [15]:
for name,mdl in model.items():
    evaluuate_model(name,mdl,vl_feature,vl_label)

SVM_MDL.pkl--Accuracy: 0.996/ Precision: 0.99 / Recall: 0.994 / Latency: 8.7ms


### Geting accuracy for Test data

In [17]:
evaluuate_model('SVM',model['SVM_MDL.pkl'],ts_feature,ts_label)

SVM--Accuracy: 0.989/ Precision: 0.975 / Recall: 0.981 / Latency: 8.1ms
