# Train and Dump Model - Pipeline

In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support
%matplotlib inline

In [2]:
# Load the labelled dataset, then separate the feature columns from the
# 'Label' target column.
data = pd.read_csv('labelled_data_new.csv')
X = data.drop(columns=['Label'])
y = data['Label']


In [3]:
# Split features/labels into train and test partitions with a fixed seed.
# NOTE(review): test_size=0.7 assigns 70% of the rows to the *test* split —
# confirm this is intended rather than 0.3.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.7,
    random_state=10,
)

In [4]:
# Keep an independent copy of the test features as they are at this point.
# NOTE(review): X_test2 is not referenced again in the visible cells.
X_test2 = X_test.copy(deep=True)

In [5]:
# Standardize the features with statistics learned from the training split
# only, and bind the scaled frames back to X_train / X_test explicitly.
# StandardScaler(copy=False) merely *attempts* in-place scaling and gives no
# guarantee for DataFrame input, so discarding the return values could leave
# the data that later cells train on unscaled.
# NOTE: this cell rebinds X_train / X_test, so it is not idempotent — re-run
# the train/test split cell before re-running this one.
sc = StandardScaler()
X_train = pd.DataFrame(
    sc.fit_transform(X_train), index=X_train.index, columns=X_train.columns
)
X_test = pd.DataFrame(
    sc.transform(X_test), index=X_test.index, columns=X_test.columns
)
# Display the scaled test features as a NumPy array.
X_test.to_numpy()

array([[-0.34038599,  0.11560035,  0.63822535,  1.14716531,  0.62539939,
         0.52455605],
       [ 2.16245217,  0.57191753, -0.26068359,  0.19384234,  0.12905067,
         0.52455605],
       [ 0.16018164,  0.11560035,  0.18877088, -1.23614212, -0.36729805,
         0.06709438],
       ...,
       [ 0.16018164, -0.34071682, -0.26068359,  2.10048828,  0.62539939,
         0.52455605],
       [-0.34038599,  1.0282347 ,  1.98658875,  0.19384234, -0.36729805,
        -0.39036729],
       [ 0.16018164,  1.94086905,  1.08767981,  1.62382679,  1.61809683,
        -0.39036729]])

In [6]:
# Hyper-parameter grid: an RBF family (gamma x C) and a linear family (C only).
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

# Cross-validated search over the grid, ranked by macro-averaged F1.
# A standalone `clf3 = SVC(probability=True)` used to precede this line, but
# it was rebound here before ever being used, so it has been dropped.
# NOTE: the searched estimator is a plain SVC(); pass SVC(probability=True)
# here if predict_proba is needed on the fitted search object.
clf3 = GridSearchCV(SVC(), tuned_parameters, scoring='f1_macro')
clf3.fit(X_train, y_train)

print("Best parameters set found on development set:")
print(clf3.best_params_)

Best parameters set found on development set:
{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}


In [7]:
# Report the mean cross-validation score for every parameter combination
# that the grid search evaluated.
cv_results = clf3.cv_results_
means = cv_results['mean_test_score']
for mean, params in zip(means, cv_results['params']):
    print("%0.3f for %r" % (mean, params))

0.234 for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.234 for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.909 for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.234 for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.947 for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.909 for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.953 for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.947 for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.942 for {'C': 1, 'kernel': 'linear'}
0.941 for {'C': 10, 'kernel': 'linear'}
0.930 for {'C': 100, 'kernel': 'linear'}
0.930 for {'C': 1000, 'kernel': 'linear'}


In [8]:
# Score the tuned SVM on the test split with macro-averaged metrics.
pre3 = clf3.predict(X_test)
macro_scores = precision_recall_fscore_support(y_test, pre3, average='macro')
precision, recall, f1 = macro_scores[:3]  # the fourth element is not used
print("Gridsearch with SVM - Precision = %0.3f, Recall = %0.3f, F1-score = %0.3f" % (precision, recall, f1))

Gridsearch with SVM - Precision = 0.919, Recall = 0.949, F1-score = 0.933


In [9]:
from sklearn.metrics import classification_report


In [10]:
# Per-class precision / recall / F1 for the grid-search predictions.
target_names = [
    'class 0',
    'class 1',
    'class 2',
]
grid_search_report = classification_report(y_test, pre3, target_names=target_names)
print(grid_search_report)
 

              precision    recall  f1-score   support

     class 0       0.87      0.98      0.92        41
     class 1       0.96      0.92      0.94       192
     class 2       0.93      0.95      0.94       117

    accuracy                           0.94       350
   macro avg       0.92      0.95      0.93       350
weighted avg       0.94      0.94      0.94       350



In [11]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# from sklearn.externals import joblib
import joblib



In [12]:
# Scaler + SVC pipeline. The SVC hyper-parameters are taken directly from the
# grid search result rather than re-typed by hand, so the pipeline cannot
# silently drift from what the search actually selected.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(**clf3.best_params_)),
])


In [13]:
# Fit the pipeline on the training split and display its score on the test
# split (fit returns the pipeline itself, so the calls can be chained).
# NOTE(review): an earlier cell already ran a StandardScaler over
# X_train / X_test. If those frames hold standardized values at this point,
# the pipeline's own scaler is being fit on already-scaled data — confirm
# whether the raw features should be used here instead.
pipe.fit(X_train, y_train).score(X_test, y_test)

0.9371428571428572

In [14]:
# Serialize the fitted pipeline to disk.
joblib.dump(value=pipe, filename='my_model.joblib')

['transform_predict.joblib']

In [15]:
# Read the serialized model back from disk as a round-trip check.
# joblib files are pickle-based: only load files from a trusted source.
pipeline = joblib.load(filename='my_model.joblib')


In [16]:
# Predict test-split labels with the model that was reloaded from disk.
predictions = pipeline.predict(X=X_test)

In [17]:
# Per-class precision / recall / F1 for the reloaded model's predictions.
target_names = [
    'class 0',
    'class 1',
    'class 2',
]
pipeline_report = classification_report(y_test, predictions, target_names=target_names)
print(pipeline_report)
 

              precision    recall  f1-score   support

     class 0       0.87      0.98      0.92        41
     class 1       0.96      0.92      0.94       192
     class 2       0.93      0.95      0.94       117

    accuracy                           0.94       350
   macro avg       0.92      0.95      0.93       350
weighted avg       0.94      0.94      0.94       350



In [18]:
# Display the first predicted label as a quick sanity check.
predictions[0]

1.0

In [20]:
# Persist the fitted grid-search object under its own filename.
# Writing it to "my_model.joblib" would overwrite the scaler + SVC pipeline
# that an earlier cell dumped to that path, leaving a model on disk that
# carries no scaler of its own.
model_filename = "svc_grid_search.joblib"

# Save the model using Joblib
joblib.dump(clf3, model_filename)

['my_model.joblib']