In [5]:
import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 

In [4]:
# creating dataset 
# binary classification
from sklearn.datasets import make_classification
x,y=make_classification(n_samples=2000,n_features=3,n_redundant=0,n_classes=2,n_clusters_per_class=1,weights=[0.5,0.5])
# n_features number of features or independent features
#n_redundant = 0 means number of features that provide same or similiar imformation
# this all parameters effect the performance of model
# this make classification is giving clear dataset so accuracy can be very high

In [6]:
pd.DataFrame(y).value_counts()

0
0    1000
1    1000
Name: count, dtype: int64

In [7]:
pd.DataFrame(x)
# these are the features of in x 

Unnamed: 0,0,1,2
0,1.150675,1.221356,-0.561838
1,-1.227509,-1.637511,0.525579
2,0.650096,0.214088,-0.279398
3,-0.827110,0.301187,-1.279705
4,1.440917,1.475635,-0.219586
...,...,...,...
1995,-0.879419,-1.166508,0.064046
1996,0.937570,1.457660,0.450626
1997,-0.806906,-1.316153,0.527546
1998,0.832416,1.391398,-0.042158


In [8]:
# split the data into train and test
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.3,random_state=42)

## Traning model SVC

In [9]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
svc = SVC() #let's try without any kernal
svc.fit(x_train,y_train)
y_pred = svc.predict(x_test)
report = classification_report(y_test,y_pred)
metrix = confusion_matrix(y_test,y_pred)
print(report)
print(metrix)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       281
           1       1.00      0.98      0.99       319

    accuracy                           0.99       600
   macro avg       0.99      0.99      0.99       600
weighted avg       0.99      0.99      0.99       600

[[280   1]
 [  5 314]]


This model is producing good result 
Its accuracy is also around the 93% 
We can improve the performance of model using the hypermeter tuning 

In [10]:
svc

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


these are the default parameter used by SVC models
Default it is using rbf kernal which is suitable for this types of dataset

In [11]:
# for same data let's use the different kernals
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
svc = SVC(kernel='linear') #let's try without any kernal
svc.fit(x_train,y_train)
y_pred = svc.predict(x_test)
report = classification_report(y_test,y_pred)
metrix = confusion_matrix(y_test,y_pred)
print(report)
print(metrix)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       281
           1       1.00      0.98      0.99       319

    accuracy                           0.99       600
   macro avg       0.99      0.99      0.99       600
weighted avg       0.99      0.99      0.99       600

[[280   1]
 [  5 314]]


In [12]:
svc

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [13]:
# for same data let's use the different kernals
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
svc = SVC(kernel='poly') #let's try without any kernal
svc.fit(x_train,y_train)
y_pred = svc.predict(x_test)
report = classification_report(y_test,y_pred)
metrix = confusion_matrix(y_test,y_pred)
print(report)
print(metrix)

              precision    recall  f1-score   support

           0       0.98      0.99      0.99       281
           1       0.99      0.98      0.99       319

    accuracy                           0.99       600
   macro avg       0.99      0.99      0.99       600
weighted avg       0.99      0.99      0.99       600

[[279   2]
 [  6 313]]


few changes in confusion metrixs while using the 'poly' kernal

In [14]:
svc

0,1,2
,C,1.0
,kernel,'poly'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [15]:
# for same data let's use the different kernals
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
svc = SVC(kernel='sigmoid') #let's try without any kernal
svc.fit(x_train,y_train)
y_pred = svc.predict(x_test)
report = classification_report(y_test,y_pred)
metrix = confusion_matrix(y_test,y_pred)
print(report)
print(metrix)

              precision    recall  f1-score   support

           0       0.96      0.97      0.96       281
           1       0.97      0.96      0.97       319

    accuracy                           0.97       600
   macro avg       0.97      0.97      0.97       600
weighted avg       0.97      0.97      0.97       600

[[273   8]
 [ 12 307]]


## Hyperparameter tuning 

In [24]:
parameters = {
    'C':[0.1,10,100,1000],
    'gamma':[1,0.1,0.01,0.001],
    'kernel':['rbf'] # rbf kernal is producing the best result so we are directly using the rbf for this model
}  

In [25]:
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(SVC(),param_grid=parameters,refit=True,verbose=3,cv=5)
grid.fit(x_train,y_train)
y_pred = grid.predict(x_test)
report = classification_report(y_test,y_pred)
metrix = confusion_matrix(y_test,y_pred)
print(report)
print(metrix)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.996 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.982 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=1.000 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.989 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=1.000 total time=   0.0s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.996 total time=   0.0s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.982 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.986 total time=   0.0s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.986 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=1.000 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.964 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;

In [26]:
print(report)
print(metrix)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       281
           1       1.00      0.98      0.99       319

    accuracy                           0.99       600
   macro avg       0.99      0.99      0.99       600
weighted avg       0.99      0.99      0.99       600

[[280   1]
 [  6 313]]


In [28]:
grid.best_params_
# it give the best parameters which is selected

{'C': 10, 'gamma': 1, 'kernel': 'rbf'}