Importing Required Libraries

In [44]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

Data Collection & Analysis

In [45]:
# Loading the data from csv file to a Pandas DataFrame
parkinsons_data = pd.read_csv('https://github.com/kushpatel19/Parkinson-Disease-Detection/blob/main/parkinsons.data')

In [46]:
# First 5 rows of the dataframe
parkinsons_data.head()

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [47]:
# Number of rows and columns in the dataframe
parkinsons_data.shape

(195, 24)

In [48]:
# Getting more information about the dataset
parkinsons_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

In [49]:
# Checking for missing values in each column
parkinsons_data.isnull().sum()

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

In [50]:
# Getting some statistical measures about the data
parkinsons_data.describe()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,...,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,154.228641,197.104918,116.324631,0.00622,4.4e-05,0.003306,0.003446,0.00992,0.029709,0.282251,...,0.046993,0.024847,21.885974,0.753846,0.498536,0.718099,-5.684397,0.22651,2.381826,0.206552
std,41.390065,91.491548,43.521413,0.004848,3.5e-05,0.002968,0.002759,0.008903,0.018857,0.194877,...,0.030459,0.040418,4.425764,0.431878,0.103942,0.055336,1.090208,0.083406,0.382799,0.090119
min,88.333,102.145,65.476,0.00168,7e-06,0.00068,0.00092,0.00204,0.00954,0.085,...,0.01364,0.00065,8.441,0.0,0.25657,0.574282,-7.964984,0.006274,1.423287,0.044539
25%,117.572,134.8625,84.291,0.00346,2e-05,0.00166,0.00186,0.004985,0.016505,0.1485,...,0.024735,0.005925,19.198,1.0,0.421306,0.674758,-6.450096,0.174351,2.099125,0.137451
50%,148.79,175.829,104.315,0.00494,3e-05,0.0025,0.00269,0.00749,0.02297,0.221,...,0.03836,0.01166,22.085,1.0,0.495954,0.722254,-5.720868,0.218885,2.361532,0.194052
75%,182.769,224.2055,140.0185,0.007365,6e-05,0.003835,0.003955,0.011505,0.037885,0.35,...,0.060795,0.02564,25.0755,1.0,0.587562,0.761881,-5.046192,0.279234,2.636456,0.25298
max,260.105,592.03,239.17,0.03316,0.00026,0.02144,0.01958,0.06433,0.11908,1.302,...,0.16942,0.31482,33.047,1.0,0.685151,0.825288,-2.434031,0.450493,3.671155,0.527367


In [51]:
# Distribution of target Variable
parkinsons_data['status'].value_counts()

1    147
0     48
Name: status, dtype: int64

1  --> Parkinson's Disease Positive

0 --> Parkinson's Disease Negative (Healthy)


In [52]:
# Grouping the data based on the target variable
parkinsons_data.groupby('status').mean()

Unnamed: 0_level_0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,181.937771,223.63675,145.207292,0.003866,2.3e-05,0.001925,0.002056,0.005776,0.017615,0.162958,...,0.013305,0.028511,0.011483,24.67875,0.442552,0.695716,-6.759264,0.160292,2.154491,0.123017
1,145.180762,188.441463,106.893558,0.006989,5.1e-05,0.003757,0.0039,0.011273,0.033658,0.321204,...,0.0276,0.053027,0.029211,20.974048,0.516816,0.725408,-5.33342,0.248133,2.456058,0.233828


Data Pre-Processing

Separating the features & Target

In [53]:
X = parkinsons_data.drop(columns=['name','status'], axis=1)
Y = parkinsons_data['status']
print(X)
print(Y)

     MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0        119.992       157.302        74.997         0.00784   
1        122.400       148.650       113.819         0.00968   
2        116.682       131.111       111.555         0.01050   
3        116.676       137.871       111.366         0.00997   
4        116.014       141.781       110.655         0.01284   
..           ...           ...           ...             ...   
190      174.188       230.978        94.261         0.00459   
191      209.516       253.017        89.488         0.00564   
192      174.688       240.005        74.287         0.01360   
193      198.764       396.961        74.904         0.00740   
194      214.289       260.277        77.973         0.00567   

     MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
0             0.00007   0.00370   0.00554     0.01109       0.04374   
1             0.00008   0.00465   0.00696     0.01394       0.06134   
2             0.00

Splitting the data to training data & Test data

In [54]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [55]:
print(X.shape, X_train.shape, X_test.shape)

(195, 22) (156, 22) (39, 22)


Data Standardization                                                           

---



Standardization is an important technique that is mostly performed as a pre-processing step before many Machine Learning models, to standardize the range of features of input data set.

In [56]:
scaler = StandardScaler()

In [57]:
scaler.fit(X_train)

StandardScaler()

In [58]:
X_train = scaler.transform(X_train)

X_test = scaler.transform(X_test)

In [59]:
print(X_train)

[[ 0.63239631 -0.02731081 -0.87985049 ... -0.97586547 -0.55160318
   0.07769494]
 [-1.05512719 -0.83337041 -0.9284778  ...  0.3981808  -0.61014073
   0.39291782]
 [ 0.02996187 -0.29531068 -1.12211107 ... -0.43937044 -0.62849605
  -0.50948408]
 ...
 [-0.9096785  -0.6637302  -0.160638   ...  1.22001022 -0.47404629
  -0.2159482 ]
 [-0.35977689  0.19731822 -0.79063679 ... -0.17896029 -0.47272835
   0.28181221]
 [ 1.01957066  0.19922317 -0.61914972 ... -0.716232    1.23632066
  -0.05829386]]


Model Training                                                                  
Machine Learning Classification

Let's try all the clasification models                                        

- Support Vector Machine Model
- Logistic Regression
- AdaBoost Classifier 
- RandomForest Classifier
- GaussianNB
- K Nearest Neighbor(KNN)
- DecisionTree Classifier
- XGB Classifier 
- XGBRF Classifier



In [60]:
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from xgboost import XGBRFClassifier
from sklearn.ensemble import AdaBoostClassifier

In [65]:
def accuracy(model, title):
  model.fit(X_train, Y_train)
  predictions = model.predict(X_test)
  acc = accuracy_score(Y_test, predictions)
  print('Accuracy for', title, 'is :-', acc,'\n')

In [69]:
model_1 = svm.SVC(kernel='rbf')
accuracy(model_1,'Support Vector Machine Model')
model_2 = LogisticRegression()
accuracy(model_2,'Logistic Regression')
model_3 = AdaBoostClassifier()
accuracy(model_3,'Ada')
model_4 = RandomForestClassifier()
accuracy(model_4,'Random Forest')
model_5 = GaussianNB()
accuracy(model_5,'NBG')
model_6 = KNeighborsClassifier()
accuracy(model_6,'K Nearest Neighbor(KNN)')
model_7 = DecisionTreeClassifier()
accuracy(model_7,'Decision Tree')
model_8 = XGBClassifier()
accuracy(model_8,'XGB')
model_9 = XGBRFClassifier()
accuracy(model_9,'XGBRF')

Accuracy for Support Vector Machine Model is :- 0.8974358974358975 

Accuracy for Logistic Regression is :- 0.8205128205128205 

Accuracy for Ada is :- 0.8974358974358975 

Accuracy for Random Forest is :- 0.7948717948717948 

Accuracy for NBG is :- 0.6153846153846154 

Accuracy for K Nearest Neighbor(KNN) is :- 0.7692307692307693 

Accuracy for Decision Tree is :- 0.7435897435897436 

Accuracy for XGB is :- 0.8205128205128205 

Accuracy for XGBRF is :- 0.8205128205128205 



Now, we will select the best model among them according to the highest score of accuracy

Here, SVM and AdaBoostClassifier both give same as well as highest score.    
So, we can continue with any of them. (Let's choose Support Vector Machine Model)

In [80]:
best_model = model_1

In [81]:
best_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

Model Evaluation

Accuracy Score

In [82]:
# accuracy score on training data
X_train_prediction = best_model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data : ', training_data_accuracy)

Accuracy score of training data :  0.9166666666666666


In [83]:
# accuracy score on training data
X_test_prediction = best_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of test data : ', test_data_accuracy)
print(classification_report(Y_test,X_test_prediction))

Accuracy score of test data :  0.8974358974358975
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         8
           1       0.89      1.00      0.94        31

    accuracy                           0.90        39
   macro avg       0.94      0.75      0.80        39
weighted avg       0.91      0.90      0.88        39



Building a Predictive System

In [84]:
input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the data
std_data = scaler.transform(input_data_reshaped)

prediction = best_model.predict(std_data)
print(prediction)

if (prediction[0] == 0):
  print("The Person does not have Parkinsons Disease")

else:
  print("The Person has Parkinsons Disease")


[0]
The Person does not have Parkinsons Disease


  "X does not have valid feature names, but"


Hyperparameter Tuning for increasing accuracy

In [85]:
# See all the parameters
best_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

Important Perameter :-                                                         
- C 
- gamma
- kernel

1. GridSearchCV

In [87]:
from sklearn.model_selection import GridSearchCV
 
# defining parameter range
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf','linear'],
              'cache_size': [0.1, 1, 10, 100, 1000],
              'degree' : [0.1,1,10]}
 
grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 10)
 
# fitting the model for grid search
grid.fit(X_train, Y_train)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[CV 1/5; 251/750] START C=1, cache_size=100, degree=1, gamma=1, kernel=rbf......
[CV 1/5; 251/750] END C=1, cache_size=100, degree=1, gamma=1, kernel=rbf;, score=0.781 total time=   0.0s
[CV 2/5; 251/750] START C=1, cache_size=100, degree=1, gamma=1, kernel=rbf......
[CV 2/5; 251/750] END C=1, cache_size=100, degree=1, gamma=1, kernel=rbf;, score=0.774 total time=   0.0s
[CV 3/5; 251/750] START C=1, cache_size=100, degree=1, gamma=1, kernel=rbf......
[CV 3/5; 251/750] END C=1, cache_size=100, degree=1, gamma=1, kernel=rbf;, score=0.774 total time=   0.0s
[CV 4/5; 251/750] START C=1, cache_size=100, degree=1, gamma=1, kernel=rbf......
[CV 4/5; 251/750] END C=1, cache_size=100, degree=1, gamma=1, kernel=rbf;, score=0.806 total time=   0.0s
[CV 5/5; 251/750] START C=1, cache_size=100, degree=1, gamma=1, kernel=rbf......
[CV 5/5; 251/750] END C=1, cache_size=100, degree=1, gamma=1, kernel=rbf;, score=0.774 total time=   0.0s


GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'cache_size': [0.1, 1, 10, 100, 1000],
                         'degree': [0.1, 1, 10],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear']},
             verbose=10)

In [89]:
# print best parameter after tuning
print(grid.best_params_)
# print how our model looks after hyper-parameter tuning
print(grid.best_estimator_)
grid_predictions = grid.predict(X_test)
# print classification report
print(classification_report(Y_test, grid_predictions))

{'C': 100, 'cache_size': 0.1, 'degree': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
SVC(C=100, cache_size=0.1, degree=0.1, gamma=0.1)
              precision    recall  f1-score   support

           0       0.73      1.00      0.84         8
           1       1.00      0.90      0.95        31

    accuracy                           0.92        39
   macro avg       0.86      0.95      0.90        39
weighted avg       0.94      0.92      0.93        39



In [93]:
# # example of grid searching key hyperparametres for SVC
# from sklearn.datasets import make_blobs
# from sklearn.model_selection import RepeatedStratifiedKFold
# from sklearn.model_selection import GridSearchCV
# from sklearn.svm import SVC
# # define dataset
# # X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# # define model and parameters
# model = SVC()
# kernel = ['poly', 'rbf', 'sigmoid','linear']
# C = [100, 50, 10, 1.0, 0.1, 0.01]
# gamma=[1, 0.1, 0.01, 0.001, 0.0001]
# # gamma = ['scale']
# # define grid search
# grid = dict(kernel=kernel,C=C,gamma=gamma)
# cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
# grid_result = grid_search.fit(X_train, Y_train)
# # summarize results
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# means = grid_result.cv_results_['mean_test_score']
# stds = grid_result.cv_results_['std_test_score']
# params = grid_result.cv_results_['params']
# for mean, stdev, param in zip(means, stds, params):
#     print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.966111 using {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
0.937917 (0.064705) with: {'C': 100, 'gamma': 1, 'kernel': 'poly'}
0.826667 (0.056104) with: {'C': 100, 'gamma': 1, 'kernel': 'rbf'}
0.678333 (0.105428) with: {'C': 100, 'gamma': 1, 'kernel': 'sigmoid'}
0.836250 (0.085304) with: {'C': 100, 'gamma': 1, 'kernel': 'linear'}
0.937917 (0.064705) with: {'C': 100, 'gamma': 0.1, 'kernel': 'poly'}
0.966111 (0.045258) with: {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
0.754722 (0.097775) with: {'C': 100, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.836250 (0.085304) with: {'C': 100, 'gamma': 0.1, 'kernel': 'linear'}
0.856528 (0.062432) with: {'C': 100, 'gamma': 0.01, 'kernel': 'poly'}
0.905972 (0.055691) with: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
0.839444 (0.068834) with: {'C': 100, 'gamma': 0.01, 'kernel': 'sigmoid'}
0.836250 (0.085304) with: {'C': 100, 'gamma': 0.01, 'kernel': 'linear'}
0.743333 (0.008165) with: {'C': 100, 'gamma': 0.001, 'kernel': 'poly'}
0.859028 (0.060735) with: {'C'

2. RandomizedSearchCV

In [90]:
from sklearn.model_selection import RandomizedSearchCV
 
# defining parameter range
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf'],
              'cache_size': [0.1, 1, 10, 100, 1000],
              'degree' : [0.1,1,10]}
 
random_search = RandomizedSearchCV(svm.SVC(), param_grid, refit = True, verbose = 10)
 
# fitting the model for random_search
random_search.fit(X_train, Y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5; 1/10] START C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf....
[CV 1/5; 1/10] END C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf;, score=0.844 total time=   0.0s
[CV 2/5; 1/10] START C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf....
[CV 2/5; 1/10] END C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf;, score=0.806 total time=   0.0s
[CV 3/5; 1/10] START C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf....
[CV 3/5; 1/10] END C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf;, score=0.806 total time=   0.0s
[CV 4/5; 1/10] START C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf....
[CV 4/5; 1/10] END C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf;, score=0.806 total time=   0.0s
[CV 5/5; 1/10] START C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf....
[CV 5/5; 1/10] END C=100, cache_size=1000, degree=0.1, gamma=1, kernel=rbf;, score=0.774 total time=  

RandomizedSearchCV(estimator=SVC(),
                   param_distributions={'C': [0.1, 1, 10, 100, 1000],
                                        'cache_size': [0.1, 1, 10, 100, 1000],
                                        'degree': [0.1, 1, 10],
                                        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                                        'kernel': ['rbf']},
                   verbose=10)

In [91]:
# print best parameter after tuning
print(random_search.best_params_)
# print how our model looks after hyper-parameter tuning
print(random_search.best_estimator_)
random_search_predictions = random_search.predict(X_test)
# print classification report
print(classification_report(Y_test, random_search_predictions))

{'kernel': 'rbf', 'gamma': 0.1, 'degree': 0.1, 'cache_size': 10, 'C': 1000}
SVC(C=1000, cache_size=10, degree=0.1, gamma=0.1)
              precision    recall  f1-score   support

           0       0.73      1.00      0.84         8
           1       1.00      0.90      0.95        31

    accuracy                           0.92        39
   macro avg       0.86      0.95      0.90        39
weighted avg       0.94      0.92      0.93        39



Before Hyperparameter Tuning  :-                                                                    
- Accuracy :-  89.74%                                                                          

After Hyperparameter Tuning  :-    
- Accuracy by GridSearchCV :- 92%
- Accuracy by RandomizedSearchCV :- 92%                                         

