In [None]:
#importing libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Read the diabetes CSV into the DataFrame used throughout the notebook.

df = pd.read_csv(
    filepath_or_buffer="../input/pima-indians-diabetes-database/diabetes.csv"
)

**Getting to know the dataset**

In [None]:
# Preview the first five rows (five is also the default for head()).
df.head(n=5)

In [None]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()

**Analysis**

In [None]:
# Pairwise scatter plots (with per-column distributions on the diagonal) for every
# numeric column. The trailing semicolon keeps the cell from echoing the returned
# PairGrid object's repr underneath the figure.
sns.pairplot(df);

# Training & testing

**We will use an SVM classifier and tune its C parameter as far as possible.**

In [None]:
# Target is the 'Outcome' column; every other column is used as a feature.
y = df['Outcome']
X = df.drop(columns=['Outcome'])

In [None]:
# Hold out 31% of the rows as the test split; random_state is fixed at 30.

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.31, random_state=30
)

# Report how many samples ended up on each side of the split.
print('train size is %i' % len(y_train))
print('test size is %i' % len(y_test))

In [None]:
# Baseline: an SVC with default hyper-parameters, fitted on the training split
# and scored on the test split.

svm = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator

y_pred1 = svm.predict(X_test)

# Last expression of the cell, so the accuracy is displayed as the cell output.
accuracy_score(y_test, y_pred1)

**Initially, we get 79.5% accuracy on the test data.**

In [None]:
# Precision / recall / F1 per class for y_pred1 measured against the test labels.
print(
    "Testing Classification Report: \n",
    classification_report(y_true=y_test, y_pred=y_pred1),
)

**Let us tune the hyper-parameters to try to get a better accuracy**

*With rbf kernel and C tuning*

In [None]:
# Sweep C over [1.0, 20.0) in steps of 0.3 with an RBF kernel. For every candidate
# the accuracy on the training split goes into `train` and the accuracy on the
# test split goes into `test`, in grid order.
train, test = [], []

for i in np.arange(1.0, 20.0, 0.3):
    svm = SVC(C=i, kernel='rbf').fit(X_train, y_train)
    y_pred1, y_pred2 = svm.predict(X_train), svm.predict(X_test)
    train += [accuracy_score(y_train, y_pred1)]
    test += [accuracy_score(y_test, y_pred2)]

In [None]:
#getting the C value where the accuracy is max

# np.argmax returns the *position* of the best score inside `test`, not the C value
# itself, so the position is mapped back onto the grid that was swept. This grid must
# stay identical to the np.arange call used in the tuning loop.
c_grid = np.arange(1.0,20.0,0.3)
best_idx = int(np.argmax(test))
# Round to one decimal: the grid step is 0.3, and this hides float noise such as 5.499999.
best_c = round(float(c_grid[best_idx]), 1)

print("Maximum accuracy is at :",test[best_idx]*100,"% where c value is = ",best_c)


**We are getting an accuracy of 80% with the rbf kernel. The best score sits at index 15 of the C grid, i.e. C = 1.0 + 15 × 0.3 = 5.5**

*With poly kernel and C tuning*

In [None]:
# Sweep C for a polynomial-kernel SVM, recording train/test accuracy per candidate,
# then report the best test accuracy together with the C value that produced it.
train=[]
test=[]

# The grid is kept in a variable so the position of the best score can be mapped
# back to an actual C value when reporting.
c_grid = np.arange(1.0,20.0,0.3)

for i in c_grid:
    svm=SVC(kernel='poly',C=i)
    svm.fit(X_train,y_train)
    y_pred1=svm.predict(X_train)
    y_pred2=svm.predict(X_test)
    train.append(accuracy_score(y_train,y_pred1))
    test.append(accuracy_score(y_test,y_pred2))

#getting the C value where the accuracy is max

# np.argmax gives the index of the best score, not C itself; look the value up in the grid.
# NOTE: C is picked using test-split accuracy, so the reported figure is an optimistic estimate.
best_idx = int(np.argmax(test))
# Round to one decimal: the grid step is 0.3, and this hides float noise such as 2.1999999.
best_c = round(float(c_grid[best_idx]), 1)

print("Maximum accuracy is at :",test[best_idx]*100,"% where c value is = ",best_c)


**With the poly kernel we are getting an accuracy of 83%. The best score sits at index 4 of the C grid, i.e. C = 1.0 + 4 × 0.3 = 2.2**

*With sigmoid kernel and C tuning*

In [None]:
# Sweep C for a sigmoid-kernel SVM, recording train/test accuracy per candidate,
# then report the best test accuracy together with the C value that produced it.
train=[]
test=[]

# The grid is kept in a variable so the position of the best score can be mapped
# back to an actual C value when reporting.
c_grid = np.arange(1.0,20.0,0.3)

for i in c_grid:
    svm=SVC(kernel='sigmoid',C=i)
    svm.fit(X_train,y_train)
    y_pred1=svm.predict(X_train)
    y_pred2=svm.predict(X_test)
    train.append(accuracy_score(y_train,y_pred1))
    test.append(accuracy_score(y_test,y_pred2))

#getting the C value where the accuracy is max

# np.argmax gives the index of the best score, not C itself; look the value up in the grid.
# NOTE: C is picked using test-split accuracy, so the reported figure is an optimistic estimate.
best_idx = int(np.argmax(test))
# Round to one decimal: the grid step is 0.3, and this hides float noise in the grid values.
best_c = round(float(c_grid[best_idx]), 1)

print("Maximum accuracy is at :",test[best_idx]*100,"% where c value is = ",best_c)


**The sigmoid kernel gives very poor accuracy**

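**To get a better accuracy we can also tune the hyper-parameters with grid search, random search or k-fold cross-validation. Note that the C values above were chosen by their accuracy on the test split, so the reported accuracies are optimistic; selecting C by cross-validation on the training split would give a fairer estimate.**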

# Thus, to conclude, we got the best accuracy of 83% using the "**POLY**" kernel, with the best score at index 4 of the C grid (C = 2.2).