In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report
from sklearn.svm import SVC

## Dataset

In [2]:
# Read the dataset from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## Data Visualization

In [3]:
#cols=list(df.columns)

#for label in cols[:-1]:

  #plt.figure(figsize=(4,3))
  #plt.hist(df[df["Outcome"]==1][label], color='red', label='diabetes', alpha=0.6, density=True)
  #plt.hist(df[df["Outcome"]==0][label], color='blue', label='no-diabetes', alpha=0.6, density=True)

  #plt.title(label)
  #plt.legend()
  #plt.show()

## Preparing Dataset

In [4]:
# Class balance of the label column (how many rows per Outcome value).
df["Outcome"].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [5]:
# 60/20/20 split taken in file order (rows are NOT shuffled first).
# Plain positional slicing is used instead of np.split(df, ...): NumPy's
# split goes through DataFrame.swapaxes, which pandas has deprecated, while
# iloc slicing yields the same three DataFrames on every pandas version.
n_rows = len(df)
train_end, valid_end = int(0.6 * n_rows), int(0.8 * n_rows)
train = df.iloc[:train_end]
valid = df.iloc[train_end:valid_end]
test = df.iloc[valid_end:]


In [6]:
def scale_dataset(dataframe, oversample=False, scaler=None, random_state=None):
  """Standardise the feature columns of a split and optionally oversample it.

  Every column except the last is treated as a feature; the last column is
  treated as the label.

  Parameters
  ----------
  dataframe : pandas.DataFrame
    Split to process. Features first, label in the final column.
  oversample : bool, default False
    When True, the scaled features and labels are passed through
    RandomOverSampler.fit_resample before being returned.
  scaler : object with a ``transform`` method, optional
    An already-fitted scaler (for example a StandardScaler fitted on the
    training features). When given, it is only used to transform, so the
    split is scaled with the statistics the scaler was fitted on. When
    omitted, a new StandardScaler is fitted on this dataframe itself.
  random_state : int, optional
    Seed forwarded to RandomOverSampler so oversampling can be repeated
    exactly. Ignored when ``oversample`` is False.

  Returns
  -------
  data : numpy.ndarray
    Scaled features and labels stacked column-wise, label in the last column.
  X : numpy.ndarray
    Scaled (and possibly oversampled) feature matrix.
  Y : numpy.ndarray
    Label vector aligned with ``X``.
  """
  feature_columns = dataframe.columns[:-1]
  label_column = dataframe.columns[-1]
  X = dataframe[feature_columns].values
  Y = dataframe[label_column].values

  if scaler is None:
    # No scaler supplied: fit a fresh one on this split.
    X = StandardScaler().fit_transform(X)
  else:
    # Caller-supplied scaler is assumed to be fitted; never refit it here,
    # otherwise its training statistics would be overwritten.
    X = scaler.transform(X)

  if oversample:
    ros = RandomOverSampler(random_state=random_state)
    X, Y = ros.fit_resample(X, Y)

  # Labels become a column vector so they can sit next to the features.
  data = np.hstack((X, np.reshape(Y, (-1, 1))))

  return data, X, Y

In [7]:
# Scale each split; only the training split is oversampled.
# Each call fits its own scaler on the split it is given.
# NOTE: train/valid/test are rebound from DataFrames to NumPy arrays here, so
# this cell cannot be re-run without first re-running the split cell.
train, X_train, Y_train = scale_dataset(dataframe=train, oversample=True)
valid, X_valid, Y_valid = scale_dataset(dataframe=valid)
test, X_test, Y_test = scale_dataset(dataframe=test)


## Training Model

In [8]:
# Support-vector classifier with an RBF kernel, trained on the scaled and
# oversampled training split.
svm_model = SVC(kernel="rbf")
svm_model.fit(X=X_train, y=Y_train)

In [9]:
# Evaluate on the held-out test split and print per-class metrics.
Y_pred = svm_model.predict(X_test)
report = classification_report(y_true=Y_test, y_pred=Y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.89      0.79      0.83        99
           1       0.68      0.82      0.74        55

    accuracy                           0.80       154
   macro avg       0.78      0.80      0.79       154
weighted avg       0.81      0.80      0.80       154



## Prediction

In [10]:
# Input Data Here: one value per feature, in the dataset's column order.
input_data = (2, 152, 70, 27, 0, 36.8, 0.34, 27)

# Shape (1, n_features): a single sample laid out as one row.
arr = np.asarray(input_data).reshape(1, -1)


In [11]:
# Scale the new sample with the statistics of the training rows.
# Fitting a StandardScaler on the single input row (as was done before) makes
# that row its own mean, so every feature is mapped to 0 and the model's
# answer no longer depends on the values entered.
# The training rows are the first 60% of df, all columns except the label,
# which is the same data the model's training features were standardised on.
train_features = df.iloc[:int(0.6 * len(df)), :-1].values
scaler = StandardScaler().fit(train_features)
std_data = scaler.transform(arr)

In [12]:
# Classify the scaled sample and report the result in plain language.
prediction = svm_model.predict(std_data)

# Label 0 is reported as non-diabetic; any other label as diabetic.
verdict = "The Person is Non-Diabetic" if prediction[0] == 0 else "The Person is Diabetic"
print(verdict)


The Person is Diabetic
