 **Aravinda Raman J**

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Location of the CSV holding the diabetes records.
DATA_PATH = '../input/pima-indians-diabetes-database/diabetes.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Per-column count of missing values in the frame as loaded.
data.isna().sum()

In [None]:
# In these measurement columns every 0 is swapped for NaN so it is counted as missing.
zero_as_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
data[zero_as_missing] = data[zero_as_missing].replace(0, np.nan)

In [None]:
# Preview the first ten rows after the zero -> NaN substitution.
data.head(10)

In [None]:
# Missing values per column now that zeros have been turned into NaN.
data.isna().sum()

In [None]:
col = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI']

# Fill each column's NaNs with that column's mean and assign the result back.
# The previous `data[i].replace(..., inplace=True)` mutated a temporary column
# selection: pandas warns about that pattern, and under Copy-on-Write it leaves
# `data` unchanged.
# NOTE(review): the means are computed over every row of `data`.
data[col] = data[col].fillna(data[col].mean())

In [None]:
# Confirm the imputation: count whatever NaNs remain in each column.
data.isna().sum()

In [None]:
# Bar chart of how many rows fall into each Outcome value.
sns.countplot(data=data, x='Outcome')

In [None]:
# Summary statistics, transposed so each column of `data` becomes a row.
data.describe().transpose()

In [None]:
# One 20-bin histogram per column on a 4x2 grid. `ax.flat` walks the grid
# row by row, so the list order below is the panel order.
hist_columns = ['Age', 'Pregnancies', 'Glucose', 'BloodPressure',
                'SkinThickness', 'Insulin', 'DiabetesPedigreeFunction', 'BMI']

fig, ax = plt.subplots(4, 2, figsize=(16, 16))
for column, axis in zip(hist_columns, ax.flat):
    sns.histplot(data[column], bins=20, ax=axis)

In [None]:
# Heatmap of the pairwise correlations between all columns of `data`.
corr_matrix = data.corr()
plt.figure(figsize=(12, 10))
p = sns.heatmap(corr_matrix)

In [None]:
# Scatter-matrix of every column pair, coloured by the Outcome value.
p = sns.pairplot(data=data, hue='Outcome')

In [None]:
# Features are every column except the last; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out one third of the rows for testing; random_state pins the shuffle.
split_parts = train_test_split(X, y, test_size=1/3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training rows only, then apply it to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# **KNN Algorithm:**


In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# First KNN attempt: a single neighbour, trained on the scaled training split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X=X_train, y=y_train)

In [None]:
# Predictions of the fitted KNN model for the test split.
y_pred = knn.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.model_selection import cross_val_score

In [None]:
# Confusion matrix of the test labels against the current predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Text report of classification metrics for the same predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
from sklearn.pipeline import make_pipeline

# Mean 10-fold cross-validated accuracy for K = 1..29.
#
# The features passed in are `X`, not `data`: `data` still contains the
# 'Outcome' column, so using it as the feature matrix handed the label to the
# model. The scaler sits inside the pipeline so it is re-fitted on the training
# part of every fold, matching how the final KNN is trained on standardized
# features.
accuracy_rate = []

for k in range(1, 30):
    knn_cv = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    score = cross_val_score(knn_cv, X, y, cv=10)
    accuracy_rate.append(score.mean())

In [None]:
# Line plot of the cross-validated accuracy collected for each K in 1..29.
k_range = range(1, 30)
marker_style = dict(color='blue', linestyle='dashed', marker='o',
                    markerfacecolor='red', markersize=10)

plt.figure(figsize=(10, 6))
plt.plot(k_range, accuracy_rate, **marker_style)
plt.title('Accuracy Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Accuracy Rate')
plt.show()

In [None]:
# Refit KNN with K fixed at 17 and report its results on the test split.
knn = KNeighborsClassifier(n_neighbors=17).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Each report is preceded by the same blank-line separator.
for report in (confusion_matrix(y_test, y_pred),
               classification_report(y_test, y_pred)):
    print('\n')
    print(report)

In [None]:
# `knn` was already fitted on the training split in the previous cell, so the
# second fit was redundant work; only the test accuracy is computed here.
knn.score(X_test, y_test)

# **Random Forest Classifier:**

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Forest of 10 entropy-criterion trees; random_state pins the randomness.
rf_params = {'n_estimators': 10, 'criterion': 'entropy', 'random_state': 0}
classifier = RandomForestClassifier(**rf_params)
classifier.fit(X_train, y_train)

In [None]:
# Score the fitted forest on the test split and print both reports.
pred = classifier.predict(X_test)
rf_confusion = confusion_matrix(y_true=y_test, y_pred=pred)
rf_report = classification_report(y_true=y_test, y_pred=pred)

print(rf_confusion)
print('\n')
print(rf_report)

In [None]:
# `classifier` is already fitted on the training split (and uses a fixed
# random_state), so fitting it again was redundant; just report test accuracy.
classifier.score(X_test, y_test)


# **Artificial Neural Network (ANN):**

In [None]:
import tensorflow as tf
from keras import regularizers
from keras.layers.core import Dropout

In [None]:
# Empty sequential container for the ANN.
model = tf.keras.Sequential()

In [None]:
# Build the whole network in one expression so that re-running this cell
# recreates the model instead of appending a second stack of layers.
#
# The Dropout and BatchNormalization layers are now part of the model.
# Previously they were instantiated as bare expressions and discarded, so the
# network consisted of the Dense layers only. Everything is taken from
# `tf.keras` so the cell does not mix the standalone `keras` package with it.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(8,)),  # 8 input features, as before (input_dim=8)
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.BatchNormalization(),
    # Single sigmoid unit for the binary output.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])



In [None]:
# Recover the unscaled feature rows for the ANN directly from `X`, using the
# row labels that `y_train` / `y_test` kept from the split. Unlike calling
# `sc.inverse_transform` on the current arrays, this gives the same result no
# matter how many times the cell is run and does not depend on the arrays
# still being in standardized form.
X_train = X.loc[y_train.index].to_numpy()
X_test = X.loc[y_test.index].to_numpy()

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training rows only, then apply it to both splits.
scale = MinMaxScaler().fit(X_train)
X_train = scale.transform(X_train)
X_test = scale.transform(X_test)

In [None]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 200 epochs in batches of 30; the test split is passed as
# validation data, so its metrics are reported after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=30,
)

In [None]:
# Loss and compiled metrics of the trained model on the test split.
model.evaluate(X_test,y_test)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get boolean class predictions,
# then show the confusion matrix and the accuracy.
probabilities = model.predict(X_test)
y_pred = probabilities > 0.5

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)