## Project Diabetes

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


In [None]:
import os

# Print the full path of every file found under the Kaggle input directory.
for root, _dirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the diabetes CSV into a DataFrame.
# NOTE(review): the path is an absolute Kaggle input mount, so this cell only
# works where the file exists at exactly this location.
df = pd.read_csv('/kaggle/input/pima-indians-diabetes-database/diabetes.csv')

In [None]:
# Preview the first rows to confirm the file parsed as expected.
df.head()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per numeric column.
df.describe()

In [None]:
# Number of missing values per column.
# `.sum()` adds up the True entries of the boolean mask. The previous
# `.count()` counted non-null cells of the mask itself, which is simply the
# row count for every column and can never reveal missing data.
# NOTE(review): this only detects NaN; if the dataset encodes missing
# measurements as 0, they will not show up here — TODO confirm.
df.isnull().sum()

In [None]:
# How many rows fall into each value of the target column `Outcome`.
df['Outcome'].value_counts()

In [None]:
# Visual check for missing values: plots the boolean `df.isnull()` mask as a
# heatmap (row labels and colour bar hidden).
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='viridis')

### Exploratory Data Analysis (EDA)

In [None]:
# Use seaborn's 'whitegrid' style for the plots that follow.
sns.set_style('whitegrid')

In [None]:
# Column names, for reference when selecting features below.
df.columns

In [None]:
# Histogram of the Age column with 50 bins and no density curve.
# `sns.distplot` is deprecated (since seaborn 0.11) and scheduled for removal;
# `sns.histplot` is its replacement for the kde=False case.
sns.histplot(df['Age'], bins = 50, kde = False)

In [None]:
# Bar chart of how many rows have each `Outcome` value.
sns.countplot(x = 'Outcome', data = df)

In [None]:
# One histogram per numeric column, drawn on a single 12x8 figure.
df.hist(figsize = (12,8))

In [None]:
# Pairwise correlation matrix of all columns, drawn as an annotated heatmap.
corr = df.corr()
fig, ax = plt.subplots(figsize = (12,8))
sns.heatmap(corr, annot = True, ax = ax)

In [None]:
# One box plot per column, arranged on a 3x3 grid of subplots.
df.plot.box(subplots = True, layout = (3,3), figsize = (12,8))
plt.show()

In [None]:
# Violin plot of each of the first eight columns, grouped by `Outcome`,
# one figure per column.
cols = df.columns[:8]
for item in cols:
    plt.figure(figsize = (6,4))
    # The previous title concatenated the pieces without separators and
    # rendered as "<column>WithOutcome".
    plt.title(f'{item} with Outcome')
    sns.violinplot(x = 'Outcome', y = item, data = df)
    plt.show()

In [None]:
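# NOTE(review): pair plot left disabled — presumably because it is slow to render; re-enable or delete this cell.
#sns.pairplot(df,hue='Outcome',palette='coolwarm', diag_kind = 'hist')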

In [None]:
# Separate the target column from the predictor columns.
y = df['Outcome']
X = df.drop(columns = 'Outcome')

## Standardizing the data

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column of X (StandardScaler defaults).
scaler = StandardScaler()
scaler.fit(X)
X_scaler = scaler.transform(X)

## Splitting into train and test sets

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the *unscaled* features first, then fit the scaler on the training
# rows only. Splitting an array that was standardized on the full data set
# leaks test-set statistics (mean/std) into training and makes the reported
# test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
# The test split is transformed with the training statistics, never re-fit.
X_test = scaler.transform(X_test_raw)

## Applying algorithms for prediction

### Logistic Regression

In [None]:

from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier with scikit-learn's default hyperparameters,
# fitted on the training split.
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test split.
predic_logistic = logmodel.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate logistic regression on the test split: per-class report, raw
# confusion matrix, accuracy in percent, then the matrix as a heatmap.
cm = confusion_matrix(y_test, predic_logistic)
print(classification_report(y_test, predic_logistic))
print(cm)
print('Accuracy -- >', logmodel.score(X_test, y_test)*100)
sns.heatmap(cm, fmt = 'g', annot = True)
plt.show()

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default hyperparameters. The seed is fixed (same value as
# the train/test split) so that bootstrap sampling and feature selection — and
# therefore the reported scores — are reproducible on Restart & Run All.
model_random = RandomForestClassifier(random_state=42)
model_random.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test split.
y_predict_random = model_random.predict(X_test)

In [None]:
# Evaluate the random forest on the test split: per-class report, raw
# confusion matrix, accuracy in percent, then the matrix as a heatmap.
cm = confusion_matrix(y_test, y_predict_random)
print(classification_report(y_test, y_predict_random))
print(cm)
print('Accuracy -- >', model_random.score(X_test, y_test)*100)
sns.heatmap(cm, fmt = 'g', annot = True)
plt.show()

### Support Vector Machine

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with scikit-learn's default hyperparameters,
# fitted on the training split.
model_svc = SVC()
model_svc.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test split.
y_pred_svc = model_svc.predict(X_test)

In [None]:
# Evaluate the SVC on the test split: per-class report, raw confusion
# matrix, accuracy in percent, then the matrix as a heatmap.
cm = confusion_matrix(y_test, y_pred_svc)
print(classification_report(y_test, y_pred_svc))
print(cm)
print('Accuracy -- >', model_svc.score(X_test, y_test)*100)
sns.heatmap(cm, fmt = 'g', annot = True)
plt.show()

### K Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with scikit-learn's default hyperparameters,
# fitted on the training split.
model_KNN = KNeighborsClassifier()
model_KNN.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test split.
y_pred_knn = model_KNN.predict(X_test)

In [None]:
# Evaluate KNN on the test split: per-class report, raw confusion matrix,
# accuracy in percent, then the matrix as a heatmap.
cm = confusion_matrix(y_test, y_pred_knn)
print(classification_report(y_test, y_pred_knn))
print(cm)
print('Accuracy -- >', model_KNN.score(X_test, y_test)*100)
sns.heatmap(cm, fmt = 'g', annot = True)
plt.show()

### Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient-boosted trees with default hyperparameters. The seed is fixed (same
# value as the train/test split) so the fitted model and its reported scores
# are reproducible on Restart & Run All.
model_GB = GradientBoostingClassifier(random_state=42)
model_GB.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test split.
y_pred_GB = model_GB.predict(X_test)

In [None]:
# Evaluate gradient boosting on the test split: per-class report, raw
# confusion matrix, accuracy in percent, then the matrix as a heatmap.
cm = confusion_matrix(y_test, y_pred_GB)
print(classification_report(y_test, y_pred_GB))
print(cm)
print('Accuracy -- >', model_GB.score(X_test, y_test)*100)
sns.heatmap(cm, fmt = 'g', annot = True)
plt.show()