# Task for Today  

***

## Diabetes Prediction  

Given *medical data about patients*, let's try to predict whether a given patient will have **diabetes** or not.  
  
We will use logistic regression, support vector machine, and neural network models to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [None]:
# Read the diabetes CSV from the relative input directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer='../input/pima-indians-diabetes-database/diabetes.csv')

In [None]:
# Show the loaded DataFrame as this cell's output.
data

# EDA/Data Visualization

In [None]:
# Pairwise Pearson correlation between all columns of the dataset.
correlation_matrix = data.corr(method='pearson')

In [None]:
# Render the annotated correlation heatmap on an explicitly created axes,
# with the colour scale pinned to the full [-1, 1] correlation range.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    vmin=-1.0,
    vmax=1.0,
    cmap='mako',
    ax=heatmap_ax,
)
plt.show()

In [None]:
# Bucket patients into four equal-frequency age groups and count the
# outcomes (0 / 1) that fall into each group.
age_bins = pd.qcut(data['Age'], q=4, labels=['Youngest', 'Younger', 'Older', 'Oldest'])
age_ct = pd.crosstab(age_bins, data['Outcome'])

# Share of positive outcomes per age group, scaled to 0-100 so that the
# values actually agree with the '% Positive' column header below.
age_ct_avgs = 100 * age_ct[1] / (age_ct[0] + age_ct[1])

age_ct = pd.concat([age_ct, age_ct_avgs], axis=1)
age_ct.columns = ['Negative', 'Positive', '% Positive']

age_ct

In [None]:
# Standardize every column except the last one so that their distributions
# can be overlaid on a single shared axis.
scaler = StandardScaler()
scaled_columns = data.iloc[:, :-1]
scaled_columns = pd.DataFrame(scaler.fit_transform(scaled_columns), columns=scaled_columns.columns)

plt.figure(figsize=(18, 10))
for column in scaled_columns.columns:
    # `fill=True` replaces the deprecated `shade=True`; the explicit label
    # lets the legend tell the overlaid curves apart.
    sns.kdeplot(scaled_columns[column], fill=True, label=column)
# Without this the axis would carry the name of a single column only.
plt.xlabel('Standardized value')
plt.legend()
plt.show()

# Splitting and Scaling

In [None]:
# Separate the predictor columns from the 'Outcome' target column.
X = data.drop(columns='Outcome')
y = data['Outcome']

In [None]:
# Split first, then fit the scaler on the training rows only. Fitting it on
# the full dataset before splitting would leak test-set statistics (mean and
# variance) into the features the models are trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=24)

# A fresh scaler keeps this step independent of any scaler fitted earlier.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training and Results

In [None]:
# The three candidate classifiers. The neural network's weight initialisation
# and batch sampling are random, so its seed is pinned to make the reported
# accuracy reproducible from run to run.
log_model = LogisticRegression()
svm_model = SVC(C=1.0)
ann_model = MLPClassifier(hidden_layer_sizes=(16, 16), random_state=24)

In [None]:
# Train each classifier on the same training split, in the original order.
for estimator in (log_model, svm_model, ann_model):
    estimator.fit(X_train, y_train)

In [None]:
# Mean accuracy of each fitted classifier on the held-out test split.
log_acc, svm_acc, ann_acc = [
    estimator.score(X_test, y_test)
    for estimator in (log_model, svm_model, ann_model)
]

In [None]:
# Bar chart comparing test accuracy across the three models; the same names
# drive both the x-axis categories and the bar colours.
model_names = ['Logistic Regression', 'Support Vector Machine', 'Neural Network']
model_scores = [log_acc, svm_acc, ann_acc]

fig = px.bar(x=model_names, y=model_scores, color=model_names)

fig.show()

In [None]:
# Print the test accuracies with right-aligned labels so the values line up.
for label, accuracy in (
    ("   Logistic Regression:", log_acc),
    ("Support Vector Machine:", svm_acc),
    ("        Neural Network:", ann_acc),
):
    print(label, accuracy)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/YlEXc6Kwoqc