In [None]:
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
# NOTE(review): this also hides any library warnings raised by later cells
# (e.g. while fitting the model) — confirm blanket suppression is intended.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset from a CSV located relative to the working directory,
# then preview the first rows as the cell output.
df = pd.read_csv("diabeties.csv")
df.head()

# Goal: build an ML model that correctly predicts a patient as positive when they actually are positive (i.e. prioritise recall/sensitivity for the positive class).

In [None]:
# Print a structural summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Summary statistics for the frame's columns.
df.describe()

In [None]:
#sns.pairplot(df)

In [None]:
#sns.pairplot(data=df, hue="target")

In [None]:
# Count how many rows fall into each distinct value of the `target` column.
df["target"].value_counts()

In [None]:
# Features are every column except the last; the last column is used as the label.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=1)

# Fit a logistic regression with default settings and take hard class predictions
# for the test rows.
# NOTE(review): warnings are silenced at the top of the notebook, so a solver
# convergence warning here would go unnoticed — confirm the fit converges.
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(xtrain, ytrain)
ypred = logreg.predict(xtest)

from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix
ac = accuracy_score(ytest, ypred)
# ROC AUC is a ranking metric evaluated across all thresholds, so it must be
# scored on the predicted probability of the positive class (column 1), not on
# the already-thresholded 0/1 labels.
ra = roc_auc_score(ytest, logreg.predict_proba(xtest)[:, 1])
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print(f"Accuracy : {ac}\nROC : {ra}\n{cm}\n\n{cr}")

In [None]:
# Score the fitted model on each split, in (train, test) order, to compare
# how well it does on seen vs. unseen rows.
train, test = (
    logreg.score(features, labels)
    for features, labels in ((xtrain, ytrain), (xtest, ytest))
)

print(f"Training Accuracy - : {train}\nTesting Accuracy -: {test}")

In [None]:
# Eyeball the first 25 test labels next to the model's predictions for them.
first_25 = slice(None, 25)
actual_preview = ytest.iloc[first_25].values
predicted_preview = ypred[first_25]

print(f"Actual Values     : {actual_preview}")
print(f"Predicted Values  : {predicted_preview}")

In [None]:
# Same comparison for the next window of test rows (positions 25 through 50).
next_window = slice(25, 51)
actual_preview = ytest.iloc[next_window].values
predicted_preview = ypred[next_window]

print(f"Actual Values     : {actual_preview}")
print(f"Predicted Values  : {predicted_preview}")

In [None]:
# Per-class predicted probabilities for every test row; column 1 is the one the
# following cells extract and treat as the class-1 probability.
logreg.predict_proba(xtest)

In [None]:
# Keep only column 1 of the probability matrix — used below as the predicted
# probability of class 1 for each test row.
ypredprob = logreg.predict_proba(xtest)[:, 1]

In [None]:
ypredprob # Predicted probability that each test patient is diabetic, i.e. belongs to class 1

In [None]:
from sklearn.preprocessing import binarize

In [None]:
# Convert the class-1 probabilities into hard labels with a 0.3 cut-off:
# values above the threshold become 1.0, everything else 0.0.
# binarize() works on 2-D input, so pass the vector as a single row and flatten
# the result back to 1-D.
prob_row = ypredprob.reshape(1, -1)
ypred = binarize(prob_row, threshold=0.3).ravel()

In [None]:
# First 25 test labels next to the re-thresholded predictions (cast to int
# because the thresholded labels are stored as floats).
first_25 = slice(None, 25)
actual_preview = ytest.iloc[first_25].values
predicted_preview = ypred[first_25].astype(int)

print(f"Actual Values     : {actual_preview}")
print(f"Predicted Values  : {predicted_preview}")

In [None]:
# Re-score using the thresholded labels in `ypred` (assumes the cells ran top to
# bottom, so `ypred` holds the 0.3 cut-off predictions at this point).
ac = accuracy_score(ytest, ypred)
# ROC AUC summarises ranking quality over every possible threshold, so it is
# computed from the class-1 probabilities rather than from 0/1 labels that have
# already been cut at a single threshold.
ra = roc_auc_score(ytest, ypredprob)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print(f"Accuracy : {ac}\nROC : {ra}\n{cm}\n\n{cr}")

In [None]:
# logreg.score() classifies with the model's built-in 0.5 decision rule, so it
# would only repeat the accuracies printed before the threshold was changed.
# Recompute accuracy on both splits with the same 0.3 cut-off that was applied
# to the test probabilities.
CUTOFF = 0.3
train_labels_at_cutoff = binarize([logreg.predict_proba(xtrain)[:, 1]], threshold=CUTOFF)[0]
test_labels_at_cutoff = binarize([ypredprob], threshold=CUTOFF)[0]

train = accuracy_score(ytrain, train_labels_at_cutoff)
test = accuracy_score(ytest, test_labels_at_cutoff)

print(f"Training Accuracy - : {train}\nTesting Accuracy -: {test}")

In [None]:
from sklearn.metrics import roc_curve

In [None]:
# ROC curve: false-positive vs. true-positive rate as the decision threshold on
# the class-1 probabilities is swept.
fpr, tpr, thres = roc_curve(ytest, ypredprob)

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(fpr, tpr)
ax.set_title("ROC curve for Diabeties Classifier")
ax.set_xlabel("False Positive Rate (1-Specificity)")
ax.set_ylabel("True Positive Rate (Sensitivity)")
ax.grid()
plt.show()