#Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Importing the dataset

In [2]:
data = pd.read_csv("/content/diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


#Checking for missing value

In [3]:
data.isnull()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...
763,False,False,False,False,False,False,False,False,False
764,False,False,False,False,False,False,False,False,False
765,False,False,False,False,False,False,False,False,False
766,False,False,False,False,False,False,False,False,False


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


#Initiating the variables

In [5]:
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

#Splitting data into training, testing datase

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

#Check other missing(zero) values




In [7]:
print("total number of rows : {0}".format(len(data)))
print("number of missing rows Pregnancies : {0}".format(len(data.loc[data['Pregnancies'] == 0])))
print("number of missing rows Glucose : {0}".format(len(data.loc[data['Glucose'] == 0])))
print("number of missing rows BloodPressure : {0}".format(len(data.loc[data['BloodPressure'] == 0])))
print("number of missing rows SkinThickness : {0}".format(len(data.loc[data['SkinThickness'] == 0])))
print("number of missing rows Insulin : {0}".format(len(data.loc[data['Insulin'] == 0])))
print("number of missing rows BMI : {0}".format(len(data.loc[data['BMI'] == 0])))
print("number of missing rows DiabetesPedigreeFunction : {0}".format(len(data.loc[data['DiabetesPedigreeFunction'] == 0])))
print("number of missing rows Age : {0}".format(len(data.loc[data['Age'] == 0])))


total number of rows : 768
number of missing rows Pregnancies : 111
number of missing rows Glucose : 5
number of missing rows BloodPressure : 35
number of missing rows SkinThickness : 227
number of missing rows Insulin : 374
number of missing rows BMI : 11
number of missing rows DiabetesPedigreeFunction : 0
number of missing rows Age : 0


#Filling the missing(zero) value

In [8]:
from sklearn.impute import SimpleImputer

filling_values = SimpleImputer(missing_values=0, strategy="mean")  

X_train = filling_values.fit_transform(X_train)
X_test = filling_values.fit_transform(X_test)

#Feature Scaling

In [9]:
from sklearn.preprocessing import StandardScaler
SC = StandardScaler()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)

#Training our model using logistic regression

In [10]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

#Testing our model

In [11]:
Y_pred = classifier.predict(X_test)
Y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

#Making the Confusion Matrix

In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(Y_test, Y_pred)
print(cm)
accuracy_score(Y_test, Y_pred)

[[82 17]
 [20 35]]


0.7597402597402597

#Prediction for new values

In [19]:
output = classifier.predict(SC.transform([[1, 85, 66, 29, 0, 26.6, 0.351, 31]]))
output

array([0])

In [17]:
#Taking the input from the user
print("Enter the values :")
Num_Preg = float(input("Pregnancies : "))
Gluc_num = float(input("Glucose : "))
BP = float(input("BloodPressure : "))
SK = float(input("SkinThickness : "))
Insulin_Num = float(input("Insulin : "))
bmi = float(input("BMI : "))
DPF = float(input("DiabetesPedigreeFunction : "))
age = float(input("Age : "))

#predicting the profit with respect to the inputs
output = classifier.predict(SC.transform([[Num_Preg, Gluc_num, BP, SK, Insulin_Num, bmi, DPF, age ]]))
print("We can predict outcome as {}  by having {}  Pregnancies , {} Glucose , {}  BloodPressure , {}  SkinThickness , {}  Insulin , {}  BMI , {}  DiabetesPedigreeFunction and Age is {} .".format(output[0] if output else "0", Num_Preg, Gluc_num, BP, SK, Insulin_Num, bmi, DPF, age))

Enter the values :
Pregnancies : 5
Glucose : 70
BloodPressure : 58
SkinThickness : 0
Insulin : 10
BMI : 45.2
DiabetesPedigreeFunction : 22.5
Age : 25
We can predict outcome as 1  by having 5.0  Pregnancies , 70.0  Glucose , 58.0  BloodPressure , 0.0  SkinThickness , 10.0  Insulin , 45.2  BMI , 22.5  DiabetesPedigreeFunction and Age is 25.0 .
