**Mounting Google Drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Load Dataset**

In [4]:
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing

data = pd.read_csv('/content/drive/My Drive/CSE 4104_AILAB/dataset//diabetes.csv')

In [5]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
X = data.iloc[:,0:8].copy()  #Features - Independent Variables
y = data.iloc[:,8].copy()   #Target Variable - Dependent Variable


In [7]:
X.shape


(768, 8)

In [8]:

print(X)


     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766            1      126             60              0        0  30.1   
767            1       93             70             31        0  30.4   

     DiabetesPedigreeFunction  Age  
0                       0.627   50  
1                       0.351   31  


In [9]:

print(y)



0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64


**Scaling**

In [10]:
min_max_scaler = preprocessing.MinMaxScaler(feature_range =(0, 1))
  

x_after_min_max_scaler = min_max_scaler.fit_transform(data)

print ("\nAfter min max Scaling : \n", x_after_min_max_scaler)


After min max Scaling : 
 [[0.35294118 0.74371859 0.59016393 ... 0.23441503 0.48333333 1.        ]
 [0.05882353 0.42713568 0.54098361 ... 0.11656704 0.16666667 0.        ]
 [0.47058824 0.91959799 0.52459016 ... 0.25362938 0.18333333 1.        ]
 ...
 [0.29411765 0.6080402  0.59016393 ... 0.07130658 0.15       0.        ]
 [0.05882353 0.63316583 0.49180328 ... 0.11571307 0.43333333 1.        ]
 [0.05882353 0.46733668 0.57377049 ... 0.10119556 0.03333333 0.        ]]


**Splitting Data**

In [11]:
#split X and y into Training and Testing Sets

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_after_min_max_scaler, y, test_size = 0.2)


In [12]:
X_train.shape, X_test.shape

((614, 9), (154, 9))

**Model development and Prediction**

In [13]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()

#Fit the model with data or Train the model
logreg.fit(X_train, y_train)

#Predict the Output
y_pred = logreg.predict(X_test)

In [14]:
print(y_test)

244    0
606    1
524    0
357    1
224    0
      ..
530    0
545    1
304    0
148    0
559    0
Name: Outcome, Length: 154, dtype: int64


In [15]:
print(y_pred)

[0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 0 0
 1 1 1 0 0 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 0
 0 1 0 0 0 1 0 0 1 0 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 1
 0 1 0 1 1 0 0 0 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 1
 1 0 1 0 0 0]


**Evaluation using Classification Report**

In [16]:
from sklearn.metrics import classification_report
target = ['Without Diabetes', 'With Diabetes']
print(classification_report(y_test, y_pred, target_names = target))

                  precision    recall  f1-score   support

Without Diabetes       1.00      1.00      1.00        80
   With Diabetes       1.00      1.00      1.00        74

        accuracy                           1.00       154
       macro avg       1.00      1.00      1.00       154
    weighted avg       1.00      1.00      1.00       154

