**Mounting Google Drive**

In [54]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Load Dataset**

In [55]:
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing

data = pd.read_csv('/content/drive/My Drive/CSE 4104 AI Lab/DATASET/diabetes.csv')

In [56]:
data.head()

Unnamed: 0,P,Glucose,Blood Pressure,ST,I,BMI,DP,AGE,Outcome
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0


In [57]:
X = data.iloc[:,0:8].copy()  #Features - Independent Variables
y = data.iloc[:,8].copy()   #Target Variable - Dependent Variable


In [58]:
X.shape


(767, 8)

In [59]:

print(X)


      P  Glucose  Blood Pressure  ST    I   BMI     DP  AGE
0     1       85              66  29    0  26.6  0.351   31
1     8      183              64   0    0  23.3  0.672   32
2     1       89              66  23   94  28.1  0.167   21
3     0      137              40  35  168  43.1  2.288   33
4     5      116              74   0    0  25.6  0.201   30
..   ..      ...             ...  ..  ...   ...    ...  ...
762  10      101              76  48  180  32.9  0.171   63
763   2      122              70  27    0  36.8  0.340   27
764   5      121              72  23  112  26.2  0.245   30
765   1      126              60   0    0  30.1  0.349   47
766   1       93              70  31    0  30.4  0.315   23

[767 rows x 8 columns]


In [60]:

print(y)



0      0
1      1
2      0
3      1
4      0
      ..
762    0
763    0
764    0
765    1
766    0
Name: Outcome, Length: 767, dtype: int64


**Normalization**

In [61]:
normalized = preprocessing.normalize(X)
print("Normalized Data = ", normalized)

Normalized Data =  [[0.008424   0.71604034 0.55598426 ... 0.22407851 0.00295683 0.26114412]
 [0.04039768 0.92409698 0.32318146 ... 0.11765825 0.00339341 0.16159073]
 [0.00661199 0.58846737 0.43639153 ... 0.185797   0.0011042  0.13885185]
 ...
 [0.02691539 0.65135243 0.38758161 ... 0.14103664 0.00131885 0.16149234]
 [0.00665306 0.83828547 0.39918356 ... 0.20025708 0.00232192 0.31269379]
 [0.00791454 0.73605211 0.55401772 ... 0.24060198 0.00249308 0.18203439]]


**Splitting Data**

In [62]:
#split X and y into Training and Testing Sets

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(normalized, y, test_size = 0.5)


In [63]:
X_train.shape, X_test.shape

((383, 8), (384, 8))

**Model development and Prediction**

In [64]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()

#Fit the model with data or Train the model
logreg.fit(X_train, y_train)

#Predict the Output
y_pred = logreg.predict(X_test)

In [65]:
print(y_test)

513    0
537    0
427    0
67     0
153    1
      ..
284    0
741    0
586    0
330    0
724    0
Name: Outcome, Length: 384, dtype: int64


In [66]:
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0]


**Evaluation using Classification Report**

In [67]:
from sklearn.metrics import classification_report
target = ['Without Diabetes', 'With Diabetes']
print(classification_report(y_test, y_pred, target_names = target))

                  precision    recall  f1-score   support

Without Diabetes       0.65      0.96      0.78       248
   With Diabetes       0.48      0.07      0.13       136

        accuracy                           0.64       384
       macro avg       0.56      0.51      0.45       384
    weighted avg       0.59      0.64      0.55       384

