In [28]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics


In [22]:
A = pd.read_csv("pima-indians-diabetes.data.csv",header=None)
# when we dont have columns names, then the row data becomes the header when we do .head()
# to get default columns names like 0,1,2....  we use header=None during file calling.

In [8]:
A.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [9]:
colnames = ['Preg','Glu','Bp','sft','ins','Bmi','Dpf','Age','Class']

In [10]:
diabetes_df = pd.read_csv("pima-indians-diabetes.data.csv",names=colnames)

In [11]:
diabetes_df.head()

Unnamed: 0,Preg,Glu,Bp,sft,ins,Bmi,Dpf,Age,Class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [14]:
diabetes_df['Class'].value_counts()
# class o means healthy and 1 means not healthy.
# total 768 records we have.

0    500
1    268
Name: Class, dtype: int64

In [15]:
diabetes_df['Preg'].nunique()

17

In [19]:
X = diabetes_df.drop(['Class'],axis=1)
Y = diabetes_df['Class']

In [23]:
Xtrain,Xtest,Ytrain,Ytest=train_test_split(X,Y,test_size=0.3,random_state=2)

In [24]:
model = LogisticRegression()
model.fit(Xtrain,Ytrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [25]:
y_pred = model.predict(Xtest)

In [26]:
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0])

In [29]:
acc = metrics.accuracy_score(Ytest,y_pred)
# to compare the evaluated y_pred with the actual y oe; Ytest.

In [30]:
acc
# accuracy score

0.7359307359307359

In [31]:
cm = metrics.confusion_matrix(Ytest,y_pred)
# confusion mtrix

In [33]:
print(cm)
# the y axis of matrix is Ytest and top side is y_pred.

[[138  17]
 [ 44  32]]


In [34]:
Ytest.value_counts()
# no of 0 and 1 in Ytest.

0    155
1     76
Name: Class, dtype: int64

In [37]:
# sensitivity
tpr = cm[1,1]/(cm[1,0]+cm[1,1])
tpr 
#true positive rate

0.42105263157894735

In [38]:
# specificity
tnr = cm[0,0]/(cm[0,1]+cm[0,0])
tnr 
#true negative rate

0.8903225806451613

In [39]:
from sklearn import preprocessing

In [40]:
A_scaled = preprocessing.scale(diabetes_df)

In [41]:
B = pd.DataFrame(A_scaled,columns=diabetes_df.columns)

In [44]:
B.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Preg,768.0,2.5442610000000002e-17,1.000652,-1.141852,-0.844885,-0.250952,0.639947,3.906578
Glu,768.0,3.614007e-18,1.000652,-3.783654,-0.685236,-0.121888,0.605771,2.444478
Bp,768.0,-1.3272440000000001e-17,1.000652,-3.572597,-0.367337,0.149641,0.563223,2.734528
sft,768.0,7.994184000000001e-17,1.000652,-1.288212,-1.288212,0.154533,0.719086,4.921866
ins,768.0,-3.556183e-17,1.000652,-0.692891,-0.692891,-0.428062,0.412008,6.652839
Bmi,768.0,2.295979e-16,1.000652,-4.060474,-0.595578,0.000942,0.584771,4.455807
Dpf,768.0,2.398978e-16,1.000652,-1.189553,-0.688969,-0.300128,0.466227,5.883565
Age,768.0,1.8576e-16,1.000652,-1.041549,-0.786286,-0.360847,0.660206,4.063716
Class,768.0,2.408374e-16,1.000652,-0.73212,-0.73212,-0.73212,1.365896,1.365896
