In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

In [3]:
# Read the dataset from a CSV file next to the notebook and preview the first rows.
df = pd.read_csv('./pima.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Predictor columns, listed in the order the model receives them
# (the fitted coefficients follow this same order).
cols = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
X = df.loc[:, cols]        # feature matrix
y = df.loc[:, 'Outcome']   # 0/1 target column

In [5]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# Fit a logistic-regression classifier on the training split.
# max_iter=1000 raises the solver's iteration cap above scikit-learn's default of 100
# (presumably to avoid convergence warnings on the unscaled features — confirm).
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

LogisticRegression(max_iter=1000)

In [7]:
# Predicted class labels for the held-out rows.
y_pred = lr.predict(X_test)

In [8]:
# Rows are the true classes, columns are the predicted classes.
confusion_matrix(y_test, y_pred)

array([[120,  31],
       [ 30,  50]], dtype=int64)

In [9]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.7359307359307359

In [10]:
# Fitted weights, one per column of X (same order as cols).
lr.coef_

array([[ 0.05775896,  0.03589842, -0.01086401, -0.00141162, -0.00098457,
         0.10906779,  0.3738797 ,  0.03600388]])

In [11]:
# Fitted bias term that is added to the weighted feature sum.
lr.intercept_

array([-9.43019632])

In [12]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s), e.g. the logits of a linear model.

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``z``; a scalar input yields a scalar,
        an array input yields an array of the same shape.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook form; for z < 0 the algebraically
    # equivalent exp(z) / (1 + exp(z)) is used.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # real arrays untouched.
    return out[()]

In [42]:
# Reproduce lr.predict by hand: logit = x . w + b, then threshold sigmoid(logit) at 0.5.
weights = lr.coef_.T      # column vector of fitted weights
bias = lr.intercept_      # one-element array holding the fitted bias
y_pred_m = []             # manual 0/1 predictions
y_pred_proba = []         # NOTE: stores the raw logit, not a probability, despite the name
for features in X_test.values:
    # Compute the linear score once per row and reuse it for both lists.
    logit = np.dot(features, weights) + bias   # one-element array
    # round() maps sigmoid > 0.5 to 1 and sigmoid <= 0.5 to 0 (ties round to even).
    y_pred_m.append(round(sigmoid(logit)[0]))
    y_pred_proba.append(logit[0])

In [14]:
# Cross-tabulate sklearn's predictions against the manual ones;
# zeros off the diagonal mean the two agree on every row.
confusion_matrix(y_pred, y_pred_m)

array([[150,   0],
       [  0,  81]], dtype=int64)

In [15]:
# Share of test rows where the manual prediction equals sklearn's.
accuracy_score(y_pred, y_pred_m)

1.0

In [37]:
# Print every test row as one comma-separated line, three decimals per value,
# with a constant 1 appended after the features
# (presumably a bias input for an external implementation — confirm).
for sample in X_test.values:
    augmented = np.append(sample, [1])
    fields = ['{:.3f}'.format(float(value)) for value in augmented]
    print(','.join(fields))

6.000,98.000,58.000,33.000,190.000,34.000,0.430,43.000,1.000
2.000,112.000,75.000,32.000,0.000,35.700,0.148,21.000,1.000
2.000,108.000,64.000,0.000,0.000,30.800,0.158,21.000,1.000
8.000,107.000,80.000,0.000,0.000,24.600,0.856,34.000,1.000
7.000,136.000,90.000,0.000,0.000,29.900,0.210,50.000,1.000
6.000,103.000,72.000,32.000,190.000,37.700,0.324,55.000,1.000
1.000,71.000,48.000,18.000,76.000,20.400,0.323,22.000,1.000
0.000,117.000,0.000,0.000,0.000,33.800,0.932,44.000,1.000
4.000,154.000,72.000,29.000,126.000,31.300,0.338,37.000,1.000
5.000,147.000,78.000,0.000,0.000,33.700,0.218,65.000,1.000
10.000,111.000,70.000,27.000,0.000,27.500,0.141,40.000,1.000
7.000,179.000,95.000,31.000,0.000,34.200,0.164,60.000,1.000
4.000,148.000,60.000,27.000,318.000,30.900,0.150,29.000,1.000
5.000,96.000,74.000,18.000,67.000,33.600,0.997,43.000,1.000
2.000,88.000,58.000,26.000,16.000,28.400,0.766,22.000,1.000
1.000,125.000,50.000,40.000,167.000,33.300,0.962,28.000,1.000
3.000,84.000,72.000,32.000,0.000,37.

In [43]:
# Print each sklearn-predicted label next to the manually computed score.
# y_pred_proba holds the pre-sigmoid linear score, so positive scores pair with label 1.
for label, score in zip(y_pred, y_pred_proba):
    print(label, score)

0 -1.0121222299751853
0 -1.4488925037796996
0 -1.9585035397462667
0 -1.7688742244496023
1 0.018375928649163598
0 -0.1873485283964449
0 -4.3075273822572075
1 0.3890366507666112
1 0.2543308365254493
1 1.3856156300621354
0 -1.1742403744978542
1 2.275759419787457
0 -0.4188535099909796
0 -1.0048629717021953
0 -2.662183085702134
0 -0.6494855557122658
0 -1.903576650967473
0 -2.608287669762383
1 1.8431775394142509
1 0.39624823553971567
0 -1.285360635307919
0 -2.505785970004659
0 -0.053927500246912174
0 -2.2600740487410693
1 0.21735261746858292
1 2.1010578085707454
0 -2.0848506963456668
0 -3.458123526417059
0 -0.9181551854222878
0 -2.082355552093638
1 2.3684528150924766
1 2.0004030936908155
1 1.4813088520300752
1 1.4815306865770275
1 0.5160927160819888
1 0.734565227913599
1 3.0889523950430338
0 -1.283132612016228
0 -0.0453987557680815
1 1.0212735354494367
0 -2.7026068272518806
1 0.28839844894303823
1 0.21083469738440108
0 -0.7831865592494367
0 -3.531630123663252
1 0.07014985064514434
1 0.315527