## Your task is to make a diagnostic tool (not for real medical use) that asks a medical expert six numerical quantities obtained by radiographic measurements of a patient:

1. pelvic incidence
2. pelvic tilt
3. lumbar lordosis angle
4. sacral slope
5. pelvic radius
6. grade of spondylolisthesis

As an output, your program should provide a probability estimate of the patient having a
vertebral abnormality (either disk hernia or spondylolisthesis).

In [1]:
from scipy.io import arff
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
#from sklearn.cross_validation import cross_val_score
from sklearn.model_selection import cross_val_score



df=pd.read_csv("column_2C_weka1.txt",delimiter=",", names=('pelvic_incidence','pelvic_tilt','lumbar_lordosis_angle','sacral_slope','pelvic_radius','degree_spondylolisthesis','class'))
df.head()






Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
0,63.027818,22.552586,39.609117,40.475232,98.672917,-0.2544,Abnormal
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,Abnormal
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,Abnormal
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,Abnormal
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,Abnormal


In [2]:

df['class'].replace(['Abnormal', 'Normal'], [0,1], inplace=True)


df.head(10)

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
0,63.027818,22.552586,39.609117,40.475232,98.672917,-0.2544,0
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,0
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,0
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,0
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,0
5,40.2502,13.921907,25.12495,26.328293,130.327871,2.230652,0
6,53.432928,15.864336,37.165934,37.568592,120.567523,5.988551,0
7,45.366754,10.755611,29.038349,34.611142,117.270068,-10.675871,0
8,43.79019,13.533753,42.690814,30.256437,125.002893,13.289018,0
9,36.686353,5.010884,41.948751,31.675469,84.241415,0.664437,0


In [3]:
df.describe()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
count,310.0,310.0,310.0,310.0,310.0,310.0,310.0
mean,60.496653,17.542822,51.93093,42.953831,117.920655,26.296694,0.322581
std,17.23652,10.00833,18.554064,13.423102,13.317377,37.559027,0.46822
min,26.147921,-6.554948,14.0,13.366931,70.082575,-11.058179,0.0
25%,46.430294,10.667069,37.0,33.347122,110.709196,1.603727,0.0
50%,58.691038,16.357689,49.562398,42.404912,118.268178,11.767934,0.0
75%,72.877696,22.120395,63.0,52.695888,125.467674,41.287352,1.0
max,129.834041,49.431864,125.742385,121.429566,163.071041,418.543082,1.0


In [4]:
# split into explanatory and response variables 
X = df.iloc[:,:6]
Y = df.iloc[:,6]
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: class, dtype: int64

In [6]:
# build and fit model
reg = LogisticRegression(solver='lbfgs')
reg.fit(X,Y)

print("Coefficients: ",reg.coef_)
print("Intercept: ", reg.intercept_)

# compute predicted values from training set
Y_pred = reg.predict(X)

cm = confusion_matrix(Y, Y_pred)
print("Confusion matrix:\n",cm)

accuracy = (cm[0][0]+cm[1][1])/(cm[0][0]+cm[1][1]+cm[0][1]+cm[1][0])
print("Accuracy calculated from the training set = %.3f" % (accuracy))

print(classification_report(Y, Y_pred, target_names=['no', 'yes']))


Coefficients:  [[ 0.0070301  -0.08258075  0.01870319  0.08961076  0.1067678  -0.16811009]]
Intercept:  [-15.1549949]
Confusion matrix:
 [[188  22]
 [ 22  78]]
Accuracy calculated from the training set = 0.858
              precision    recall  f1-score   support

          no       0.90      0.90      0.90       210
         yes       0.78      0.78      0.78       100

   micro avg       0.86      0.86      0.86       310
   macro avg       0.84      0.84      0.84       310
weighted avg       0.86      0.86      0.86       310



In [7]:
# cross-validate
# number of folds
k = 10
scores = cross_val_score(estimator=reg,
                        X=X,
                        y=Y,
                        scoring="accuracy",
                        cv=k)
print("Accuracies from %d individual folds:" % k)
print(scores)
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, scores.mean()))

Accuracies from 10 individual folds:
[0.58064516 0.67741935 0.74193548 0.77419355 0.90322581 0.93548387
 0.93548387 0.96774194 0.83870968 0.93548387]
Accuracy calculated using 10-fold cross validation = 0.829


In [8]:
# retrieve estimated probabilities (from training set)
reg.predict_proba(X)



array([[8.35980777e-01, 1.64019223e-01],
       [7.68085082e-01, 2.31914918e-01],
       [3.73478566e-01, 6.26521434e-01],
       [9.46990721e-01, 5.30092785e-02],
       [7.79941248e-01, 2.20058752e-01],
       [4.14030885e-01, 5.85969115e-01],
       [5.40356986e-01, 4.59643014e-01],
       [9.65848200e-02, 9.03415180e-01],
       [7.92941894e-01, 2.07058106e-01],
       [9.42960394e-01, 5.70396058e-02],
       [2.88030623e-01, 7.11969377e-01],
       [8.96900186e-01, 1.03099814e-01],
       [8.48447877e-01, 1.51552123e-01],
       [9.01896079e-01, 9.81039215e-02],
       [8.02472869e-01, 1.97527131e-01],
       [5.03657813e-01, 4.96342187e-01],
       [3.48913908e-01, 6.51086092e-01],
       [2.49685314e-01, 7.50314686e-01],
       [5.60030665e-01, 4.39969335e-01],
       [5.03009614e-01, 4.96990386e-01],
       [3.48070587e-01, 6.51929413e-01],
       [4.31745005e-01, 5.68254995e-01],
       [9.71898241e-01, 2.81017591e-02],
       [2.69482814e-01, 7.30517186e-01],
       [8.962344

In [9]:
# retrieve estimated probabilities (from training set)
reg.predict_proba(X)[309]
#the probability of the last row

array([0.23255201, 0.76744799])

# Adding new row 

In [10]:
X.loc[310] = (5,4,3,2,1,7)

In [11]:
reg.predict_proba(X)[310]

array([9.99999915e-01, 8.46416620e-08])

In [12]:
pic= input('Pelvic incidence: ' )
pti= input('Pelvic tilt: ')
lumbar = input('lumbar lordosis angle: ')
sacral= input('sacral slope: ')
pra= input('pelvic radius: ')
grade= input('grade of spondylolisthesis: ')

k= X.shape[0]

X.loc[k] = (pic,pti,lumbar,sacral,pra,grade)

x= X.shape[0] -1

if (reg.predict_proba(X)[x, 1] > reg.predict_proba(X)[x, 0]):
    print('The patient is Normal')
elif(reg.predict_proba(X)[x, 1] < reg.predict_proba(X)[x, 0]):
    print('The patient is Abnormal')
else :
    print('It is 50/50')


Pelvic incidence: 50
Pelvic tilt: 25
lumbar lordosis angle: 66
sacral slope: 55
pelvic radius: 130
grade of spondylolisthesis: 43
The patient is Abnormal


In [13]:
reg.predict_proba(X)

array([[8.35980777e-01, 1.64019223e-01],
       [7.68085082e-01, 2.31914918e-01],
       [3.73478566e-01, 6.26521434e-01],
       [9.46990721e-01, 5.30092785e-02],
       [7.79941248e-01, 2.20058752e-01],
       [4.14030885e-01, 5.85969115e-01],
       [5.40356986e-01, 4.59643014e-01],
       [9.65848200e-02, 9.03415180e-01],
       [7.92941894e-01, 2.07058106e-01],
       [9.42960394e-01, 5.70396058e-02],
       [2.88030623e-01, 7.11969377e-01],
       [8.96900186e-01, 1.03099814e-01],
       [8.48447877e-01, 1.51552123e-01],
       [9.01896079e-01, 9.81039215e-02],
       [8.02472869e-01, 1.97527131e-01],
       [5.03657813e-01, 4.96342187e-01],
       [3.48913908e-01, 6.51086092e-01],
       [2.49685314e-01, 7.50314686e-01],
       [5.60030665e-01, 4.39969335e-01],
       [5.03009614e-01, 4.96990386e-01],
       [3.48070587e-01, 6.51929413e-01],
       [4.31745005e-01, 5.68254995e-01],
       [9.71898241e-01, 2.81017591e-02],
       [2.69482814e-01, 7.30517186e-01],
       [8.962344

In [18]:
reg.predict_proba(X)[311]


array([0.98294192, 0.01705808])