In [33]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

df = pd.read_csv('column_3C.dat', sep=' ')
df.columns = [
    "pelvic_incidence",
    "pelvic_tilt",
     "lumbar_lordosis_angle",
     "sacral_slope",
     "pelvic_radius",
     "degree_spondylolisthesis",
     "result"
     ]
df = shuffle(df)
df.head(10)

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,result
120,83.88,23.08,87.14,60.8,124.65,80.56,SL
19,43.92,14.18,37.83,29.74,134.46,6.45,DH
128,77.24,16.74,49.78,60.5,110.69,39.79,SL
298,82.91,29.89,58.25,53.01,110.71,6.08,NO
107,63.17,6.33,63.0,56.84,110.64,42.61,SL
38,52.42,19.01,35.87,33.41,116.56,1.69,DH
208,48.26,16.42,36.33,31.84,94.88,28.34,SL
137,72.05,24.7,79.87,47.35,107.17,56.43,SL
290,51.08,14.21,35.95,36.87,115.8,6.91,NO
51,50.21,29.76,36.1,20.45,128.29,5.74,DH


In [34]:
# change result column to numerical
df['result'].replace(['DH','SL', 'NO'], [1,2,3], inplace=True)
df.head(10)

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,result
120,83.88,23.08,87.14,60.8,124.65,80.56,2
19,43.92,14.18,37.83,29.74,134.46,6.45,1
128,77.24,16.74,49.78,60.5,110.69,39.79,2
298,82.91,29.89,58.25,53.01,110.71,6.08,3
107,63.17,6.33,63.0,56.84,110.64,42.61,2
38,52.42,19.01,35.87,33.41,116.56,1.69,1
208,48.26,16.42,36.33,31.84,94.88,28.34,2
137,72.05,24.7,79.87,47.35,107.17,56.43,2
290,51.08,14.21,35.95,36.87,115.8,6.91,3
51,50.21,29.76,36.1,20.45,128.29,5.74,1


In [35]:
# split into explanatory and response variables
# scale explanatory variables
x = df.iloc[:,:6]

scaler = StandardScaler(with_mean=True, with_std=True)
x_scaled = pd.DataFrame(scaler.fit_transform(x), columns = list(x.columns.values))
x_scaled.head(10)

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis
0,1.357182,0.555104,1.896899,1.32902,0.502333,1.443635
1,-0.961288,-0.334534,-0.762687,-0.985089,1.241462,-0.531135
2,0.971931,-0.078638,-0.118151,1.306669,-0.549476,0.357259
3,1.300903,1.235827,0.338687,0.74863,-0.547969,-0.540994
4,0.155592,-1.119214,0.594883,1.033983,-0.553243,0.432402
5,-0.46812,0.14827,-0.868402,-0.711658,-0.107204,-0.657972
6,-0.709482,-0.110625,-0.843591,-0.82863,-1.740671,0.052156
7,0.670808,0.717038,1.504784,0.326935,-0.814688,0.800656
8,-0.545867,-0.331535,-0.864087,-0.453872,-0.164465,-0.518878
9,-0.596344,1.222832,-0.855996,-1.677236,0.776587,-0.550054


In [36]:
y = df['result']
y.head(10)

120    2
19     1
128    2
298    3
107    2
38     1
208    2
137    2
290    3
51     1
Name: result, dtype: int64

In [37]:
# build and fit model
model = LogisticRegression(solver='lbfgs')
model.fit(x_scaled,y)

print("Coefficients: ",model.coef_)
print("Intercept: ", model.intercept_)

# compute predicted values from training set
y_pred = model.predict(x_scaled)

# cross-validate
k = 10
scores = cross_val_score(estimator=model,
                        X=x_scaled,
                        y=y,
                        scoring="accuracy",
                        cv=k)

#statistics for the "ever-suspicious radiological community".
cm = confusion_matrix(y, y_pred)
print("Confusion matrix:\n",cm)
print(classification_report(y, y_pred, target_names=['DH', 'SL', 'NO']))


print("Accuracies from %d individual folds:" % k)
print(scores)
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, scores.mean()))

Coefficients:  [[-0.35903615  0.69522055 -0.50209041 -0.97908695 -0.66989756 -1.50253194]
 [ 0.28029143 -0.23102742  0.2945367   0.53243307 -0.03989836  3.88168854]
 [ 0.07874472 -0.46419313  0.20755371  0.44665388  0.70979592 -2.3791566 ]]
Intercept:  [-1.41970281  1.6827074  -0.26300459]
Confusion matrix:
 [[ 38   1  20]
 [  2 144   4]
 [ 13   2  85]]
              precision    recall  f1-score   support

          DH       0.72      0.64      0.68        59
          SL       0.98      0.96      0.97       150
          NO       0.78      0.85      0.81       100

    accuracy                           0.86       309
   macro avg       0.83      0.82      0.82       309
weighted avg       0.86      0.86      0.86       309

Accuracies from 10 individual folds:
[0.83870968 0.83870968 0.87096774 0.93548387 0.83870968 0.90322581
 0.74193548 0.90322581 0.77419355 0.9       ]
Accuracy calculated using 10-fold cross validation = 0.855


In [38]:
# tool to classify individual cases

def classify(pelvic_incidence,	pelvic_tilt,	lumbar_lordosis_angle,	sacral_slope,	pelvic_radius,degree_spondylolisthesis):
  array = np.array([pelvic_incidence,	pelvic_tilt,	lumbar_lordosis_angle,	sacral_slope,	pelvic_radius,degree_spondylolisthesis])
  # Reshaping into 2D
  array = array.reshape(1, -1)
  # scale data
  scaled = pd.DataFrame(scaler.transform(array))
  
  result = model.predict(scaled)
  if(result[0] == 1):
    print("Disk hernia")
  elif(result[0] == 2):
    print("Spondylolisthesis")
  else:
    print("Normal")

# example with first sample
classify(df['pelvic_incidence'][0], df['pelvic_tilt'][0], df['lumbar_lordosis_angle'][0], df['sacral_slope'][0], df['pelvic_radius'][0], df['degree_spondylolisthesis'][0])

Disk hernia


