# Biomechanical features of orthopedic patients

<img src='dataset-cover.jpg'>

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('column_2C_weka.csv')

In [3]:
df.head()

Unnamed: 0,pelvic_incidence,pelvic_tilt numeric,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
0,63.027817,22.552586,39.609117,40.475232,98.672917,-0.2544,Abnormal
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,Abnormal
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,Abnormal
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,Abnormal
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,Abnormal


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   pelvic_incidence          310 non-null    float64
 1   pelvic_tilt numeric       310 non-null    float64
 2   lumbar_lordosis_angle     310 non-null    float64
 3   sacral_slope              310 non-null    float64
 4   pelvic_radius             310 non-null    float64
 5   degree_spondylolisthesis  310 non-null    float64
 6   class                     310 non-null    object 
dtypes: float64(6), object(1)
memory usage: 17.1+ KB


In [14]:
df['class'] = df['class'].map({'Abnormal':1,
                             'Normal':0},
                             na_action=None)  

In [6]:
 X = df.drop(columns='class')

In [7]:
y = df['class']
y = df['class'].map({'Abnormal':1,
                             'Normal':0},
                             na_action=None)  

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier()
from sklearn.ensemble import RandomForestClassifier
r= RandomForestClassifier()
from sklearn.tree import DecisionTreeClassifier
d= DecisionTreeClassifier()
from sklearn.linear_model import LogisticRegression
log = LogisticRegression()
from sklearn.neighbors import KNeighborsClassifier
k = KNeighborsClassifier()
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB

g = GaussianNB()
b = BernoulliNB();

In [11]:
algorithms = [g,b,k,log,gbc,r,d]
names = ['GaussianNB','BernoulliNB', 'K Nearest', 'Logistic','GradientBoosting','RandomForest','DecisionTree',]
def algo_test(A, b, algorithms = algorithms,names = names):
    # fit the data
    for i in range(len(algorithms)):
        algorithms[i] = algorithms[i].fit(A, b)
    
    accuracy = []
    precision = []
    recall = []
    f1 = []
    for i in range(len(algorithms)):
        accuracy.append(accuracy_score(y, algorithms[i].predict(X)))
        precision.append(precision_score(y, algorithms[i].predict(X)))
        recall.append(recall_score(y,algorithms[i].predict(X)))
        f1.append(f1_score(y, algorithms[i].predict(X)))
    metrics = pd.DataFrame(columns = ['Accuracy', 'Precision', 'Recall', 'F1'], index = names)
    metrics['Accuracy'] = accuracy
    metrics['Precision'] = precision
    metrics['Recall'] = recall
    metrics['F1'] = f1
    return metrics.sort_values('F1', ascending = False)

In [12]:
algo_test(X_test, y_test)

Unnamed: 0,Accuracy,Precision,Recall,F1
Logistic,0.867742,0.904306,0.9,0.902148
DecisionTree,0.841935,0.870968,0.9,0.885246
RandomForest,0.841935,0.877934,0.890476,0.884161
GradientBoosting,0.841935,0.881517,0.885714,0.88361
K Nearest,0.83871,0.892157,0.866667,0.879227
BernoulliNB,0.735484,0.758065,0.895238,0.820961
GaussianNB,0.770968,0.888268,0.757143,0.817481


# PyCaret Classification Tahminleme Yapalım

In [16]:
from pycaret.classification import*

In [17]:
##Modelimizi Compile Ediyoruz
exp_name=setup(df,target='class')

Unnamed: 0,Description,Value
0,Session id,4619
1,Target,class
2,Target type,Binary
3,Original data shape,"(310, 7)"
4,Transformed data shape,"(310, 7)"
5,Transformed train set shape,"(217, 7)"
6,Transformed test set shape,"(93, 7)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


In [18]:
best_model=compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.858,0.9241,0.8986,0.8991,0.8953,0.6707,0.6857,0.116
lightgbm,Light Gradient Boosting Machine,0.8442,0.9289,0.8848,0.8904,0.8856,0.6387,0.6458,0.132
gbc,Gradient Boosting Classifier,0.8439,0.918,0.8781,0.8973,0.8853,0.638,0.6447,0.092
xgboost,Extreme Gradient Boosting,0.84,0.9269,0.8857,0.8835,0.8824,0.6296,0.6402,0.042
lr,Logistic Regression,0.8348,0.9251,0.8648,0.8912,0.8771,0.6248,0.6276,0.302
et,Extra Trees Classifier,0.8346,0.9111,0.8919,0.8749,0.8806,0.6084,0.6171,0.143
ada,Ada Boost Classifier,0.8301,0.885,0.8848,0.8743,0.8766,0.5998,0.6091,0.063
knn,K Neighbors Classifier,0.8297,0.9069,0.8705,0.8857,0.8743,0.606,0.6178,0.021
ridge,Ridge Classifier,0.8255,0.0,0.919,0.8424,0.8777,0.5735,0.5861,0.015
lda,Linear Discriminant Analysis,0.821,0.8786,0.9124,0.8415,0.8738,0.565,0.5778,0.026


Processing:   0%|          | 0/65 [00:00<?, ?it/s]