# EE257 Project - Feature Extraction

[Shoulder Implant X-Ray Manufacturer Classification Data Set (2020)](https://archive.ics.uci.edu/ml/datasets/Shoulder+Implant+X-Ray+Manufacturer+Classification)

In [27]:
import os

# Locate the dataset folder relative to the notebook's working directory.
# os.path.join inserts the platform's own separator, so the path does not
# depend on a hand-written backslash (and the invalid '\d' escape it implies).
current_path = os.getcwd()
dataset_path = os.path.join(current_path, 'dataset')
dataset_path

'c:\\Users\\tickn\\ml\\EE257\\EE257 Project\\dataset'

In [28]:
# Load dataset and split
import tensorflow as tf
from PIL import Image
import random
import pathlib

# Image folder inside the dataset directory. The `/` operator joins path
# segments portably instead of concatenating a literal backslash.
data_dir = pathlib.Path(dataset_path) / 'data'
batch_size = 32

def describe_img(filepath):
    """Return the (width, height) in pixels of one randomly chosen JPEG.

    Args:
        filepath: pathlib.Path of the directory to search; sub-directories
            are searched recursively for files matching ``*.jpg``.

    Returns:
        Tuple ``(width, height)`` taken from the opened image's ``size``.

    Raises:
        IndexError: if no ``.jpg`` file exists under ``filepath``.
    """
    # rglob('*.jpg') is the portable spelling of the recursive '**/*.jpg'
    # pattern; a backslash-separated pattern would only match on Windows.
    candidates = list(filepath.rglob('*.jpg'))
    if not candidates:
        raise IndexError("no .jpg images found under %s" % filepath)

    rand_img = random.choice(candidates)
    # The context manager releases the file handle once the size is read.
    with Image.open(str(rand_img)) as img:
        width, height = img.size

    return width, height

# Use the dimensions of one sampled image as the target size for all images.
img_width, img_height = describe_img(data_dir)

# Arguments common to both splits: the two calls receive the same seed and
# validation_split and differ only in the `subset` they request.
split_kwargs = dict(
    validation_split=0.2,
    seed=123,
    color_mode="grayscale",
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# 80% of the files: training subset.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **split_kwargs
)

# Remaining 20%: the "validation" subset, used here as the test set.
test_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **split_kwargs
)



Found 597 files belonging to 4 classes.
Using 478 files for training.
Found 597 files belonging to 4 classes.
Using 119 files for validation.


In [29]:
import numpy as np

def dataset_to_2D(dataset):
    """Flatten every image of a batched dataset into a 1-D feature vector.

    Args:
        dataset: iterable yielding ``(img_batch, label_batch)`` pairs; each
            image in a batch must provide a ``.flatten()`` method.

    Returns:
        ``(x, y)``: a list with one flattened image per sample and a list
        with the matching labels, both in iteration order.
    """
    features, targets = [], []
    for images, labels in dataset:
        # One flat row per image, since model fit() needs 2D input.
        features.extend(image.flatten() for image in images)
        targets.extend(labels)
    return features, targets
        
# Materialise both splits as flat feature rows plus label lists.
train_batches = train_ds.as_numpy_iterator()
test_batches = test_ds.as_numpy_iterator()
x_train, y_train = dataset_to_2D(train_batches)
x_test, y_test = dataset_to_2D(test_batches)

# Sanity check: shape of the training features, then of the training labels.
print(np.shape(x_train), np.shape(y_train), sep="\n")

    


(478, 62500)
(478,)


In [30]:
# Model 1 -- Logistic Regression
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier using the newton-cg solver, allowed up to
# 1000 solver iterations.
model = LogisticRegression(
    max_iter=1000,
    solver='newton-cg',
)

In [31]:
# Fit on the flattened training images; the fitted estimator is the cell output.
model.fit(X=x_train, y=y_train)

LogisticRegression(max_iter=1000, solver='newton-cg')

In [32]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict the held-out split with the logistic-regression model.
logreg_pred = model.predict(x_test)

# One print call with a newline separator emits the header, the matrix,
# the divider and the per-class report on consecutive lines.
print(
    "Confusion Matrix",
    confusion_matrix(y_test, logreg_pred),
    "--------------------------",
    classification_report(
        y_test,
        logreg_pred,
        target_names=['Cofield', 'Depuy', 'Tornier', 'Zimmer'],
    ),
    sep="\n",
)

Confusion Matrix
[[ 5  8  2  0]
 [ 1 36  8 16]
 [ 1  4  3 10]
 [ 3 12  2  8]]
--------------------------
              precision    recall  f1-score   support

     Cofield       0.50      0.33      0.40        15
       Depuy       0.60      0.59      0.60        61
     Tornier       0.20      0.17      0.18        18
      Zimmer       0.24      0.32      0.27        25

    accuracy                           0.44       119
   macro avg       0.38      0.35      0.36       119
weighted avg       0.45      0.44      0.44       119



In [33]:
# score() on a scikit-learn classifier returns mean accuracy (higher is
# better), so the values are labelled as accuracy rather than error.
print(" Training accuracy: %f " % model.score(x_train, y_train))
print(" Test accuracy: %f " % model.score(x_test, y_test))

 Training error: 1.000000 
 Test error: 0.436975 


In [34]:
# Model 2 -- Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# Fix random_state so the fitted tree (and the metrics below) are the same on
# every run; without it, ties between equally good splits are broken randomly.
# 123 matches the seed used for the train/test split.
tree = DecisionTreeClassifier(random_state=123)
tree.fit(x_train, y_train)

# Predict the held-out split.
tree_predict = tree.predict(x_test)

print("Confusion Matrix")
print(confusion_matrix(y_test, tree_predict))
print("--------------------------")
print(classification_report(y_test, tree_predict, target_names=['Cofield', 'Depuy', 'Tornier', 'Zimmer']))

Confusion Matrix
[[ 8  4  2  1]
 [ 7 41  2 11]
 [ 1  6  2  9]
 [ 2 11  2 10]]
--------------------------
              precision    recall  f1-score   support

     Cofield       0.44      0.53      0.48        15
       Depuy       0.66      0.67      0.67        61
     Tornier       0.25      0.11      0.15        18
      Zimmer       0.32      0.40      0.36        25

    accuracy                           0.51       119
   macro avg       0.42      0.43      0.42       119
weighted avg       0.50      0.51      0.50       119



In [35]:
# score() on a scikit-learn classifier returns mean accuracy (higher is
# better), so the values are labelled as accuracy rather than error.
print(" Training accuracy: %f " % tree.score(x_train, y_train))
print(" Test accuracy: %f " % tree.score(x_test, y_test))

 Training error: 1.000000 
 Test error: 0.512605 


In [36]:
from sklearn.svm import SVC

# Support-vector classifier with C=100; every other hyperparameter is left
# at its default. fit() returns the estimator, so build-and-fit is chained.
svm = SVC(C=100).fit(x_train, y_train)

# Predict the held-out split.
svm_predict = svm.predict(x_test)

# One print call with a newline separator emits the header, the matrix,
# the divider and the per-class report on consecutive lines.
print(
    "Confusion Matrix",
    confusion_matrix(y_test, svm_predict),
    "--------------------------",
    classification_report(
        y_test,
        svm_predict,
        target_names=['Cofield', 'Depuy', 'Tornier', 'Zimmer'],
    ),
    sep="\n",
)



Confusion Matrix
[[ 8  4  1  2]
 [ 4 40  3 14]
 [ 3  7  1  7]
 [ 4 12  1  8]]
--------------------------
              precision    recall  f1-score   support

     Cofield       0.42      0.53      0.47        15
       Depuy       0.63      0.66      0.65        61
     Tornier       0.17      0.06      0.08        18
      Zimmer       0.26      0.32      0.29        25

    accuracy                           0.48       119
   macro avg       0.37      0.39      0.37       119
weighted avg       0.46      0.48      0.46       119



In [37]:
# score() on a scikit-learn classifier returns mean accuracy (higher is
# better), so the values are labelled as accuracy rather than error.
print(" Training accuracy: %f " % svm.score(x_train, y_train))
print(" Test accuracy: %f " % svm.score(x_test, y_test))

 Training error: 1.000000 
 Test error: 0.478992 


In [38]:
# Model -- Linear Discriminant Analysis (LDA)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Linear discriminant analysis with default settings; fit() returns the
# estimator, so construction and fitting are chained.
lda = LinearDiscriminantAnalysis().fit(x_train, y_train)

# Predict the held-out split.
lda_predict = lda.predict(x_test)

# One print call with a newline separator emits the header, the matrix,
# the divider and the per-class report on consecutive lines.
print(
    "Confusion Matrix",
    confusion_matrix(y_test, lda_predict),
    "--------------------------",
    classification_report(
        y_test,
        lda_predict,
        target_names=['Cofield', 'Depuy', 'Tornier', 'Zimmer'],
    ),
    sep="\n",
)



Confusion Matrix
[[ 7  6  1  1]
 [ 2 41  4 14]
 [ 0 10  1  7]
 [ 3 13  4  5]]
--------------------------
              precision    recall  f1-score   support

     Cofield       0.58      0.47      0.52        15
       Depuy       0.59      0.67      0.63        61
     Tornier       0.10      0.06      0.07        18
      Zimmer       0.19      0.20      0.19        25

    accuracy                           0.45       119
   macro avg       0.36      0.35      0.35       119
weighted avg       0.43      0.45      0.44       119



In [39]:
# score() on a scikit-learn classifier returns mean accuracy (higher is
# better), so the values are labelled as accuracy rather than error.
print(" Training accuracy: %f " % lda.score(x_train, y_train))
print(" Test accuracy: %f " % lda.score(x_test, y_test))

 Training error: 0.887029 
 Test error: 0.453782 
