In [99]:
import os
import numpy as np

from PIL import Image 


dataset_path = '../images'

data = []
labels = []

image_size = (64, 64)

folder_labels = {
    'Sample001': 0,
    'Sample002': 1,
    'Sample003': 2,
    'Sample004': 3,
    'Sample005': 4,
    'Sample006': 5,
    'Sample007': 6,
    'Sample008': 7,
    'Sample009': 8,
    'Sample010': 9,
}





# extracting images


In [100]:
for folder_name, label in folder_labels.items():
    folder_path = os.path.join(dataset_path, folder_name)
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.jpg') or file_name.endswith('.png'):  
            image_path = os.path.join(folder_path, file_name)
            
            image = Image.open(image_path).convert('L')  
            image = image.resize(image_size)
            image_array = np.array(image).flatten()  
            data.append(image_array)
            labels.append(label)

X = np.array(data)
y = np.array(labels)

In [101]:
print(X.shape, y.shape)

(593, 4096) (593,)


<p>
    <img src="./spinning-cat.gif" alt="Cat">
    
</p>

<style>
    h1 {
        color: red;
    }
    p {
        position: relative;
        /* animation: moveRightLeft 2s infinite ease-in-out; */
    }
    img
    {
        width :100px;
        height :100px;
    }

    @keyframes moveRightLeft {
        0% {
            left: 0;
        }
        50% {
            left: 100px;
        }
        100% {
            left: 0;
        }
    }
</style>

In [102]:


from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier  
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [103]:
from sklearn.linear_model import LogisticRegression
import plotly.graph_objects as go

from sklearn.preprocessing import label_binarize
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

classifier = LogisticRegression(max_iter=1000, random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

y_pred_prob_LR = classifier.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

y_test_bin = label_binarize(y_test, classes=list(set(y_test)))

print("y_test_bin shape:", y_test_bin.shape)
print("y_pred_prob_LR shape:", y_pred_prob_LR.shape)





Accuracy: 22.69%
y_test_bin shape: (119, 10)
y_pred_prob_LR shape: (119, 10)


In [104]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)

import plotly.express as px

fig = px.imshow(confusion_matrix(y_test, y_pred),
                labels=dict(x='Predicted', y='Actual', color='Count'),
                x=[f'Predicted {label}' for label in folder_labels],
                y=[f'Actual {label}' for label in folder_labels],
                title='Confusion Matrix for logistic regression classifier')

fig.show()

In [105]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score

precision = precision_score(y_test, y_pred, average='weighted')
print(f'Precision: {precision:.2f}')

recall = recall_score(y_test, y_pred, average='weighted')
print(f'Recall: {recall:.2f}')

Precision: 0.21
Recall: 0.23


In [106]:
roc_auc = roc_auc_score(y_test_bin, y_pred_prob_LR, multi_class='ovr', average='macro')
print(f'ROC AUC: {roc_auc:.2f}')


ROC AUC: 0.55


In [107]:
from sklearn.metrics import roc_curve


fpr = {}
tpr = {}
roc_auc = {}

for i in range(y_test_bin.shape[1]):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_prob_LR[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

fig = go.Figure()

colors = ['blue', 'green', 'red', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan', 'yellow']

for i, color in zip(range(y_test_bin.shape[1]), colors):
    fig.add_trace(go.Scatter(x=fpr[i], y=tpr[i], mode='lines', name=f'Class {i} (AUC = {roc_auc[i]:.2f})', line=dict(color=color)))

fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random Classifier', line=dict(color='black', dash='dash')))

fig.update_layout(
    title='ROC Curves for Multiclass Classification',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    legend_title='Classes',
    template='plotly_white'
)

fig.show()

In [108]:
from sklearn.neighbors import KNeighborsClassifier


classifier = KNeighborsClassifier(n_neighbors=5)

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
y_pred_prob_KNN = classifier.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

y_test_bin = label_binarize(y_test, classes=list(set(y_test)))


Accuracy: 61.34%


In [109]:
roc_auc = roc_auc_score(y_test_bin, y_pred_prob_KNN, multi_class='ovr', average='macro')
print(f'ROC AUC: {roc_auc:.2f}')


ROC AUC: 0.87


In [110]:
fpr = {}
tpr = {}
roc_auc = {}

for i in range(y_test_bin.shape[1]):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_prob_KNN[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

fig = go.Figure()

colors = ['blue', 'green', 'red', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan', 'yellow']

for i, color in zip(range(y_test_bin.shape[1]), colors):
    fig.add_trace(go.Scatter(x=fpr[i], y=tpr[i], mode='lines', name=f'Class {i} (AUC = {roc_auc[i]:.2f})', line=dict(color=color)))

fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random Classifier', line=dict(color='black', dash='dash')))

fig.update_layout(
    title='ROC Curves for Multiclass Classification',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    legend_title='Classes',
    template='plotly_white'
)

fig.show()

In [111]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)

import plotly.express as px

fig = px.imshow(confusion_matrix(y_test, y_pred),
                labels=dict(x='Predicted', y='Actual', color='Count'),
                x=[f'Predicted {label}' for label in folder_labels],
                y=[f'Actual {label}' for label in folder_labels],
                title='Confusion Matrix for logistic regression classifier')

fig.show()

In [112]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score

precision = precision_score(y_test, y_pred, average='weighted')
print(f'Precision: {precision:.2f}')

recall = recall_score(y_test, y_pred, average='weighted')
print(f'Recall: {recall:.2f}')

Precision: 0.66
Recall: 0.61


In [113]:
from sklearn.svm import SVC

classifier = SVC(kernel='rbf', random_state=42 , C=10)

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 73.95%


In [114]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)

import plotly.express as px

fig = px.imshow(confusion_matrix(y_test, y_pred),
                labels=dict(x='Predicted', y='Actual', color='Count'),
                x=[f'Predicted {label}' for label in folder_labels],
                y=[f'Actual {label}' for label in folder_labels],
                title='Confusion Matrix for support vector machine classifier')

fig.show()

# ROC AUC Pecesion recall

In [115]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score,roc_curve

precision = precision_score(y_test, y_pred, average='weighted')
print(f'Precision: {precision:.2f}')

recall = recall_score(y_test, y_pred, average='weighted')
print(f'Recall: {recall:.2f}')



Precision: 0.74
Recall: 0.74


In [116]:

classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 67.23%


In [117]:
import pandas as pd

comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
comparison_df.head(10)

Unnamed: 0,Actual,Predicted
0,0,6
1,0,0
2,2,6
3,6,4
4,2,3
5,5,5
6,3,0
7,8,6
8,0,0
9,1,1


In [118]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)

import plotly.express as px

fig = px.imshow(confusion_matrix(y_test, y_pred),
                labels=dict(x='Predicted', y='Actual', color='Count'),
                x=[f'Predicted {label}' for label in folder_labels],
                y=[f'Actual {label}' for label in folder_labels],
                title='Confusion Matrix for random forest classifier')

fig.show()

In [119]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score

precision = precision_score(y_test, y_pred, average='weighted')
print(f'Precision: {precision:.2f}')

recall = recall_score(y_test, y_pred, average='weighted')
print(f'Recall: {recall:.2f}')

Precision: 0.73
Recall: 0.67


In [120]:

print(y_test.shape, y_pred.shape)   



(119,) (119,)
