In [2]:
# ---------------------------------------------------------------------------------------------------------------
# Task 2 and 3 skeleton
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
import warnings

import skimage
from skimage.io import imread
from skimage.transform import resize


# Suppress all warnings
warnings.simplefilter("ignore")

def load_image_files(container_path, dimension=(30, 30)):
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "Your own dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        for file in direc.iterdir():
            img = skimage.io.imread(file)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    # return in the exact same format as the built-in datasets
    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)




In [3]:
#1) Data Acquisition------------------------

image_dataset = load_image_files("images/")

In [4]:
#2) Data sampling------------------------

'''Split data, but randomly allocate to training/test sets'''
X_train, X_test, y_train, y_test = train_test_split(image_dataset.data, image_dataset.target, test_size=0.5, random_state=42)

In [5]:
# 3)Exploratory Data Analysis-------------------------------
#skip for now


In [6]:
#------------------------------------------------------------------------------------------
#4) Data scaling( good for Distance and Gradient Descent based Problems)
#initialise the scalers
StandardScaler_scaler= StandardScaler()
MinMaxScaler_scaler=MinMaxScaler()

#fit and transform X_train & transform the X_test
X_train_standardScaled=StandardScaler_scaler.fit_transform(X_train)
#X_train_minmaxScaled= MinMaxScaler_scaler.fit_transform(X_train)

X_test_standardScaled=StandardScaler_scaler.transform(X_test)
#X_test_minmaxScaled=MinMaxScaler_scaler.transform(X_test)




In [7]:
#------------------------------------------------------------------------------------------
#5) Feature selection and extraction 
#skip for now

In [8]:
#------------------------------------------------------------------------------------------
#6) Initialize model
# instantiate classifier with default hyperparameters
svmModel=svm.SVC(gamma='auto')

mlpModel = MLPClassifier()



In [9]:
#------------------------------------------------------------------------------------------
#7) Train model
tune_param =[
                    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],'C': [0.01,1,10]},
                    {'kernel': ['linear'], 'C': [0.01,1,10]},
                    {'kernel': ['poly'], 'C': [0.01,1,10]}
            ]

mlp_parameters = {
    'hidden_layer_sizes': [(32,32,32),(28,28,28),(16,16,16)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['adam', 'sgd'],
    'random_state': [42],                # Random state
    'early_stopping': [True, False],      # Early stopping
    'alpha':  [0.0001,0.001,0.01,1,10]
}
# Define weighted precision as the scoring metric
grid_search=GridSearchCV(svmModel,tune_param,cv=10,scoring='precision_weighted')
mlp_grid_search=GridSearchCV(mlpModel,mlp_parameters,cv=10,scoring='precision_weighted')
#MinMaxScaler_gridSearchCV=grid_search.fit(X_test_minmaxScaled,y_train)
StandardScaler_gridSearchCV=grid_search.fit(X_train_standardScaled,y_train)
StandardScaler_mlp_gridSearchCV=mlp_grid_search.fit(X_train_standardScaled,y_train)

# examine the best model
# best score achieved during the GridSearchCV
print('GridSearch CV best score for SVM : {:.4f}\n'.format(StandardScaler_gridSearchCV.best_score_))
print('GridSearch CV best score for MLP : {:.4f}\n'.format(StandardScaler_mlp_gridSearchCV.best_score_))
# print parameters that give the best results
print('Parameters that give the best results for SVM :','\n', (StandardScaler_gridSearchCV.best_params_))
print('Parameters that give the best results for MLP:','\n', (StandardScaler_mlp_gridSearchCV.best_params_))

# print estimator that was chosen by the GridSearch
print('\n\nEstimator that was chosen by the search for SVM :','\n', (StandardScaler_gridSearchCV.best_estimator_))
print('\n\nEstimator that was chosen by the search for MLP:','\n', (StandardScaler_mlp_gridSearchCV.best_estimator_))


print("Evaluation for Standard Scaler for SVM\n")
means =StandardScaler_gridSearchCV.cv_results_['mean_test_score']
stds = StandardScaler_gridSearchCV.cv_results_['std_test_score']
params_list = StandardScaler_gridSearchCV.cv_results_['params']
# for mean, std, p in zip(means, stds, params_list):
#     print(f"{mean:.3f} (+/-{std * 2:.03f}) for {p}")
# print("\n")

print("Evaluation for Standard Scaler for MLP\n")
means2 =StandardScaler_mlp_gridSearchCV.cv_results_['mean_test_score']
stds2 = StandardScaler_mlp_gridSearchCV.cv_results_['std_test_score']
params_list2 = StandardScaler_mlp_gridSearchCV.cv_results_['params']
# for mean2, std2, p2 in zip(means2, stds2, params_list2):
#     print(f"{mean2:.3f} (+/-{std2 * 2:.03f}) for {p2}")
# print("\n")




GridSearch CV best score for SVM : 0.6322

GridSearch CV best score for MLP : 0.6640

Parameters that give the best results for SVM : 
 {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Parameters that give the best results for MLP: 
 {'activation': 'relu', 'alpha': 10, 'early_stopping': True, 'hidden_layer_sizes': (16, 16, 16), 'random_state': 42, 'solver': 'adam'}


Estimator that was chosen by the search for SVM : 
 SVC(C=1, gamma=0.001)


Estimator that was chosen by the search for MLP: 
 MLPClassifier(alpha=10, early_stopping=True, hidden_layer_sizes=(16, 16, 16),
              random_state=42)
Evaluation for Standard Scaler for SVM

Evaluation for Standard Scaler for MLP



In [10]:
#------------------------------------------------------------------------------------------
#9) Evaluate model
best_model = StandardScaler_gridSearchCV.best_estimator_

y_pred = best_model.predict(X_test_standardScaled)
print("Classification Report for test data for SVM:")
print(classification_report(y_test, y_pred, target_names=image_dataset.target_names))

best_model2 = StandardScaler_mlp_gridSearchCV.best_estimator_

y_pred2 = best_model.predict(X_test_standardScaled)
print("Classification Report for test data for MLP:")
print(classification_report(y_test, y_pred2, target_names=image_dataset.target_names))

""" kernel_means = {'rbf': [], 'poly': [], 'linear': []}
for mean, std, params in zip(means, stds, params_list):
    kernel_means[params['kernel']].append(mean)

    
rbf_mean = np.mean(kernel_means['rbf'])
print(rbf_mean)
poly_mean = np.mean(kernel_means['poly'])
print(poly_mean)
linear_mean = np.mean(kernel_means['linear'])
print(linear_mean)


# Plot the bar graph
bars = plt.bar(['rbf', 'poly', 'linear'], [rbf_mean, poly_mean, linear_mean],color=['blue','red','yellow'])

# Add text labels for each bar
for bar, mean in zip(bars, [rbf_mean, poly_mean, linear_mean]):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01, f'{mean:.2f}', ha='center', va='bottom')
    
plt.title("Mean Precision Score per Kernel")
plt.xlabel("Kernel")
plt.ylabel("Mean Precision Score")
plt.show() """

Classification Report for test data for SVM:
              precision    recall  f1-score   support

        cats       0.55      0.73      0.63        51
        dogs       0.58      0.39      0.46        49

    accuracy                           0.56       100
   macro avg       0.56      0.56      0.55       100
weighted avg       0.56      0.56      0.55       100

Classification Report for test data for MLP:
              precision    recall  f1-score   support

        cats       0.55      0.73      0.63        51
        dogs       0.58      0.39      0.46        49

    accuracy                           0.56       100
   macro avg       0.56      0.56      0.55       100
weighted avg       0.56      0.56      0.55       100



' kernel_means = {\'rbf\': [], \'poly\': [], \'linear\': []}\nfor mean, std, params in zip(means, stds, params_list):\n    kernel_means[params[\'kernel\']].append(mean)\n\n    \nrbf_mean = np.mean(kernel_means[\'rbf\'])\nprint(rbf_mean)\npoly_mean = np.mean(kernel_means[\'poly\'])\nprint(poly_mean)\nlinear_mean = np.mean(kernel_means[\'linear\'])\nprint(linear_mean)\n\n\n# Plot the bar graph\nbars = plt.bar([\'rbf\', \'poly\', \'linear\'], [rbf_mean, poly_mean, linear_mean],color=[\'blue\',\'red\',\'yellow\'])\n\n# Add text labels for each bar\nfor bar, mean in zip(bars, [rbf_mean, poly_mean, linear_mean]):\n    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01, f\'{mean:.2f}\', ha=\'center\', va=\'bottom\')\n    \nplt.title("Mean Precision Score per Kernel")\nplt.xlabel("Kernel")\nplt.ylabel("Mean Precision Score")\nplt.show() '

In [11]:
#below here I wa