In [None]:
from google.colab import drive 

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os
import copy
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import average_precision_score

import tensorflow as tf
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Flatten, Dense, \
                                    GlobalAveragePooling2D, Rescaling, \
                                    AveragePooling2D

# Load Data

In [None]:
DATAPATH = '/content/drive/MyDrive/CS114-ML/data_ndarray'

In [None]:
with open(f'{DATAPATH}/X.npy', 'rb') as f:
    X = np.load(f)
with open(f'{DATAPATH}/y.npy', 'rb') as f:
    y = np.load(f)

# Feature Extraction

## Declare model

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
input_shape = X.shape[1:]
num_classes = y.shape[1]

In [None]:
base_model = MobileNet(include_top=False, weights='imagenet', input_shape=input_shape)
base_model._name = "MobileNet"

In [None]:
model = Sequential([
            Input(shape=input_shape),
            Rescaling(1./255),
            base_model,
            AveragePooling2D(pool_size=(6, 6)),
            Flatten(),
            # Dense(35, activation=('sigmoid'), name='predictions')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 224, 224, 3)       0         
                                                                 
 MobileNet (Functional)      (None, 7, 7, 1024)        3228864   
                                                                 
 average_pooling2d (AverageP  (None, 1, 1, 1024)       0         
 ooling2D)                                                       
                                                                 
 flatten (Flatten)           (None, 1024)              0         
                                                                 
Total params: 3,228,864
Trainable params: 3,206,976
Non-trainable params: 21,888
_________________________________________________________________


## Feature Extraction

In [None]:
X_features = model.predict(X, verbose=True)



# Cross Validation

In [None]:
kf = KFold(n_splits=5, shuffle=True)
kf

KFold(n_splits=5, random_state=None, shuffle=True)

In [None]:
knn = KNeighborsClassifier()
dcs_tree = DecisionTreeClassifier(random_state=42)
SVC_ = SVC(probability=True, random_state=42)
log_reg = LogisticRegression(random_state=42)
random_forest = RandomForestClassifier(random_state=42)
naive = GaussianNB()

classifiers = [knn, dcs_tree, SVC_, log_reg, random_forest, naive]

In [None]:
df = pd.DataFrame(columns=["classifier", "fold_1", "fold_2", "fold_3", "fold_4", "fold_5", "mWAP", "std"])
ap_results = []

In [None]:
for i in range(len(classifiers)):
    ap_results = []
    print(f">>> {classifiers[i]}")
    for train_index, test_index in kf.split(X_features):
        X_train, X_test = X_features[train_index], X_features[test_index]
        y_train, y_test = y[train_index], y[test_index]

        clf = classifiers[i]

        multi_clf = MultiOutputClassifier(estimator=clf)
        multi_clf.fit(X_train, y_train)

        y_preds = np.array(multi_clf.predict_proba(X_test))
        y_preds = np.around(y_preds[:, :, 1].T, decimals=3)

        weighted_ap = average_precision_score(y_test, y_preds, average='weighted')
        print(f"> {weighted_ap}")
        ap_results.append(weighted_ap)

    mwap = np.round(np.mean(ap_results), 3)
    std = np.round(np.std(ap_results), 3)

    ap_results = np.around(ap_results, decimals=3)

    df.loc[i] = [f"{classifiers[i]}"] + [ap_results[0]] + [ap_results[1]] + [ap_results[2]] + [ap_results[3]] + [ap_results[4]] + [mwap] + [std]
    print(df.loc[[i]])

>>> KNeighborsClassifier()
> 0.7969710743358839
> 0.7981291198989509
> 0.7684773892644767
> 0.7870840652281067
> 0.7859685117970759
               classifier  fold_1  fold_2  fold_3  fold_4  fold_5   mWAP    std
0  KNeighborsClassifier()   0.797   0.798   0.768   0.787   0.786  0.787  0.011
>>> DecisionTreeClassifier(random_state=42)
> 0.40539075440037964
> 0.40138686820612884
> 0.3997256300749574
> 0.42332210190279507
> 0.4359664787656464
                                classifier  fold_1  ...   mWAP    std
1  DecisionTreeClassifier(random_state=42)   0.405  ...  0.413  0.014

[1 rows x 8 columns]
>>> SVC(probability=True, random_state=42)
> 0.8923717039146162
> 0.8794903655791798
> 0.8824951629407523
> 0.8824299243765897
> 0.88135406254021
                               classifier  fold_1  fold_2  ...  fold_5   mWAP    std
2  SVC(probability=True, random_state=42)   0.892   0.879  ...   0.881  0.884  0.005

[1 rows x 8 columns]
>>> LogisticRegression(random_state=42)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.8143925113144636


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.8151711469717751


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.8088488025888775


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.8199450728406553


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.803418298759047
                            classifier  fold_1  fold_2  ...  fold_5   mWAP    std
3  LogisticRegression(random_state=42)   0.814   0.815  ...   0.803  0.812  0.006

[1 rows x 8 columns]
>>> RandomForestClassifier(random_state=42)
> 0.8062510035267955
> 0.8049916147437853
> 0.8185911499651243
> 0.8093457631014618
> 0.8241029438530418
                                classifier  fold_1  ...   mWAP    std
4  RandomForestClassifier(random_state=42)   0.806  ...  0.813  0.007

[1 rows x 8 columns]
>>> GaussianNB()
> 0.4784308125592235
> 0.47174129317656477
> 0.49143673217929545
> 0.4708503631083312
> 0.46934450899675634
     classifier  fold_1  fold_2  fold_3  fold_4  fold_5   mWAP    std
5  GaussianNB()   0.478   0.472   0.491   0.471   0.469  0.476  0.008


In [None]:
ap_results

array([0.478, 0.472, 0.491, 0.471, 0.469])

In [None]:
df.to_csv("./classifier.csv")