In [None]:
# Mount Google Drive at /content/drive; the data paths used in later cells live under this mount point.
from google.colab import drive 

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import copy
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import average_precision_score

import tensorflow as tf
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Flatten, Dense, \
                                    GlobalAveragePooling2D, Rescaling, \
                                    AveragePooling2D

# Load Data

In [None]:
# Folder on the mounted Drive from which the X.npy and y.npy arrays are loaded.
DATAPATH = '/content/drive/MyDrive/CS114-ML/data_ndarray'

In [None]:
# Load the sample array (X) and the label array (y) from their .npy files.
# np.load accepts a path directly and handles opening/closing the file itself.
X = np.load(f'{DATAPATH}/X.npy')
y = np.load(f'{DATAPATH}/y.npy')

# Flatten data

In [None]:
# Force X into 4050 samples of shape 16x16x3, scale the values, then flatten every
# sample into a single 768-element feature row.
# NOTE(review): np.resize does not interpolate -- when the element count differs it
# truncates or repeats the flattened data. Confirm X already holds 4050*16*16*3 values.
X = np.resize(X, (4050, 16, 16, 3))
# Divide by the 8-bit maximum. The divisor used to be 225.0, which looks like a typo
# for 255. Assumes X holds 0-255 pixel intensities -- TODO confirm, and keep any
# inference-time preprocessing in sync with this value.
X = X / 255.0
d1, d2, d3, d4 = X.shape
X_features = X.reshape((d1, d2 * d3 * d4))
print(X_features.shape)

(4050, 768)


# Cross Validation

In [None]:
# 5-fold splitter with shuffling. The shuffle is seeded with 42 (the same seed the
# classifiers and the hold-out split in this notebook use) so the folds, and therefore
# the per-fold scores, are identical on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
kf

KFold(n_splits=5, random_state=None, shuffle=True)

In [None]:
# Base estimators to compare. Every estimator that accepts a seed uses random_state=42.
knn = KNeighborsClassifier()
dcs_tree = DecisionTreeClassifier(random_state=42)
# probability=True is required because the evaluation calls predict_proba.
SVC_ = SVC(probability=True, random_state=42)
# The default iteration limit was reached on every fit (convergence warnings), so the
# limit is raised; increase it further if the warning still appears.
log_reg = LogisticRegression(max_iter=1000, random_state=42)
random_forest = RandomForestClassifier(random_state=42)
naive = GaussianNB()

# Evaluation order; the position in this list becomes the row index of the results table.
classifiers = [knn, dcs_tree, SVC_, log_reg, random_forest, naive]

In [None]:
# Empty results table: one row per classifier, holding its name, the score of each of
# the five folds, and the mean / standard deviation across folds.
fold_columns = ["fold_1", "fold_2", "fold_3", "fold_4", "fold_5"]
df = pd.DataFrame(columns=["classifier", *fold_columns, "mWAP", "std"])

# Per-fold scores of the classifier currently being evaluated.
ap_results = []

In [None]:
# Cross-validate every classifier on the flattened features.
# Each base estimator is wrapped in MultiOutputClassifier, fitted on the training part
# of a fold, and scored on the held-out part with the weighted average precision.
for i, clf in enumerate(classifiers):
    ap_results = []
    print(f">>> {clf}")
    for train_idx, test_idx in kf.split(X_features):
        X_train, y_train = X_features[train_idx], y[train_idx]
        X_test, y_test = X_features[test_idx], y[test_idx]

        multi_clf = MultiOutputClassifier(estimator=clf)
        multi_clf.fit(X_train, y_train)

        # predict_proba yields one array per label; take column index 1 of each and
        # transpose so rows are samples and columns are labels, rounded to 3 decimals.
        label_probas = np.array(multi_clf.predict_proba(X_test))
        y_preds = np.around(label_probas[:, :, 1].T, decimals=3)

        weighted_ap = average_precision_score(y_test, y_preds, average='weighted')
        print(f"> {weighted_ap}")
        ap_results.append(weighted_ap)

    # Summary statistics are computed from the unrounded fold scores.
    mwap = np.round(np.mean(ap_results), 3)
    std = np.round(np.std(ap_results), 3)

    ap_results = np.around(ap_results, decimals=3)

    # Table row: classifier name, the five fold scores, then mean and std.
    df.loc[i] = [f"{clf}", *ap_results, mwap, std]
    print(df.loc[[i]])

>>> KNeighborsClassifier()
> 0.4447008826577699
> 0.47174262717587123
> 0.4595733776892476
> 0.4550826540680023
> 0.4730558383794529
               classifier  fold_1  fold_2  fold_3  fold_4  fold_5   mWAP    std
0  KNeighborsClassifier()   0.445   0.472    0.46   0.455   0.473  0.461  0.011
>>> DecisionTreeClassifier(random_state=42)
> 0.23884098943490067
> 0.2418828626659259
> 0.24341542974250696
> 0.2470694704078514
> 0.24056703346160918
                                classifier  fold_1  ...   mWAP    std
1  DecisionTreeClassifier(random_state=42)   0.239  ...  0.242  0.003

[1 rows x 8 columns]
>>> SVC(probability=True, random_state=42)
> 0.5025535227143048
> 0.51782965156902
> 0.4982836710834523
> 0.5383766798259143
> 0.5328021765056523
                               classifier  fold_1  fold_2  ...  fold_5   mWAP    std
2  SVC(probability=True, random_state=42)   0.503   0.518  ...   0.533  0.518  0.016

[1 rows x 8 columns]
>>> LogisticRegression(random_state=42)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.29729992753995155


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.29549177939300736


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.3002303313699825


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.29825755230030704


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

> 0.2848599463977741
                            classifier  fold_1  fold_2  ...  fold_5   mWAP    std
3  LogisticRegression(random_state=42)   0.297   0.295  ...   0.285  0.295  0.005

[1 rows x 8 columns]
>>> RandomForestClassifier(random_state=42)
> 0.5627823479933542
> 0.5757056752108107
> 0.575085994465009
> 0.5600060068998178
> 0.5781846753478642
                                classifier  fold_1  fold_2  ...  fold_5  mWAP    std
4  RandomForestClassifier(random_state=42)   0.563   0.576  ...   0.578  0.57  0.007

[1 rows x 8 columns]
>>> GaussianNB()
> 0.20154564191051844
> 0.21298648207407006
> 0.19849743041121665
> 0.2094191282172881
> 0.22506625842718153
     classifier  fold_1  fold_2  fold_3  fold_4  fold_5  mWAP    std
5  GaussianNB()   0.202   0.213   0.198   0.209   0.225  0.21  0.009


In [None]:
# Display the rounded per-fold scores left in ap_results by the last classifier evaluated.
ap_results

array([0.202, 0.213, 0.198, 0.209, 0.225])

In [None]:
# Write the cross-validation results table to a CSV file in the current working directory.
df.to_csv("./raw_classifier.csv")

# Pure training (single hold-out split, no cross-validation)

In [None]:
# train_test_split is already imported in the main import cell; the import is repeated
# here so this section can also be run on its own.
from sklearn.model_selection import train_test_split

# Single shuffled hold-out split: 20% of the samples are reserved for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Fit one probability-enabled SVC per label on the training split
# (MultiOutputClassifier.fit returns the fitted wrapper itself).
SVC_clf = SVC(probability=True, random_state=42)
multi_clf = MultiOutputClassifier(estimator=SVC_clf).fit(X_train, y_train)

# predict_proba yields one array per label; take column index 1 of each and transpose
# so rows are samples and columns are labels, rounded to 3 decimals.
label_probas = np.array(multi_clf.predict_proba(X_test))
y_preds = np.around(label_probas[:, :, 1].T, decimals=3)

# Weighted average precision on the held-out split.
weighted_ap = average_precision_score(y_test, y_preds, average='weighted')
print(f"> {weighted_ap}")

> 0.8727113054338412


In [None]:
import pickle

# Persist the fitted multi-output classifier to Drive as a pickle file.
model_name = "mobilenet_svc.pkl"
save_path = "/content/drive/MyDrive/CS114-ML/final_model/mobilenet_svc"
# Create the target folder first: open(..., 'wb') raises FileNotFoundError when the
# directory is missing, which would lose the trained model at the very last step.
os.makedirs(save_path, exist_ok=True)
with open(os.path.join(save_path, model_name), 'wb') as fid:
    pickle.dump(multi_clf, fid)