In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Root folder of the Kaggle face-expression dataset; later cells append to it.
pth = '/kaggle/input/face-expression-recognition-dataset/images/'

# One sub-folder per expression class lives under <pth>/images/train.
classes = os.listdir(os.path.join(pth, 'images', 'train'))
num_classes = len(classes)

# Last expression of the cell: display the class folder names.
classes

In [None]:
from fastai.vision import *
# Seed NumPy's global RNG before the DataBunch is built.
np.random.seed(0)

# Augmentations applied to the training images.
# NOTE(review): flip_vert=True also allows upside-down faces -- confirm this is
# wanted for expression recognition.
tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.3)

# Build train/validation sets from <pth>/train and <pth>/validation, resize to
# 240px and normalise with the ImageNet statistics.
data = ImageDataBunch.from_folder(pth, valid='validation', ds_tfms=tfms, size=240).normalize(imagenet_stats)

# The keyword is `figsize`; the original `fig_size` is not a recognised argument,
# so the requested 10x10 figure size was never applied.
data.show_batch(rows=3, figsize=(10, 10))

In [None]:
import matplotlib.pyplot as plt
# Compare the per-class share of images in the training and validation splits.
train_dir = os.path.join(pth, 'train')
valid_dir = os.path.join(pth, 'validation')

# Take the class names from the same folder that is counted below, in a fixed
# order so the bars are reproducible between runs.
classes = sorted(os.listdir(train_dir))

train_classes = [len(os.listdir(os.path.join(train_dir, cls))) for cls in classes]
test_classes = [len(os.listdir(os.path.join(valid_dir, cls))) for cls in classes]

print("Total training data " + str(sum(train_classes)))
print("Total test data " + str(sum(test_classes)))

# Convert raw counts to fractions so both splits share one scale.
train_classes = np.array(train_classes)/sum(train_classes)
test_classes = np.array(test_classes)/sum(test_classes)

# One pair of bars per class. (A histogram of these arrays would bin the
# proportion *values* and hide which class each proportion belongs to.)
positions = np.arange(len(classes))
bar_width = 0.4
plt.bar(positions - bar_width/2, train_classes, width=bar_width, label='train')
plt.bar(positions + bar_width/2, test_classes, width=bar_width, label='validation')
plt.xticks(positions, classes)
plt.ylabel('Fraction of images')
plt.title('Class distribution per split')
plt.legend()

plt.show()

#free up RAM
train_classes=None
test_classes = None

In [None]:
# Transfer-learning model: DenseNet-201 body with a fastai classification head.
# The head's dropout probability is set through `ps`; the original
# `learn.apply_dropout(0.4)` call did not configure any dropout rate
# (NOTE(review): in fastai v1 that method looks like the per-module hook used
# for MC-dropout inference -- confirm against the installed version).
learn = cnn_learner(data=data, base_arch=models.densenet201, ps=0.4, model_dir='/kaggle/working/models')

# Learning-rate range test with the body still frozen.
learn.lr_find()

In [None]:
# Plot what the learner's recorder captured during the preceding lr_find() run.
learn.recorder.plot()

In [None]:
#learn.load("/kaggle/input/latest/frozen(5)")
lr = 0.01
num_cycle = 100

# fastai reads slice(start, stop) as "first layer group gets start, last layer
# group gets stop". The original slice(lr, lr/12) was therefore inverted: the
# head trained at lr/12 instead of the chosen lr.
frozen_lrs = slice(lr/12, lr)

# Each iteration is one 12-epoch one-cycle run; the checkpoint is overwritten
# after every run so an interrupted session keeps the latest weights.
for _ in range(num_cycle):
    learn.fit_one_cycle(12, frozen_lrs)
    learn.save('/kaggle/working/frozen')

In [None]:
#learn.load('/kaggle/input/latest/frozen(5)')

# Make every layer group trainable, then repeat the learning-rate range test
# so a rate can be picked for fine-tuning the whole network.
learn.unfreeze()
learn.lr_find()

In [None]:
# Plot what the recorder captured during the lr_find() run on the unfrozen model.
learn.recorder.plot()

In [None]:
lr_unfreeze = 1e-05
num_cycles_uf = 100
unfreeze_ckpt = '/kaggle/working/unfreeze_final'

# Resume from an earlier fine-tuning checkpoint only when one exists. On a fresh
# kernel nothing has written it yet, and an unconditional load would fail.
# NOTE(review): assumes fastai stores the weights as "<name>.pth" -- confirm.
if os.path.exists(unfreeze_ckpt + '.pth'):
    learn.load(unfreeze_ckpt)

# slice(start, stop) = (rate for the earliest layers, rate for the head); the
# original slice(lr_unfreeze, lr_unfreeze/10) had the two ends swapped.
unfrozen_lrs = slice(lr_unfreeze/10, lr_unfreeze)

# num_cycles_uf // 15 one-cycle runs of 15 epochs each, checkpointing after each.
for _ in range(num_cycles_uf//15):
    learn.fit_one_cycle(15, unfrozen_lrs)
    learn.save(unfreeze_ckpt)

In [None]:
# Write the learner's current weights to the 'unfreeze_final' checkpoint.
learn.save('/kaggle/working/unfreeze_final')

In [None]:
# Restore the 'unfreeze_final' checkpoint (entry point when training cells are skipped).
learn.load('/kaggle/working/unfreeze_final')

In [None]:
# Visual sanity check: display a sample of model results.
learn.show_results()

In [None]:
#LABELS FIX
from torch import Tensor
from sklearn.preprocessing import OneHotEncoder

def ohe_fastai_lbls(lbls):
    """One-hot encode a flat sequence of labels.

    The labels are reshaped into a single column, a fresh ``OneHotEncoder``
    is fitted on that column, and the dense encoding is returned wrapped in
    a torch ``Tensor`` (one row per label).
    """
    column = np.array(lbls).reshape(-1, 1)
    encoder = OneHotEncoder().fit(column)
    dense = encoder.transform(column).toarray()
    return Tensor(dense)

#Calculate metrics
# Each call returns a (predictions, labels) pair. The first call targets the
# training set explicitly; the second relies on get_preds' default dataset
# (presumably the validation set -- confirm against the fastai version in use).
train_prdctns, train_lbls = learn.get_preds(ds_type = DatasetType.Train)
valid_prdctns, valid_lbls = learn.get_preds()

def ohe_to_lbls(ohe):
    """Return, for every row of ``ohe``, the index of its largest entry as a plain int."""
    return [int(np.argmax(row)) for row in ohe]

# Collapse each prediction row to the index of its largest entry, overwriting
# the original prediction variables with plain lists of ints.
valid_prdctns = ohe_to_lbls(valid_prdctns)
train_prdctns = ohe_to_lbls(train_prdctns)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Accuracy and macro-averaged F1 for both splits, validation first.
splits = (
    ("validation", valid_lbls, valid_prdctns),
    ("training", train_lbls, train_prdctns),
)

for split_name, truth, predicted in splits:
    print(split_name + " accuracy:", accuracy_score(y_true=truth, y_pred=predicted))

for split_name, truth, predicted in splits:
    print(split_name + " f1_score:", f1_score(y_true=truth, y_pred=predicted, average='macro'))

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt     

# Confusion matrix of the validation predictions, drawn as a heatmap.
class_names = data.valid_ds.classes
cm = confusion_matrix(y_true=valid_lbls, y_pred=valid_prdctns)

ax = plt.subplot()
sns.heatmap(cm, annot=False, ax=ax)  # annot=True to annotate cells

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(class_names)
# Trailing ';' keeps the notebook from echoing the returned tick-label list.
ax.yaxis.set_ticklabels(class_names, rotation='horizontal');

In [None]:
from torch import Tensor
from sklearn.decomposition import PCA

#format data from 3d arrays to 1d
def frmt_data(data):
    """Flatten the ``.data`` payload of every item and stack the results.

    Each element of ``data`` must expose a ``.data`` attribute convertible to
    a NumPy array; it is flattened in row-major order and the per-item
    vectors are collected into one array (one row per item).
    """
    flat_rows = []
    for item in data:
        flat_rows.append(np.array(item.data).reshape(-1))
    return np.array(flat_rows)

# Flatten every training / validation image into one row of a 2-D matrix so
# the scikit-learn estimators below can consume them.
train_x =  frmt_data(data.train_ds.x)
valid_x = frmt_data(data.valid_ds.x)


#apply pca
# Unrestricted PCA on the flattened training images (fit returns the estimator).
pca = PCA().fit(train_x)

#show components vs data loss
variance_fig = plt.figure(1, figsize=(32, 8))
variance_ax = variance_fig.gca()
variance_ax.plot(pca.explained_variance_, linewidth=2)
variance_ax.set_xlabel("Components")
variance_ax.set_ylabel("Explained Variances")
# Single tick marking the component count used in the next step.
variance_ax.set_xticks([150])
plt.show()

In [None]:
#apply pca again keeping chosen components
def pca_with_cmpnnts(n_components, data):
    """Fit a PCA with ``n_components`` components on ``data`` and return its projection.

    The PCA is fitted on the very array it transforms, so two calls on
    different arrays yield coordinates in *different* bases. Do not use this
    helper to project a train/validation pair that must be comparable.

    Args:
        n_components: number of principal components to keep.
        data: 2-D array of samples (one row per sample).

    Returns:
        ``data`` projected onto its own first ``n_components`` components.
    """
    pca = PCA(n_components = n_components)
    pca.fit(data)
    return pca.transform(data)

n_components = 150

# Learn the projection once, on the training data only, and apply that same
# projection to both splits. Fitting a second PCA on the validation set (as the
# original did) puts the two feature matrices in unrelated coordinate systems,
# so classifiers trained on one cannot be meaningfully scored on the other.
pca_reduced = PCA(n_components = n_components).fit(train_x)
train_x_pca = pca_reduced.transform(train_x)
valid_x_pca = pca_reduced.transform(valid_x)

#format labels
# Read the integer class index from each label item's `.data` instead of
# parsing the last character of its repr: that broke for non-numeric class
# names (int() on a letter raises ValueError) and for indices of 10 or more.
# NOTE(review): assumes fastai Category items expose the class index as `.data`.
train_y = [int(lbl.data) for lbl in data.train_ds.y]
valid_y = [int(lbl.data) for lbl in data.valid_ds.y]

In [None]:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Baseline classical classifiers trained on the PCA-reduced pixels.
# The list is named `classifiers`, not `models`: the learner cell relies on the
# `models` module brought in by `from fastai.vision import *`
# (models.densenet201), and rebinding that name to a list would break the cell
# if it were re-run later in the session.
classifiers = [
    ('LDA', LinearDiscriminantAnalysis()),
    ("LR", LogisticRegression()),
    ("NB", GaussianNB()),
    ("KNN", KNeighborsClassifier(n_neighbors=5)),
    ("DT", DecisionTreeClassifier()),
    ("SVM", SVC()),
]

for name, clf in classifiers:
    # Fit on the training projection, score on the validation projection.
    clf.fit(train_x_pca, train_y)

    y_pred = clf.predict(valid_x_pca)
    print(10*"=","{} Result".format(name).upper(),10*"=")
    print("Accuracy score:{:0.2f}".format(accuracy_score(valid_y, y_pred)))
    print()