In [None]:
COLAB = True if 'google.colab' in str(get_ipython()) else False

if COLAB:
    !rm -rf interview
    !git clone https://github.com/lukoshkin/interview.git
    !mv -n interview/CV/* .
    !unzip -nq EyesDataset.zip

In [None]:
# Select the matplotlib backend: 'inline' on Colab, 'notebook' otherwise.
get_ipython().run_line_magic('matplotlib', 'inline' if COLAB else 'notebook')

import os
import random
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from IPython.display import clear_output

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as torch_data

from src.model.architectures import SimpleNet, AutoEncoder
from src.model.scoring import LabelSmoothedBCEwLL, ROC_EER
from src.model.utils import train_open_eyes_clf, train_ae
from src.data.loaders import BatchLoader, AEBatchLoader, MRLEyesData
from src.data.utils import mend_labels

# Optional reproducibility switch: uncomment the lines below to seed the RNGs.
# seed = 0
# torch.manual_seed(seed)
# np.random.seed(seed)  # sklearn presumably draws from numpy's RNG
# random.seed(seed)     # just in case something relies on
                      # Python's built-in 'random' module

# Run on the GPU when torch reports one as available, otherwise on the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device('cuda' if has_cuda else 'cpu')

In [None]:
! unzip -qn EyesDataset.zip

dset = []
for file in Path('EyesDataset').iterdir():
    img = plt.imread(str(file), )
    dset.append(img)
    
dset = np.array(dset)

# Classifier

In [None]:
class OpenEyesClassifier(SimpleNet):
    """
    EyeDataset Classifier.

    A `SimpleNet` initialized from a saved state dict that scores
    a single eye image given by its file path.
    """
    def __init__(self):
        """
        Initializes the model with 'OEC-weights.pth'
        that should be located in the same directory
        where the invocation takes place.
        """
        super().__init__()
        # The model is not moved to any device here, so map the checkpoint
        # to the CPU: without `map_location`, weights that were saved from
        # a GPU fail to deserialize on a machine without CUDA.
        state = torch.load('OEC-weights.pth', map_location='cpu')
        self.load_state_dict(state)
        self.eval()

    def __call__(self, inplm):
        """
        Alias for self.predict method.
        """
        return self.predict(inplm)

    def predict(self, inplm):
        """
        Parameters:  inplm : str - path to an eye image.
        Returns:     is_open_score : float - classification score
                     (0 - eye is closed, 1 - is open).
        """
        # Two leading axes are added to the image read from disk
        # (presumably batch and channel - TODO confirm against SimpleNet).
        img = torch.Tensor(plt.imread(inplm).copy())[None, None]
        # Pure inference: no need to record the autograd graph.
        with torch.no_grad():
            is_open_score = self.forward(img).sigmoid().item()

#         plt.figure()
#         plt.imshow(img[0, 0], cmap='gray')
#         plt.title(f'is_open_score = {is_open_score}')
        return is_open_score

In [None]:
# Try the classifier on one image of the dataset.
sample_path = 'EyesDataset/000048.jpg'
clf = OpenEyesClassifier()
clf.predict(sample_path)

# Third-Party Dataset

In [None]:
! wget -nc http://mrl.cs.vsb.cz/data/eyedataset/mrlEyes_2018_01.zip
! unzip -nq mrlEyes_2018_01.zip
fnames = list(Path('mrlEyes_2018_01').rglob('*.png'))
train_files, test_files = train_test_split(fnames, test_size=.2)

In [None]:
train_ds = MRLEyesData(fnames=train_files)
val_ds = MRLEyesData(fnames=test_files)

# One loader worker per logical CPU. `os.cpu_count()` replaces the former
# `lscpu | grep | tr | cut` shell pipeline, which works on Linux only and
# depends on the exact text layout of the `lscpu` output.
# `os.cpu_count()` may return None, hence the fallback to a single worker.
num_workers = os.cpu_count() or 1

loader_kwargs = dict(batch_size=100, shuffle=True, num_workers=num_workers)
train_bl = torch_data.DataLoader(train_ds, **loader_kwargs)
val_bl = torch_data.DataLoader(val_ds, **loader_kwargs)

# Mean target value of the training split
# (the share of positives if targets are 0/1 - TODO confirm).
print(sum(train_ds.targets) / len(train_ds))

In [None]:
# Switch between plain BCE-with-logits and its label-smoothed variant.
label_smoothing = False

net = SimpleNet().to(device)
opt = optim.Adam(net.parameters(), lr=3e-3)
scheduler = optim.lr_scheduler.StepLR(opt, 1, gamma=.7)

if label_smoothing:
    criterion = LabelSmoothedBCEwLL(.2)
else:
    criterion = nn.BCEWithLogitsLoss()

In [None]:
# Start from +inf as the score to continue from.
best_score = float('inf')
train_kwargs = dict(
    device=device, metric=ROC_EER, epochs=10,
    continue_val_score=best_score)
best_score, state = train_open_eyes_clf(
    net, criterion, opt, train_bl, val_bl, scheduler, **train_kwargs)

# Write the returned state to disk only when there is one.
if state is not None:
    torch.save(state, 'mrl_eyes_weights.pth')

In [None]:
# Rebuild the network from the saved weights when the checkpoint file exists.
# `map_location=device` remaps the stored tensors onto the current device, so
# a checkpoint written on a GPU still loads on a CPU-only machine.
if Path('mrl_eyes_weights.pth').exists():
    net = SimpleNet().to(device)
    net.load_state_dict(
        torch.load('mrl_eyes_weights.pth', map_location=device))

In [None]:
# Divide the pixel values by 255 and insert an axis at position 1.
X = torch.Tensor(dset)[:, None] / 255
probs = net.predict(X.to(device)).detach().cpu()

# Keep the predictions lying outside of the open interval (.2, .8)
# and report which fraction of the entries that is.
uncertain = (probs > .2) & (probs < .8)
mask = ~uncertain
print(mask.sum() / len(mask))

# The kept predictions, thresholded at .5, serve as targets for the split.
pseudo_labels = (probs[mask] > .5).float()
X_train, X_test, y_train, y_test = train_test_split(
    X[mask], pseudo_labels, test_size=.2)

In [None]:
label_smoothing = True

# New loss, optimizer and scheduler for the already existing `net`.
criterion = LabelSmoothedBCEwLL(.2)
opt = optim.Adam(net.parameters(), lr=5e-4)
scheduler = optim.lr_scheduler.StepLR(opt, 1, gamma=.8)

# Batch loaders over the split produced from the thresholded predictions.
train_bs, val_bs = 40, 100
train_bl = BatchLoader(X_train, y_train, train_bs)
val_bl = BatchLoader(X_test, y_test, val_bs)

In [None]:
# Train again, passing the score obtained so far as the one to continue from.
train_kwargs = dict(
    device=device, metric=ROC_EER, epochs=10,
    continue_val_score=best_score)
best_score, state = train_open_eyes_clf(
    net, criterion, opt, train_bl, val_bl, scheduler, **train_kwargs)

# Overwrite the checkpoint only when a state was returned.
if state is not None:
    print('Saving the model weights')
    torch.save(state, 'mrl_eyes_weights.pth')

In [None]:
# Rebuild the network from the saved weights when the checkpoint file exists.
# `map_location=device` remaps the stored tensors onto the current device, so
# a checkpoint written on a GPU still loads on a CPU-only machine.
if Path('mrl_eyes_weights.pth').exists():
    net = SimpleNet().to(device)
    net.load_state_dict(
        torch.load('mrl_eyes_weights.pth', map_location=device))

In [None]:
# Model output for the held-out split, detached and moved back to the CPU.
test_output = net.predict(X_test.to(device))
probs = test_output.detach().cpu()
# mend_labels(dset, (probs > .5).long());

# Clustering

In [None]:
# Full image tensor (values divided by 255, axis inserted at position 1),
# split 80/20 for the autoencoder.
images = torch.Tensor(dset)
X = images[:, None] / 255
X_train, X_test = train_test_split(X, test_size=.2)

In [None]:
# Autoencoder with a code of size 50, trained with the MSE loss.
ae = AutoEncoder(code_size=50).to(device)
criterion = nn.MSELoss()

opt = optim.Adam(ae.parameters(), lr=3e-3)
scheduler = optim.lr_scheduler.StepLR(opt, 1, gamma=.8)

# Loaders over the image splits only (no targets are passed).
train_bs, val_bs = 40, 100
train_bl = AEBatchLoader(X_train, train_bs)
val_bl = AEBatchLoader(X_test, val_bs)

In [None]:
# Train the autoencoder for 20 epochs, then load the returned state into it.
ae_kwargs = dict(device=device, epochs=20)
best_score, state = train_ae(
    ae, criterion, opt, train_bl, val_bl, scheduler, **ae_kwargs)
ae.load_state_dict(state)

In [None]:
# One flattened row per image.
dset_flat = dset.reshape(len(dset), -1)
# dset_flat = ae.enc(X).detach().cpu()

# Two-cluster k-means on the flattened data; `dist` is the output of
# `transform`, with one column per cluster.
kmeans = KMeans(n_clusters=2)
kmeans.fit(dset_flat)
dist = kmeans.transform(dset_flat)

# Sorted values of each column of `dist` on a common figure.
plt.figure()
for center_idx in range(2):
    plt.plot(np.sort(dist[:, center_idx]))

# Cluster assignments and the fraction of samples labelled 1.
labels = kmeans.labels_
print(labels.sum()/labels.size)

In [None]:
# For each cluster take the `n_samples` points with the smallest difference
# "value in own column of `dist` minus value in the other column".
n_samples = 1700
d0, d1 = dist[:, 0], dist[:, 1]
ids1 = np.argsort(d1 - d0)[:n_samples]
ids0 = np.argsort(d0 - d1)[:n_samples]

# Cluster labels of the selected points, one curve per selection.
plt.figure()
for picked in (ids1, ids0):
    plt.plot(labels[picked])

In [None]:
# dset_trunc = np.concatenate((dset[ids0], dset[ids1]))
# labels_trunc = np.r_[np.zeros(len(ids0)), np.ones(len(ids1))]

# X = dset_trunc[:, None]
# y = labels_trunc

# Whole dataset (axis inserted at position 1) with the k-means labels
# as targets.
X, y = dset[:, None], labels

# 80/20 split; every part is converted to a torch tensor.
splits = train_test_split(X, y, test_size=.2)
X_train, X_test, y_train, y_test = (torch.Tensor(part) for part in splits)

In [None]:
# Switch between plain BCE-with-logits and its label-smoothed variant.
label_smoothing = True

# Fresh network and optimizer.
net = SimpleNet().to(device)
opt = optim.Adam(net.parameters(), lr=3e-3)
scheduler = optim.lr_scheduler.StepLR(opt, 1, gamma=.8)

if label_smoothing:
    criterion = LabelSmoothedBCEwLL(.2)
else:
    criterion = nn.BCEWithLogitsLoss()

train_bs, val_bs = 40, 100
train_bl = BatchLoader(X_train, y_train, train_bs)
val_bl = BatchLoader(X_test, y_test, val_bs)

In [None]:
# Train the classifier on the current loaders for 10 epochs without a metric.
# NOTE(review): unlike the other `train_open_eyes_clf` calls in this notebook,
# no `scheduler` is passed here - confirm that this is intentional.
best_score, state = train_open_eyes_clf(
    net, criterion, opt, train_bl, val_bl,
    device=device, metric=None, epochs=10)

# The other training cells treat the returned `state` as possibly None;
# guard in the same way instead of failing inside `load_state_dict`.
if state is not None:
    net.load_state_dict(state)

In [None]:
# Model output for the held-out split, detached and moved back to the CPU.
test_output = net.predict(X_test.to(device))
probs = test_output.detach().cpu()
# mend_labels(X_test.squeeze(1), (probs > .5).long());