In [39]:
import numpy as np
import tensorflow as tf
import os,shutil
import numpy as np
import cv2
import pickle
import pandas as pd

from tqdm import tqdm_notebook
from matplotlib import pyplot as plt

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

In [117]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, BatchNormalization, MaxPooling2D, Dropout, Activation
from keras.initializers import glorot_normal
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.utils import to_categorical

In [16]:
# Index the dataset found under ./Dataset (relative to the working directory):
#   Dataset/frames/<lecture>/<frame image>  - one sub-directory of frames per lecture
#   Dataset/labels/<label file>             - one whitespace-separated label file per lecture
# Lectures and label files are paired by their position in sorted order.
cur_dir = os.getcwd()

dataset_dir = os.path.join(cur_dir, "Dataset")

labels_dir = os.path.join(dataset_dir, "labels")
frames_dir = os.path.join(dataset_dir, "frames")

# Hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are ignored in BOTH
# directories; skipping them in only one would shift the lecture/label pairing.
lecture_names = sorted(name for name in os.listdir(frames_dir) if not name.startswith('.'))
label_names = sorted(name for name in os.listdir(labels_dir) if not name.startswith('.'))

n_lecs = len(label_names)

if len(lecture_names) != len(label_names):
    raise ValueError(
        "Found {} lecture folders in {} but {} label files in {}".format(
            len(lecture_names), frames_dir, len(label_names), labels_dir))

# listOfLectures[lecture][frame] is a list that starts as [frame_path]
# and gets the integer label appended below: [frame_path, label].
listOfLectures = []
for lecture_name in lecture_names:
    lecture_path = os.path.join(frames_dir, lecture_name)
    frame_names = sorted(name for name in os.listdir(lecture_path) if not name.startswith('.'))
    listOfLectures.append([[os.path.join(lecture_path, name)] for name in frame_names])

for lecture_frames, label_name in zip(listOfLectures, label_names):
    label_path = os.path.join(labels_dir, label_name)
    with open(label_path, 'r') as label_file:
        labels = label_file.read().split()
    # More labels than frames cannot be paired up; report it clearly
    # instead of failing with a bare IndexError.
    if len(labels) > len(lecture_frames):
        raise ValueError(
            "{} contains {} labels but the matching lecture has only {} frames".format(
                label_path, len(labels), len(lecture_frames)))
    for frame_entry, label in zip(lecture_frames, labels):
        frame_entry.append(int(label))

`window_size` is the number of neighbouring frames taken on each side of the centre frame, so one input example stacks `2 * window_size + 1` frames. For example, `window_size = 2` stacks 5 frames per input example.

In [17]:
def load_data_fast(listOfLectures, resolution, window_size):
    """Build sliding-window examples from per-lecture frame lists.

    Every frame that has ``window_size`` neighbours on both sides becomes the
    centre of one example: the ``2 * window_size + 1`` frames around it are
    read as grayscale, resized and stacked along the last axis. Only the frame
    entering the window is read at each step; the rest are reused from the
    previous window.

    Parameters
    ----------
    listOfLectures : list
        One list per lecture; each element is ``[frame_path, label]``.
    resolution : tuple
        Passed to ``cv2.resize`` as the target size, which OpenCV interprets
        as ``(width, height)``.
    window_size : int
        Number of frames taken on each side of the centre frame (>= 0).

    Returns
    -------
    (X_train, Y_train) : tuple of lists
        ``X_train[n]`` is the stacked window for example ``n`` and
        ``Y_train[n]`` is the label of its centre frame.

    Raises
    ------
    ValueError
        If ``window_size`` is negative.
    IOError
        If a frame image cannot be read.
    """
    if window_size < 0:
        raise ValueError("window_size must be >= 0, got {}".format(window_size))

    def _read_frame(path):
        # Read one frame as grayscale and resize it; cv2.imread reports
        # failure by returning None rather than raising.
        frame = cv2.imread(path, 0)
        if frame is None:
            raise IOError("Could not read frame image: {}".format(path))
        return cv2.resize(frame, resolution)

    span = 2 * window_size + 1
    X_train = []
    Y_train = []
    for i in tqdm_notebook(range(len(listOfLectures))):
        lecture = listOfLectures[i]
        # Lectures shorter than one full window yield no examples.
        if len(lecture) < span:
            continue
        # Window around the first valid centre (index == window_size).
        window = [_read_frame(lecture[k][0]) for k in range(span)]
        for j in tqdm_notebook(range(window_size, len(lecture) - window_size)):
            if j > window_size:
                # Slide by one frame: drop the oldest, append the newest.
                window = window[1:] + [_read_frame(lecture[j + window_size][0])]
            X_train.append(np.stack(window, axis=2))
            Y_train.append(lecture[j][1])
    return X_train, Y_train

In [None]:
# Load every lecture with one neighbouring frame on each side (3 stacked frames per example).
# `resolution` is forwarded to cv2.resize inside load_data_fast.
resolution = (60,80)
window_size = 1
X, Y = load_data_fast(listOfLectures, resolution, window_size)

In [None]:
# load_data_fast has no default for window_size, so it must be passed explicitly.
# window_size=2 stacks 2*2 + 1 = 5 frames per example.
# NOTE(review): 2 is chosen to match the 5-channel arrays used further down - confirm.
X_train, Y_train = load_data_fast(listOfLectures, (60, 80), window_size=2)

In [7]:
# Join the per-example arrays into one array with a new leading (example) axis.
X_train = np.stack(X_train)

In [38]:
# Load a previously saved (X_train, Y_train) pair; this overwrites any
# X_train / Y_train built by the cells above.
# NOTE(review): pickle.load can execute arbitrary code - only load files you created yourself.
with open("./XY_train_80_60_5.pkl", "rb") as pickle_in:
    X_train, Y_train = pickle.load(pickle_in)

In [40]:
# One-hot encode the integer labels.
# NOTE(review): this overwrites Y_train, so re-running the cell would encode
# the already one-hot labels a second time.
Y_train = to_categorical(Y_train)

In [41]:
# Sanity check: labels and inputs must have the same number of examples.
print(Y_train.shape, X_train.shape, sep="\n")

(28997, 2)
(28997, 80, 60, 5)


In [42]:
# Hold out 10% of the examples as a test set; stratify on the labels and fix the seed.
# This overwrites X_train / Y_train, so re-running the cell splits the
# already reduced training set again.
# NOTE(review): if examples are overlapping frame windows, a random split can put
# near-identical windows in both train and test - confirm this is acceptable.
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.1, random_state=42, stratify=Y_train)

In [43]:
# Sizes of the training and held-out test inputs after the split.
print(X_train.shape, X_test.shape, sep="\n")

(26097, 80, 60, 5)
(2900, 80, 60, 5)


In [76]:
# Number of training examples whose one-hot label has column 0 set.
Y_train[:, 0].sum()

25117.0

In [81]:
from sklearn.utils import resample

# Upsample the positive class (one-hot column 1 set) with replacement until it
# has as many examples as the negative class.
positive_mask = Y_train[:, 1] == 1
X_train_pos = X_train[positive_mask]
Y_train_pos = Y_train[positive_mask]

# Derive the majority-class size from the data instead of hard-coding it
# (previously the literal 25117), so the cell stays correct if the split changes.
n_majority = int(np.sum(Y_train[:, 1] == 0))

X_train_ups, Y_train_ups = resample(X_train_pos, Y_train_pos,
                                    replace=True,           # sample with replacement
                                    n_samples=n_majority,   # match the majority class
                                    random_state=123)

In [90]:
# Balanced training set: all negative examples (one-hot column 1 == 0)
# followed by the upsampled positives.
is_negative = Y_train[:, 1] == 0
X_train_final = np.concatenate([X_train[is_negative], X_train_ups], axis=0)
Y_train_final = np.concatenate([Y_train[is_negative], Y_train_ups], axis=0)

In [91]:
# Shapes of the balanced training inputs and labels.
print(X_train_final.shape, Y_train_final.shape, sep="\n")

(50234, 80, 60, 5)
(50234, 2)


In [92]:
# Training hyperparameters.
num_epochs = 5         # number of passes over the training set
num_classes = 2        # one-hot label width
# batch_size = 100
learning_rate = 0.001  # step size handed to the optimizer

In [119]:
def gen_model(input_shape=(80, 60, 5), num_classes=2):
    """Build the (uncompiled) convolutional classifier.

    Architecture: two blocks of [Conv2D 3x3 -> Conv2D 3x3 -> BatchNormalization
    -> MaxPooling 2x2] with 64 and then 32 filters, followed by Flatten and a
    softmax output layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input example, without the batch axis. Defaults to
        ``(80, 60, 5)``.
    num_classes : int
        Number of softmax output units. Defaults to 2.

    Returns
    -------
    keras.models.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    model = Sequential()

    # Block 1: 64 filters; input_shape is only needed on the first layer.
    model.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=input_shape, padding='valid'))
    model.add(Conv2D(64, kernel_size=3, activation='relu', padding='valid'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid'))

    # Block 2: 32 filters.
    model.add(Conv2D(32, kernel_size=3, activation='relu'))
    model.add(Conv2D(32, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid'))

    # Classifier head: one softmax unit per class.
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [120]:
# Instantiate the network defined by gen_model().
model = gen_model()

In [121]:
# Adam optimizer with the AMSGrad variant enabled, using the configured learning rate.
# NOTE(review): newer Keras releases name the first argument `learning_rate`
# instead of `lr` - confirm against the installed version.
optim = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=True)

In [122]:
# The network ends in a softmax over one-hot targets, so categorical
# cross-entropy is the matching loss. For two classes it gives the same value
# as binary cross-entropy, and it keeps the 'accuracy' metric meaningful if
# the number of classes is ever increased.
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

In [123]:
# Train on the balanced set for the configured number of epochs (num_epochs)
# instead of repeating the literal 5 here.
# The held-out test split is also passed as validation data, so the
# per-epoch validation scores are computed on the test set.
history = model.fit(X_train_final, Y_train_final, epochs=num_epochs, verbose=1, validation_data=(X_test, Y_test))

Train on 50234 samples, validate on 2900 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f2ca209e1d0>

In [124]:
# Predicted class index = position of the largest model output for each test example.
class_scores = model.predict(X_test)
preds = class_scores.argmax(axis=1)

In [125]:
# Convert the one-hot test labels to class indices once, then report
# per-class metrics and the F1 score.
y_true = np.argmax(Y_test, axis=1)
print(classification_report(y_true, preds))
print(f1_score(y_true, preds))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99      2791
           1       0.59      0.88      0.70       109

   micro avg       0.97      0.97      0.97      2900
   macro avg       0.79      0.93      0.84      2900
weighted avg       0.98      0.97      0.97      2900

0.7032967032967032


In [116]:
# Persist the model to an HDF5 file.
# NOTE(review): this cell's execution count (116) is lower than those of the
# cells that build and train the model (119-123), so the saved file may come
# from an earlier model; the file name says 0.8 while the F1 printed above is
# about 0.70 - confirm which model this file holds.
model.save('f1_0.8.h5')