In [None]:
%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import os
import tensorflow as tf

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be the same on every visible GPU,
        # so apply it to all of them rather than only to the first device.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set at program startup
        print(e)

from tensorflow.python.client import device_lib

# Report the runtime environment: TensorFlow version, the devices TensorFlow
# lists locally, and the CPU count reported by the OS.
print(tf.__version__)
local_devices = device_lib.list_local_devices()
print(local_devices)
n_cpus = os.cpu_count()
print(n_cpus)

# data_dir is the root that the frame glob searches; output_dir is created up front.
data_dir = './data'
output_dir = './output'
# exist_ok avoids the check-then-create race and is a no-op when the directory
# is already there.
os.makedirs(output_dir, exist_ok=True)

from IPython.core.display import display, HTML
# Inject CSS that makes the notebook's .container element span the full width.
display(HTML("<style>.container { width:100% !important; }</style>"))
#display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))

# Side length, in pixels, of the square frames stored in the arrays and fed to the models.
res = 224
# Number of frames sampled from each video.
n_of_sampled_images = 36
# Number of selections the frame-selection layers make (passed to them as k).
n_to_select = 4
# Number of videos per batch yielded by the dataset generator.
batch_size = 2

In [None]:
import glob
import os

def _frame_sort_key(path):
    """Sort key ordering frames by directory, then by numeric frame index.

    A plain string sort places 'frames_10.jpg' before 'frames_2.jpg' when the
    index is not zero-padded. Files whose index does not parse as an integer
    sort after the numbered ones, by file name.
    """
    directory, filename = os.path.split(path)
    try:
        number = int(filename.split('_')[1].split('.')[0])
    except ValueError:
        return directory, 1, 0, filename
    return directory, 0, number, filename


# All frame images, laid out as <data_dir>/*/*/frames/frames_*.jpg.
frames = sorted(glob.glob(os.path.join(data_dir, '*/*/frames/frames_*.jpg')), key=_frame_sort_key)

# Total on-disk size of all frames, in bytes.
total_size = sum(os.stat(frame).st_size for frame in frames)

print(total_size)

# Split every path relative to data_dir, so the field positions below depend
# neither on how many components data_dir has nor on the OS path separator.
# Relative layout: <sign>/<prefix>_<user>_<attempt>/frames/frames_<n>.jpg
split_frames = [os.path.relpath(frame, data_dir).split(os.sep) for frame in frames]

# First directory level: the sign.
sign = [split_frame[0] for split_frame in split_frames]

# Second directory level, '_'-separated: field 1 is the user, field 2 the attempt.
user = [split_frame[1].split('_')[1] for split_frame in split_frames]

attempt = [split_frame[1].split('_')[2] for split_frame in split_frames]

# Frame index taken from the file name 'frames_<n>.jpg' (kept as a string).
frame_num = [split_frame[3].split('_')[1].split('.')[0] for split_frame in split_frames]

class Counter(dict):
    """Dictionary in which a key that was never set reads as 0.

    Looking up an absent key does not insert it; the key only appears once a
    value is assigned, for example through ``counter[key] += 1``.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ when `key` is absent.
        return int()


# Number of frames seen per video key.
attempts = Counter()

class Accumulator(dict):
    """Dictionary in which a key that was never set reads as a new empty list.

    The list returned for an absent key is not stored in the dictionary; it
    only becomes an entry when assigned back, for example through
    ``acc[key] += [item]``.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ when `key` is absent; a new list each time.
        return list()
    
class Mapper(dict):
    """Dictionary that assigns consecutive integer ids (0, 1, 2, ...) to new keys.

    ``counter`` holds the number of ids handed out so far.
    """

    def __init__(self):
        super(Mapper, self).__init__()
        self.counter = 0

    def __missing__(self, key):
        # Store the new id so every later lookup of the same key returns the
        # same value. Without storing it, a plain read of an unknown key would
        # bump `counter` and yield a different id on each access.
        new_id = self.counter
        self.counter += 1
        self[key] = new_id
        return new_id

videos = Accumulator()
sign_ids = Mapper()

# Single pass over all frames: count the frames of each video, collect each
# video's frame paths, and make sure every sign has an integer id.
for frame, s, u, a in zip(frames, sign, user, attempt):
    video_key = '_'.join([s, u, a])
    attempts[video_key] += 1
    videos[video_key] += [frame]
    # `+= 0` looks the id up (a new one is produced for an unseen sign) and stores it.
    sign_ids[s] += 0

# Number of distinct signs, then number of distinct videos.
print(sign_ids.counter)
print(len(attempts))
    
# Frames-per-video statistics: minimum, mean and maximum.
frame_counts = list(attempts.values())
s = sum(frame_counts) / len(attempts)
# 2 ** 30 and 0 are the starting bounds for the minimum and the maximum.
mn = min([2 ** 30] + frame_counts)
mx = max([0] + frame_counts)

print(mn, s, mx)

In [None]:
import numpy as np
import cv2
import imageio
from multiprocessing import Pool

def process_video(kv):
    """Sample frames from one video and load them as cropped, resized images.

    Args:
        kv: ``(key, frame_paths)`` pair; ``key`` is '<sign>_<user>_<attempt>'
            and ``frame_paths`` is the list of that video's frame files.

    Returns:
        ``(vid, vd, label)``: the paths of the sampled frames, the matching
        ``res`` x ``res`` images, and the integer id of the video's sign.
    """
    k, v = kv
    # 1-based positions of the frames to keep, spread evenly over the video.
    # Rounding can map several sample points onto the same frame; the set
    # collapses those, so a video with fewer than n_of_sampled_images frames
    # yields fewer images.
    indices = set((np.linspace(1, len(v), n_of_sampled_images) + 0.5).astype('int64').tolist())
    # user and attempt are '_'-split fields and so contain no '_'; splitting
    # from the right therefore recovers the sign even if its name contains '_'.
    label = sign_ids[k.rsplit('_', 2)[0]]
    print(k, label, len(v), indices)
    vid = []
    vd = []
    for j, frame in enumerate(v, start=1):
        if j in indices:
            vid.append(frame)
            # Trim 92 px from top and bottom and 512 px from left and right.
            im = imageio.imread(frame)[92: -92, 512: -512]
            # Resize to the configured resolution so the result fits the
            # [.., res, res, 3] arrays the images are packed into.
            vd.append(cv2.resize(im, (res, res), interpolation = cv2.INTER_CUBIC))
    return vid, vd, label

# Process every video in a worker pool; imap returns results in input order.
vids, vds, labels = [], [], []
with Pool() as pool:
    results = pool.imap(process_video, list(videos.items()))
    for i, (vid, vd, label) in enumerate(results):
        print(i)
        vids.append(vid)
        vds.append(vd)
        labels.append(label)

# Pack everything into fixed-size arrays; frame slots that received no image
# keep their initial zeros.
n_videos = len(videos)
videos_np = np.zeros([n_videos, n_of_sampled_images, res, res, 3], np.uint8)
labels_np = np.zeros([n_videos], np.int64)
for i, (video, label) in enumerate(zip(vds, labels)):
    for j, image in enumerate(video):
        videos_np[i, j] = image
    labels_np[i] = label

# np.save appends '.npy' to these names.
np.save('videos', videos_np)
np.save('labels', labels_np)

In [None]:
from os.path import join
import numpy as np
dataset_dir = '.'

def load_SIGN(seed = None):
    """Load the saved videos and labels and split them into train and validation.

    Args:
        seed: optional seed for the shuffle. When given, the split is
            reproducible and the global NumPy random state is left untouched.
            When None, the global NumPy random state is used.

    Returns:
        ``((train_videos, train_labels), (val_videos, val_labels))`` where the
        first 9/10 of the shuffled samples (rounded down) form the training part.
    """
    videos = np.load(join(dataset_dir, 'videos.npy'), mmap_mode = 'r')
    labels = np.load(join(dataset_dir, 'labels.npy'), mmap_mode = 'r')
    assert videos.shape[0] == labels.shape[0]
    rng = np.random if seed is None else np.random.RandomState(seed)
    perm = rng.permutation(videos.shape[0])
    # Indexing with the permutation array copies the selected data into memory.
    videos = videos[perm]
    labels = labels[perm]
    N = videos.shape[0] * 9 // 10
    return (videos[: N], labels[: N]), (videos[N:], labels[N:])

tr_data, val_data = load_SIGN()
# Shapes of (videos, labels) for the training and then the validation part.
for split in (tr_data, val_data):
    print(split[0].shape, split[1].shape)
print(tr_data[0].dtype, tr_data[1].dtype)

# Show frame 18 of training video 52 together with that video's label.
sample_video, sample_frame = 52, 18
plt.imshow(tr_data[0][sample_video, sample_frame])
print(tr_data[1][sample_video])
plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

def input_fn(data, batch_size, num_epochs = 1, seed = None):
    """Build a dataset that decodes, crops and resizes videos read from JPEG files.

    Args:
        data: ``(videos, labels)`` pair of equal length. Each video is mapped
            over with ``tf.io.read_file``, so it is presumably a sequence of
            JPEG file paths; confirm against callers.
        batch_size: number of videos per batch.
        num_epochs: how many times the data is repeated.
        seed: seed passed to the shuffle buffer.

    Returns:
        A ``tf.data.Dataset`` of ``(video_batch, label_batch)`` pairs.
    """
    assert len(data[0]) == len(data[1])

    def load_and_preprocess_video(video, label):
        def load_frame(frame):
            # tf.io.read_file replaces the TF1-only alias tf.read_file.
            img_raw = tf.io.read_file(frame)
            return tf.image.decode_jpeg(img_raw)

        video = tf.map_fn(load_frame, video, tf.uint8)
        # Take a (4 * res) x (4 * res) window whose top-left corner is at row 92, column 512.
        video = tf.image.crop_to_bounding_box(video, 92, 512, res * 4, res * 4)
        # tf.image.resize with the bicubic method replaces the TF1-only
        # tf.image.resize_bicubic.
        video = tf.image.resize(video, (res, res), method = tf.image.ResizeMethod.BICUBIC)
        return (video, label)

    data = tf.data.Dataset.from_tensor_slices(data).map(load_and_preprocess_video, 8)
    data = data.repeat(num_epochs).shuffle(100, seed = seed).batch(batch_size).prefetch(2)
    return data

def dataset_input_fn(data, batch_size, seed = None):
    """Build a dataset that yields shuffled, preprocessed batches from in-memory arrays.

    Args:
        data: ``(videos, labels)`` pair of arrays with the same first dimension.
        batch_size: number of videos per batch (the last batch may be smaller).
        seed: seed for the per-pass permutation. With a seed, every pass over
            the dataset uses the same order; with None the order is random.

    Returns:
        A ``tf.data.Dataset`` of ``(video_batch, label_batch)`` pairs with the
        videos cast to float32 and passed through ``preprocess_input``.
    """
    assert data[0].shape[0] == data[1].shape[0]

    def generator():
        # Use a private RandomState instead of np.random.seed(seed): it yields
        # the same permutation for a given seed but does not reset the global
        # NumPy random state each time the dataset is iterated.
        rng = np.random.RandomState(seed)
        perm = rng.permutation(data[0].shape[0])
        for i in range(0, data[0].shape[0], batch_size):
            ids = perm[i: i + batch_size]
            yield data[0][ids], data[1][ids]

    def format_input(video, label):
        video = tf.cast(video, tf.float32)
        video = preprocess_input(video)
        return video, label

    dataset = tf.data.Dataset.from_generator(generator, (tf.uint8, tf.int64)).map(format_input, tf.data.experimental.AUTOTUNE).prefetch(1)
    return dataset

tr_dataset = dataset_input_fn(tr_data, batch_size)
val_dataset = dataset_input_fn(val_data, batch_size)

# Smoke-test both pipelines: print index, video shape and labels of the first
# three batches of the training and then the validation dataset.
for dataset in (tr_dataset, val_dataset):
    for batch_id, (video_batch, label_batch) in enumerate(dataset.take(3)):
        print(batch_id, video_batch.shape, label_batch)

In [None]:
# Keep the first training batch in input_batch / label_batch.
for first_batch in tr_dataset.take(1):
    input_batch, label_batch = first_batch

from tensorflow.keras import backend as K
class MyModel(tf.keras.Model):
    """Combines the ``n`` items of each input into ``k`` selected items.

    ``inputs`` has shape ``[batch, n, features]``. Two dense layers produce
    selection logits of shape ``[batch, k, n]``. In the training phase each of
    the ``k`` rows is a Gumbel-softmax sample over the ``n`` items; otherwise
    it is the one-hot arg-max. The output, ``[batch, k, features]``, is the
    selection-weighted sum of the input items.
    """
    def __init__(self, k):
        super(MyModel, self).__init__()
        self.k = k

    def build(self, input_shape):
        # Number of items to choose from: the second-to-last input dimension.
        self.n_to_select_from = input_shape[-2]
        self.d1 = tf.keras.layers.Dense(100, activation = 'relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d2 = tf.keras.layers.Dense(self.n_to_select_from * self.k)
        self.reshape = tf.keras.layers.Reshape([self.k, self.n_to_select_from], input_shape = [self.n_to_select_from * self.k])

    def call(self, inputs, temp):
        """Return the selected items.

        Args:
            inputs: tensor of shape ``[batch, n, features]``.
            temp: temperature tensor; its mean divides the noisy logits.
        """
        x = self.d1(inputs)
        x = self.flatten(x)
        x = self.d2(x)
        logits = self.reshape(x)  # [batch, k, n]

        def sample_soft_selections():
            # Draw independent Gumbel noise for every example. Noise of shape
            # [k, n] would be broadcast, giving the whole batch the same
            # perturbation.
            uniform = K.random_uniform(shape = K.shape(logits), minval = np.finfo(tf.float32.as_numpy_dtype).tiny, maxval = 1.0)
            gumbel = -K.log(-K.log(uniform))
            noisy_logits = (logits + gumbel) / K.mean(temp)
            return tf.keras.activations.softmax(noisy_logits)

        def hard_selections():
            return K.one_hot(K.argmax(logits, axis = -1), self.n_to_select_from)

        selections = K.in_train_phase(sample_soft_selections, hard_selections)
        # outputs[i, k, h] = sum_j inputs[i, j, h] * selections[i, k, j]
        outputs = tf.einsum('ijh,ikj->ikh', inputs, selections)
        return outputs

    
# Two-input model: a batch of videos plus one temperature value per example.
batch = tf.keras.Input([n_of_sampled_images, res, res, 3])
temp = tf.keras.Input([], dtype = 'float32')
# Fold the frame axis into the batch axis so the 2D backbone sees single images.
x = tf.keras.layers.Lambda(lambda x: tf.reshape(x, [-1, res, res, 3]))(batch)
base_model = tf.keras.applications.MobileNetV2(input_shape = (res, res, 3), include_top = False, weights = 'imagenet')
base_model.trainable = False
x = base_model(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Restore the per-video frame axis: one 1280-dimensional feature vector per frame.
x = tf.keras.layers.Lambda(lambda x: tf.reshape(x, [-1, n_of_sampled_images, 1280]))(x)
x = MyModel(n_to_select)(inputs = x, temp = temp)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(320, activation = 'relu')(x)
x = tf.keras.layers.Dense(sign_ids.counter)(x)
x = tf.keras.layers.Softmax()(x)
model = tf.keras.Model([batch, temp], x)

learning_rate = 0.0005
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate), loss = 'sparse_categorical_crossentropy', metrics = ['sparse_categorical_accuracy'])
# The model declares two inputs, so the smoke-test call has to supply a
# temperature as well: one value per example. 1.0 is a placeholder value.
input_temp = tf.ones(tf.shape(input_batch)[:1])
print(K.argmax(model([input_batch, input_temp])))
# summary() prints the table itself and returns None, so it is not wrapped in print().
base_model.summary()
model.summary()

In [None]:
# Compile the model again with a new Adam optimizer (learning rate 0.0005) and
# the same loss and metric as the compile call in the preceding cell.
model.compile(optimizer = tf.keras.optimizers.Adam(0.0005), loss = 'sparse_categorical_crossentropy', metrics = ['sparse_categorical_accuracy'])

In [None]:
# Evaluate the compiled model on the validation dataset.
# NOTE(review): the model assembled above declares two inputs ([batch, temp])
# while val_dataset yields (video, label) pairs; confirm how the temperature
# input is meant to be supplied here.
model.evaluate(val_dataset)

In [None]:
# Train for 10 epochs with val_dataset as validation data; the value returned
# by fit() is kept in `history`.
# NOTE(review): the model assembled above declares two inputs ([batch, temp])
# while tr_dataset yields (video, label) pairs; confirm how the temperature
# input is meant to be supplied here.
history = model.fit(tr_dataset, epochs = 10, validation_data = val_dataset)

In [None]:
from __future__ import absolute_import, division, print_function   
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, Lambda, Input, Dense, Dropout, Conv1D, MaxPool1D, Softmax, GlobalAveragePooling2D, Reshape, Flatten, Multiply
from tensorflow.keras import Input, Model
from tensorflow.keras.applications import MobileNetV2

class Sample_Concrete(Layer):
    """
    Layer for sample Concrete / Gumbel-Softmax variables.

    Takes logits of shape [batch_size, d, k], draws k relaxed one-hot samples
    over the d positions and returns, for every position, the maximum over the
    k samples as a tensor of shape [batch_size, d, 1].

    Args:
        temp: temperature the noisy logits are divided by.
        k: number of samples drawn per example.
    """
    def __init__(self, temp, k, **kwargs):
        self.temp = temp
        self.k = k
        super(Sample_Concrete, self).__init__(**kwargs)

    def call(self, logits):
        # logits: [batch_size, d, k]
        batch_size = K.shape(logits)[0]
        d = K.shape(logits)[1]
        # Swap the last two axes to obtain [batch_size, k, d]. A reshape keeps
        # the flat element order and would mix values belonging to different
        # (position, sample) pairs; it also built a new Keras layer on every call.
        logits_ = tf.transpose(logits, [0, 2, 1])
        unif_shape = [batch_size, self.k, d]

        uniform = K.random_uniform(unif_shape, np.finfo(tf.float32.as_numpy_dtype).tiny, 1.0)
        gumbel = -K.log(-K.log(uniform))
        noisy_logits = (gumbel + logits_) / self.temp
        samples = K.softmax(noisy_logits)  # softmax over the d positions
        samples = K.max(samples, axis = 1)  # [batch_size, d]

        # The relaxed samples are returned in every learning phase; no
        # discrete selection is applied at inference time.
        return tf.expand_dims(samples, -1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], 1)

# Grab one training batch for a quick forward pass once the model is built.
for video_batch, label_batch in tr_dataset.take(1):
    pass

batch = Input([n_of_sampled_images, res, res, 3])
# Fold the frame axis into the batch axis so the 2D backbone sees single images.
x = Lambda(lambda x: K.reshape(x, [-1, res, res, 3]))(batch)
base_model = MobileNetV2(input_shape = (res, res, 3), include_top = False, weights = 'imagenet')
# Freeze the first 100 backbone layers; the remaining ones stay trainable.
base_model.trainable = True
for layer in base_model.layers[: 100]:
    layer.trainable = False
x = base_model(x)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation = tf.nn.leaky_relu)(x)
# Restore the per-video frame axis: [-1, n_of_sampled_images, 256]
x = Lambda(lambda x: K.reshape(x, [-1, n_of_sampled_images, 256]))(x)
# Selection branch: per-frame weights of shape [-1, n_of_sampled_images, 1]
# computed from all frame features of the video.
logits = Flatten()(x)
logits = Dense(n_of_sampled_images * n_to_select)(logits)
logits = Reshape([-1, n_to_select])(logits)
logits = Sample_Concrete(0.5, n_to_select)(logits)
x = Multiply()([x, logits]) # x.shape = [-1, n_of_sampled_images, 256]
x = Flatten()(x)
x = Dense(sign_ids.counter, activation = tf.nn.leaky_relu)(x)
x = Softmax()(x)

model = Model(batch, x)

model.compile(optimizer = 'rmsprop', loss = tf.losses.SparseCategoricalCrossentropy(), metrics = [tf.metrics.SparseCategoricalAccuracy()] + [tf.metrics.SparseTopKCategoricalAccuracy(i + 1, 'top_{}_accuracy'.format(i + 1)) for i in range(3)])
print(K.argmax(model(video_batch)))
# summary() prints the table itself and returns None, so it is not wrapped in print().
base_model.summary()
model.summary()

In [None]:
#model.evaluate(val_dataset)
# Compute the model's predictions for the validation dataset.
model.predict(val_dataset)

In [None]:
# Train for 50 epochs with val_dataset as validation data; the value returned
# by fit() is kept in `history`.
history = model.fit(tr_dataset, epochs = 50, validation_data = val_dataset)

In [None]:
# Inspect the output of the selection layer with the learning phase set to 0.
K.set_learning_phase(0)
# Look the layer up by type. Its auto-generated name ('sample__concrete',
# 'sample__concrete_1', ...) depends on how many instances have been created
# in the session, so a hard-coded name fails on a fresh kernel.
selection_layer = next(layer for layer in model.layers if isinstance(layer, Sample_Concrete))
functor = K.function([model.input], [selection_layer.output])

for video_batch, label_batch in val_dataset.take(3):
    print(functor([video_batch]))

In [None]:
# Write a diagram of the model, with shapes shown, to 'mini_resnet.png'.
tf.keras.utils.plot_model(model, 'mini_resnet.png', show_shapes=True)