In [None]:
## Preprocessing for LSTM: Blob Detection and Interpolation

In [None]:
%matplotlib inline

from scipy.odr import *
from scipy.stats import *
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import ast
from multiprocessing import Pool, cpu_count

import scipy

from IPython import display
from matplotlib.patches import Rectangle

from sklearn.metrics import mean_squared_error
import json

import scipy.stats as st
from sklearn.metrics import r2_score


from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt

import copy

from sklearn.model_selection import LeaveOneOut, LeavePOut

from multiprocessing import Pool
import cv2

In [None]:
import math

class Vector2:
    """Small 2-D vector with arithmetic, ordering and geometry helpers.

    Instances compare equal (and hash equal) when both components match.
    The ordering operators sort by ``y`` first and use ``x`` as tie-breaker.
    """

    # Class-level defaults; __init__ always overrides them per instance.
    x = 0
    y = 0

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __add__(self, other):
        """Component-wise sum with another vector."""
        return Vector2(self.x + other.x, self.y + other.y)

    def __radd__(self, other):
        """Reflected addition.

        ``sum()`` starts from the int ``0``, so ``0 + Vector2`` ends up here;
        a numeric zero is treated as the additive identity so that
        ``sum(list_of_vectors)`` works without an explicit start value.
        """
        if isinstance(other, (int, float)) and other == 0:
            return Vector2(self.x, self.y)
        return Vector2(other.x + self.x, other.y + self.y)

    def __sub__(self, other):
        """Component-wise difference with another vector."""
        return Vector2(self.x - other.x, self.y - other.y)

    def __mul__(self, other):
        """Scale both components by the scalar ``other``."""
        return Vector2(self.x * other, self.y * other)

    def __rmul__(self, other):
        """Scale both components by the scalar ``other`` (scalar on the left)."""
        return Vector2(other * self.x, other * self.y)

    def __truediv__(self, other):
        """Divide both components by the scalar ``other``."""
        return Vector2(self.x / other, self.y / other)

    def __str__(self):
        return "(" + str(self.x) + "," + str(self.y) + ")"

    def __eq__(self, other):
        # Objects of a different class are never equal to a Vector2.
        if isinstance(other, self.__class__):
            return self.x == other.x and self.y == other.y
        return False

    def __lt__(self, other):
        if self.y > other.y:
            return False
        elif self.y < other.y:
            return True
        return self.x < other.x

    def __le__(self, other):
        if self.y > other.y:
            return False
        elif self.y < other.y:
            return True
        return self.x <= other.x

    def __gt__(self, other):
        if self.y < other.y:
            return False
        elif self.y > other.y:
            return True
        return self.x > other.x

    def __ge__(self, other):
        if self.y < other.y:
            return False
        elif self.y > other.y:
            return True
        return self.x >= other.x

    def __hash__(self):
        # Consistent with __eq__: equal components give an equal hash.
        return hash((self.x, self.y))

    def magnitude(self):
        """Euclidean length of the vector."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    @staticmethod
    def zero():
        """Return the vector (0, 0)."""
        return Vector2(0, 0)

    @staticmethod
    def normalize(vector):
        """Return ``vector`` divided by its magnitude (fails for a zero-length vector)."""
        return vector / vector.magnitude()

    @staticmethod
    def dot(a, b):
        """Dot product of ``a`` and ``b``."""
        return a.x * b.x + a.y * b.y

    @staticmethod
    def cross(a, b):
        """Scalar 2-D cross product ``a.x * b.y - a.y * b.x``."""
        return (a.x * b.y) - (a.y * b.x)

    @staticmethod
    def distance(a, b):
        """Euclidean distance between ``a`` and ``b``."""
        return Vector2(a.x - b.x, a.y - b.y).magnitude()

    @staticmethod
    def angle(a, b):
        """Signed angle in radians between ``a`` and ``b``.

        The sign follows ``cross(a, b)``: negative when the cross product is
        negative, non-negative otherwise. Division by zero occurs if either
        vector has zero length.
        """
        cosine = Vector2.dot(a, b) / (a.magnitude() * b.magnitude())
        # Rounding can push the quotient marginally outside [-1, 1] for
        # (anti-)parallel vectors; arccos would then return nan.
        unsigned = np.arccos(np.clip(cosine, -1.0, 1.0))
        if Vector2.cross(a, b) < 0:
            return -1.0 * unsigned
        return unsigned

In [None]:
# Load the filtered DataFrame from a local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load this file from a trusted source.
df_filtered = pd.read_pickle("PklData/dfFiltered_LSTM.pkl")

In [None]:
# Show the first rows of the loaded frame as a quick sanity check.
df_filtered.head()

In [None]:
def _to_uint8_frame(raw):
    """Reshape one frame to 27x15, clamp its values to [0, 255] and cast to uint8."""
    return raw.reshape(27, 15).clip(min=0, max=255).astype(np.uint8)

# Normalise every frame in one pass, then store the pixel total of each frame.
df_filtered["Image"] = df_filtered["Image"].apply(_to_uint8_frame)
df_filtered["ImageSum"] = df_filtered["Image"].apply(lambda frame: np.sum(frame))

In [None]:
#LSTMs new Blob detection (only detect, if there are blobs)
def detect_blobs(image):
    """Return True when ``image`` contains at least one blob-sized contour.

    The 27x15 frame is embedded in a 29x17 canvas filled with ones, inverted,
    and thresholded at 200, so pixels with an original value below 55 (and the
    border of ones) become white. A contour counts as a blob when its area is
    strictly between 8 and 255.
    """
    large = np.ones((29, 17), dtype=np.uint8)
    large[1:28, 1:16] = image
    _, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) in OpenCV 2/4 but
    # (image, contours, hierarchy) in OpenCV 3; index -2 is the contour list in both.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    return any(8 < cv2.contourArea(contour) < 255 for contour in contours)

In [None]:
%%time
pool = Pool(cpu_count() - 1)
temp_blobs = pool.map(detect_blobs, df_filtered.Image)
pool.close()

In [None]:
# Store the per-frame detection result (one entry of temp_blobs per row).
df_filtered["ContainsBlobs"] = temp_blobs

In [None]:
#Label if knuckle or finger
def f(row):
    """Return the input-method label for ``row``.

    Returns 0 (knuckle) when ``row['TaskID']`` is below 17 and 1 (finger)
    when it is 17 or above.

    Raises:
        ValueError: if TaskID satisfies neither comparison (e.g. NaN).
    """
    task_id = row['TaskID']
    if task_id < 17:
        #val = "Knuckle"
        return 0
    if task_id >= 17:
        #val = "Finger"
        return 1
    # Without this, a NaN TaskID would surface as an UnboundLocalError.
    raise ValueError("TaskID %r is neither < 17 nor >= 17" % (task_id,))
# Apply f to every row (axis=1) and store the resulting label in a new column.
df_filtered['InputMethod'] = df_filtered.apply(f, axis=1)

In [None]:
# Problem: some timestamps are strings (XXXXE+XXXX), which is not accurate enough.
# The frame order is therefore taken from a fresh 0..n-1 index instead of the Timestamp column.
df_filtered.index = range(len(df_filtered))

In [None]:
%%time
# trim image sequences down to only between first and last detected blob
UserIDs = []
TaskIDs = []
VersionIDs = []
Blobs = []
for userID in df_filtered.userID.unique():
    print(userID)
    for TaskID in df_filtered[df_filtered.userID == userID].TaskID.unique():
        for VersionID in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID)].VersionID.unique():
            first_blob = -1
            last_blob = -1
            for index, row in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID)].iterrows():
                if row.ContainsBlobs:
                    last_blob = index
                    if first_blob == -1:
                        first_blob = index
            if first_blob >= 0 and last_blob >= 0:
                UserIDs.append(userID)
                TaskIDs.append(TaskID)
                VersionIDs.append(VersionID)
                Blobs.append(df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID) & (df_filtered.index >= first_blob) & (df_filtered.index <= last_blob)].Image.tolist())

In [None]:
# Turn the three collected ID lists into int64 arrays.
UserIDs, TaskIDs, VersionIDs = (
    np.array(id_list, dtype=np.int64)
    for id_list in (UserIDs, TaskIDs, VersionIDs)
)

In [None]:
# One row per trimmed recording: the three ID columns plus the frame sequence.
df_lstm_all = pd.DataFrame({
    "userID": UserIDs,
    "TaskID": TaskIDs,
    "VersionID": VersionIDs,
})
df_lstm_all["Blobs"] = Blobs
# Each entry is a list of frames; convert it to a single array per row.
df_lstm_all["Blobs"] = df_lstm_all["Blobs"].map(np.array)
df_lstm_all.head()

In [None]:
# Number of frames per sequence (first axis of each Blobs array).
lengths = [sequence.shape[0] for sequence in df_lstm_all.Blobs]
df_lstm_all["BlobCount"] = lengths
# add a column for pure gesture recognition without finger/knuckle
df_lstm_all["GestureOnly"] = df_lstm_all.TaskID.mod(17)

In [None]:
# Summary statistics of the sequence lengths before any length filtering.
df_lstm_all.BlobCount.describe()

In [None]:
# Mean and standard deviation of the sequence length for each GestureOnly value.
df_lstm_all.groupby(df_lstm_all.GestureOnly)["BlobCount"].agg(["mean", "std"])

In [None]:
# filter on gesture lengths: keep sequences with 5 to 100 frames (inclusive)
print("before: %s" % len(df_lstm_all))
# .copy() makes df_lstm an independent frame, so later column assignments
# do not write into a slice of df_lstm_all (SettingWithCopyWarning).
df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)].copy()
print("after: %s" % len(df_lstm))
# "ratio" is the percentage of sequences that were removed; guard the empty case
# so an empty input frame does not raise ZeroDivisionError.
removed_count = len(df_lstm_all) - len(df_lstm)
print("ratio: %s" % (removed_count / len(df_lstm_all) * 100 if len(df_lstm_all) else 0.0))

In [None]:
# Summary statistics of the sequence lengths after the length filter.
df_lstm.BlobCount.describe()

In [None]:
# Frame count of every remaining sequence (first axis of each Blobs array).
lengths = [sequence.shape[0] for sequence in df_lstm.Blobs]

In [None]:
# assign() returns a new frame, so this never writes into a slice of
# df_lstm_all (avoids pandas' SettingWithCopyWarning).
df_lstm = df_lstm.assign(BlobCount=lengths)

In [None]:
# Persist the length-filtered sequences (not yet length-normalised) to disk.
df_lstm.to_pickle('PklData/df_lstm2.pkl')

In [None]:
# Summary statistics of the sequence lengths in the saved frame.
df_lstm.BlobCount.describe()

In [None]:
def lerp(a, b, c=0.5):
    """Linear interpolation: returns ``a`` for ``c == 0`` and ``b`` for ``c == 1``."""
    share_of_b = c * b
    share_of_a = (1.0 - c) * a
    return share_of_b + share_of_a

#Svens new Blob detection
def detect_blobs_return(image, task):
    """Detect the largest blob(s) in ``image`` and return their crops and centres.

    The frame is embedded at offset (1, 1) in a 29x17 canvas of ones, inverted
    and thresholded at 205. Contours whose area is strictly between 8 and 255
    are blob candidates. Two blobs are returned when ``task`` is one of
    1, 6, 7, 18, 23, 24 and at least two candidates exist; otherwise one.

    Returns:
        ``(count, blobs, centers)`` when at least one candidate exists:
        ``blobs`` are slices of the padded canvas around each contour and
        ``centers`` are ``(cY, cX)`` tuples, largest contour first.
        ``(0, [np.zeros((29, 19))], 0, 0)`` when no candidate exists.
    """
    large = np.ones((29, 17), dtype=np.uint8)
    large[1:28, 1:16] = np.copy(image)
    _, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) in OpenCV 2/4 but
    # (image, contours, hierarchy) in OpenCV 3; index -2 is the contour list in both.
    found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Compute each area once and reuse it for filtering and sorting.
    sized = [(cv2.contourArea(contour), contour) for contour in found]
    sized = [entry for entry in sized if 8 < entry[0] < 255]
    if not sized:
        # NOTE(review): this branch returns a 4-tuple with a 29x19 placeholder,
        # unlike the 3-tuple above and the 29x17 canvas. Kept unchanged for
        # compatibility; confirm whether (29, 17) and a 3-tuple were intended.
        return (0, [np.zeros((29, 19))], 0, 0)

    # Ascending by area (stable), so the largest contours sit at the end.
    sized.sort(key=lambda entry: entry[0])
    contours = [contour for _, contour in sized]

    # if two finger or knuckle
    cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1
    lstBlob = []
    lstCenter = []
    for i in range(1, cont_count + 1):
        max_contour = contours[-1 * i]
        points = max_contour.reshape(len(max_contour), 2)
        xmax, ymax = np.max(points, axis=0)
        xmin, ymin = np.min(points, axis=0)
        M = cv2.moments(max_contour)
        # The -1 presumably undoes the 1-pixel offset of the frame inside the canvas.
        cX = int(M["m10"] / M["m00"]) - 1
        cY = int(M["m01"] / M["m00"]) - 1
        blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]
        lstBlob.append(blob)
        lstCenter.append((cY, cX))
    return (len(lstBlob), lstBlob, lstCenter)

In [None]:
# detects, which blob in the first image corresponds to which blob in the second image
# (depending on the distances between blobs)
def create_blob_tuples(im_a, im_b, task_id):
    """Pair the blobs of ``im_a`` with the blobs of ``im_b``.

    Returns a list of ``(blob_a, blob_b, center_a, center_b)`` tuples.
    A blob without a counterpart in the other image is paired with itself.
    When both images contain blobs and at least one contains two, the pairing
    with the smaller centre-to-centre distance is chosen.

    Distances are measured between the centres reported by
    ``detect_blobs_return`` (element 2 of its result). Centroids computed
    inside each cropped blob are relative to the crop's own origin and
    therefore cannot tell where a blob sits in the frame.
    """
    blob_det_a = detect_blobs_return(im_a, task_id)
    blob_det_b = detect_blobs_return(im_b, task_id)
    count_a, count_b = blob_det_a[0], blob_det_b[0]

    def pair(det_first, i, det_second, j):
        # (crop of first, crop of second, centre of first, centre of second)
        return (det_first[1][i], det_second[1][j], det_first[2][i], det_second[2][j])

    def center_distance(i, j):
        # Distance between blob i of image A and blob j of image B.
        center_a, center_b = blob_det_a[2][i], blob_det_b[2][j]
        return Vector2.distance(Vector2(center_a[0], center_a[1]),
                                Vector2(center_b[0], center_b[1]))

    if count_a == 0 or count_b == 0:
        # Only one image (or neither) has blobs: pair each blob with itself.
        # This also covers "A has two blobs, B has none", which previously
        # produced no tuples because the branch tested B's count instead of A's.
        present = blob_det_b if count_a == 0 else blob_det_a
        return [pair(present, i, present, i) for i in range(present[0])]

    if count_a == 2 and count_b == 2:
        if center_distance(0, 0) <= center_distance(0, 1):
            return [pair(blob_det_a, 0, blob_det_b, 0), pair(blob_det_a, 1, blob_det_b, 1)]
        return [pair(blob_det_a, 0, blob_det_b, 1), pair(blob_det_a, 1, blob_det_b, 0)]

    if count_a == 1 and count_b == 2:
        # A's single blob goes to the nearer blob of B; the other B blob pairs with itself.
        if center_distance(0, 0) <= center_distance(0, 1):
            return [pair(blob_det_a, 0, blob_det_b, 0), pair(blob_det_b, 1, blob_det_b, 1)]
        return [pair(blob_det_a, 0, blob_det_b, 1), pair(blob_det_b, 0, blob_det_b, 0)]

    if count_a == 2 and count_b == 1:
        # B's single blob goes to the nearer blob of A; the other A blob pairs with itself.
        if center_distance(0, 0) <= center_distance(1, 0):
            return [pair(blob_det_a, 0, blob_det_b, 0), pair(blob_det_a, 1, blob_det_a, 1)]
        return [pair(blob_det_a, 1, blob_det_b, 0), pair(blob_det_a, 0, blob_det_a, 0)]

    # One blob in each image.
    return [pair(blob_det_a, 0, blob_det_b, 0)]

In [None]:
# first brings blobs to same shape through adding zeros
# then interpolates linearly between the two blobs and the two centroids
# finally places the interpolated blob at the interpolated centroid position on a black image
def _pad_to_common_shape(blob_a, blob_b):
    """Zero-pad both blobs at the bottom/right so they share one shape."""
    rows = max(blob_a.shape[0], blob_b.shape[0])
    cols = max(blob_a.shape[1], blob_b.shape[1])

    def padded(blob):
        out = np.zeros((rows, cols), dtype=np.float64)
        out[:blob.shape[0], :blob.shape[1]] = blob
        return out

    return padded(blob_a), padded(blob_b)


def interpolate_images(im_a, im_b, task_id, factor=0.5):
    """Build a 27x15 image that lies between ``im_a`` and ``im_b``.

    For every blob pair from ``create_blob_tuples`` the two crops are padded to
    a common shape and blended with ``lerp`` (``factor`` 0 gives A, 1 gives B).
    The blend is pasted onto a zero image so that its intensity centroid lands
    on the interpolated centre, shifted as needed to stay inside the image.

    Returns:
        A float array of shape (27, 15). Blob pairs whose blend has zero total
        intensity are skipped. Later pairs overwrite earlier ones on overlap.
    """
    new_img = np.zeros((27, 15))
    canvas_rows, canvas_cols = new_img.shape
    blob_tuples = create_blob_tuples(im_a, im_b, task_id)

    for blob_a, blob_b, pos_a, pos_b in blob_tuples:
        blob_a, blob_b = _pad_to_common_shape(blob_a, blob_b)
        im_lerp = lerp(blob_a, blob_b, factor)
        M = cv2.moments(im_lerp)
        if M["m00"] == 0:
            # An all-zero blend has no centroid and nothing to draw.
            continue
        # m01/m00 is the centroid along axis 0 (rows), m10/m00 along axis 1 (columns).
        centroid_row = int(M["m01"] / M["m00"])
        centroid_col = int(M["m10"] / M["m00"])
        # A crop can be larger than the canvas; cut it so the paste below always fits.
        im_lerp = im_lerp[:canvas_rows, :canvas_cols]
        rows, cols = im_lerp.shape
        # pos[0] is used along axis 0 and pos[1] along axis 1.
        target_row = int(lerp(pos_a[0], pos_b[0], factor))
        target_col = int(lerp(pos_a[1], pos_b[1], factor))
        # Clamp the top-left corner so the whole blob lies inside the canvas.
        # Slice ends are exclusive, so the last valid end is canvas_rows /
        # canvas_cols (27 / 15), not 26 / 14.
        row_0 = min(max(target_row - centroid_row, 0), canvas_rows - rows)
        col_0 = min(max(target_col - centroid_col, 0), canvas_cols - cols)
        new_img[row_0:row_0 + rows, col_0:col_0 + cols] = im_lerp
    return new_img

# only for testing: interpolate every pair of consecutive frames of one sequence
test_sequence = df_lstm.iloc[58].Blobs
for im_a, im_b in zip(test_sequence[:-1], test_sequence[1:]):
    im_c = interpolate_images(im_a, im_b, 3)
    #print(type(im_c))

In [None]:
# determines the positions, where to interpolate between images
def stretch_images(cur_images, task_id, target_length=50):
    """Resample a frame sequence to ``target_length`` frames.

    Output frame ``i`` is taken at the fractional position
    ``i * (len(cur_images) - 1) / (target_length - 1)`` of the input and is
    produced by ``interpolate_images`` from the two neighbouring input frames,
    weighted by the fractional part of that position.

    Args:
        cur_images: array of frames, indexed along axis 0.
        task_id: forwarded to ``interpolate_images``.
        target_length: number of frames to produce.

    Returns:
        ``np.array`` of the ``target_length`` generated frames.
    """
    last_index = cur_images.shape[0] - 1
    # A single output frame samples position 0; this also avoids dividing by zero.
    factor = last_index / (target_length - 1) if target_length > 1 else 0.0

    new_images = []
    for i in range(0, target_length):
        position = i * factor
        lower = int(position)
        higher = lower if lower == position else lower + 1
        if higher > last_index:
            higher = last_index
        local_factor = position - lower
        #print("%s: val: %s, lower: %s, higher: %s, local: %s" % (i, i * factor, lower, higher, local_factor))
        # task_id and the blend weight are separate arguments: passing only
        # local_factor would put it in the task_id slot and leave the weight at 0.5.
        new_img = interpolate_images(cur_images[lower], cur_images[higher], task_id, local_factor)
        new_images.append(new_img)
    return np.array(new_images)
        
#print(df_lstm.iloc[0].shape)
#res = stretch_images(df_lstm.iloc[0].Blobs, df_lstm.iloc[0].TaskID, 30)
#res.shape

In [None]:
# decides whether or not a normalization is necessary
def normalize_blobs(blobs, task_id, new_len=50):
    """Return ``blobs`` unchanged if it already has ``new_len`` frames, else stretch it."""
    if blobs.shape[0] != new_len:
        return stretch_images(blobs, task_id, new_len)
    return blobs

In [None]:
%%time
# normalizes all image sequences
df_lstm_norm = df_lstm.copy(deep=True)
new_blobs = []
for index, row in df_lstm.iterrows():
    new_blobs.append(normalize_blobs(row.Blobs, row.TaskID, 50))

df_lstm_norm.Blobs = new_blobs

lengths = []
for index, row in df_lstm_norm.iterrows():
    lengths.append(row.Blobs.shape[0])
df_lstm_norm["BlobCount"] = lengths

In [None]:
# Summary statistics of the sequence lengths after normalisation.
df_lstm_norm.BlobCount.describe()

In [None]:
# Persist the length-normalised sequences to disk.
df_lstm_norm.to_pickle("PklData/df_lstm_norm50_100.pkl")

In [None]:
# Show the first rows of the normalised frame.
df_lstm_norm.head()

In [None]:
# Marker printed once every cell above has run.
print("finished")