# Loading Google Drive

In [None]:
import os
import pandas as pd
import numpy as np
from google.colab import drive
import pickle
from collections import defaultdict
from tqdm import tqdm
import matplotlib.pyplot as plt


In [None]:
# Mount Google Drive inside the Colab VM and switch the working directory to
# the dataset folder, so the relative paths used by later cells
# (e.g. 'Tablet/User{}.csv', 'Demographics.csv') resolve against it.
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/BBMAS_Touchstrokes/')

Mounted at /content/drive


# **Helper Functions**

In [None]:
def pickling(fname, obj):
  """Serialize ``obj`` into the file ``fname`` using :mod:`pickle`.

  The file is opened in binary write mode (an existing file is overwritten).
  A context manager is used so the handle is closed even when ``pickle.dump``
  raises, e.g. for an object that cannot be pickled.
  """
  with open(fname, "wb") as f:
    pickle.dump(obj, f)

def unpickling(fname):
  """Load and return the object stored in the pickle file ``fname``.

  The file is read through a context manager so the handle is released even
  when unpickling fails.

  NOTE: ``pickle.load`` can execute arbitrary code; only use this on files
  produced by this notebook or another trusted source.
  """
  with open(fname, 'rb') as f:
    return pickle.load(f)

In [None]:
# Convert a timestamp string to Unix time in milliseconds.
# The LAST ':' in the string is replaced by '.' before parsing, so the input is
# presumably 'Y-m-d H:M:S:MS' (fraction separated by a colon) - TODO confirm
# against the raw CSVs.
def convert_time_to_linux(time_str):
    """Return ``time_str`` as integer milliseconds since the Unix epoch.

    The final ':' is turned into '.', the result is parsed with
    ``pd.to_datetime`` and the nanosecond value is floor-divided by 10**6.
    """
    last_colon = time_str.rfind(':')
    head = time_str[:last_colon]
    tail = time_str[last_colon + 1:]
    epoch_ns = pd.to_datetime(head + '.' + tail).value
    return epoch_ns // 10**6

In [None]:
def euclidean_distance(x1, y1, x2, y2):
  """Return the straight-line distance between the points (x1, y1) and (x2, y2)."""
  dx = x1 - x2
  dy = y1 - y2
  return np.sqrt(dx**2 + dy**2)

In [None]:
def length_of_swipe(swipe):
  """Return the path length of a swipe.

  The length is the sum of the distances between consecutive points
  ('Xvalue', 'Yvalue'); a swipe with fewer than two points has length 0.
  """
  return sum(
      euclidean_distance(swipe[k-1]['Xvalue'], swipe[k-1]['Yvalue'],
                         swipe[k]['Xvalue'], swipe[k]['Yvalue'])
      for k in range(1, len(swipe))
  )

In [None]:
def velocity_of_swipe(swipe):
    """Return the end-to-end velocity of a swipe (displacement / duration).

    Displacement is the straight-line distance between the first and the last
    point; duration is the difference of their 'time' values.

    Returns 0 when the first and last point share a timestamp. The other
    velocity/speed helpers in this notebook use the same convention, and
    without the guard the division fails (or yields inf/nan for NumPy scalars).
    """
    time = swipe[-1]['time'] - swipe[0]['time']
    # Check the duration first so a zero-length interval never reaches the division.
    if time == 0:
        return 0
    displacement = euclidean_distance(swipe[0]['Xvalue'], swipe[0]['Yvalue'], swipe[-1]['Xvalue'], swipe[-1]['Yvalue'])
    return displacement/time

In [None]:
def area_of_swipe(swipe):
    """Return the mean contact area over all points of a swipe.

    The area of one point is ``pi * touchMajor * touchMinor``. Every point is
    included in the sum: the previous loop stopped one element early while
    still dividing by ``len(swipe)``, which biased the mean downwards.

    Returns 0 for an empty swipe.
    """
    if len(swipe) == 0:
        return 0
    total_area = 0
    for point in swipe:
        total_area += np.pi * point['touchMajor'] * point['touchMinor']
    return total_area/len(swipe)

In [None]:
def median_area_of_swipe(swipe):
    """Return the median contact area over all points of a swipe.

    The area of one point is ``pi * touchMajor * touchMinor``. Every point is
    considered; the previous loop skipped the last one.

    Returns 0 for an empty swipe (``np.median`` of an empty list would be nan).
    """
    if len(swipe) == 0:
        return 0
    area_per_point = [np.pi * point['touchMajor'] * point['touchMinor'] for point in swipe]
    return np.median(area_per_point)

In [None]:
def number_of_swipes(df):
    """Return how many swipes ``extract_touches`` finds in ``df``.

    The count can differ between users performing the same task.

    NOTE(review): this notebook defines ``extract_touches`` several times with
    different signatures and return types; this helper only works with a
    one-argument version that returns a sized collection.
    """
    return len(extract_touches(df))

In [None]:
def get_direction(x1, y1, x2, y2):
  """Return the inclination of the line through (x1, y1) and (x2, y2) in radians.

  The angle is ``arctan(dy / dx)``, so it lies in (-pi/2, pi/2) and does not
  distinguish a swipe from its reverse. A vertical line (x1 == x2) returns
  pi/2 regardless of the sign of dy.
  """
  if x1 != x2:
    return np.arctan((y2 - y1)/(x2 - x1))
  return (np.pi)/2

In [None]:
def acceleration_of_swipe(swipe):
  """Return the end-to-end acceleration of a swipe (velocity / duration).

  Returns 0 when the first and last point share a timestamp. The duration is
  checked before ``velocity_of_swipe`` is called, because that helper divides
  by the same duration.
  """
  duration = swipe[-1]['time'] - swipe[0]['time']
  if duration == 0:
      return 0
  return velocity_of_swipe(swipe)/duration

In [None]:
def get_pairwise_velocities_X(swipe):
  """Return the per-point horizontal velocity of a swipe.

  Element 0 is always 0. Element i (i >= 1) is the change in 'Xvalue' between
  points i-1 and i divided by the change in 'time'; when both points share a
  timestamp the previous velocity is repeated instead.
  """
  velocities = [0]
  for idx in range(1, len(swipe)):
    prev_pt, cur_pt = swipe[idx-1], swipe[idx]
    dx = cur_pt['Xvalue'] - prev_pt['Xvalue']
    dt = cur_pt['time'] - prev_pt['time']
    velocities.append(dx/dt if dt != 0 else velocities[-1])
  return velocities

def get_pairwise_velocities_Y(swipe):
  """Return the per-point vertical velocity of a swipe.

  Element 0 is always 0. Element i (i >= 1) is the change in 'Yvalue' between
  points i-1 and i divided by the change in 'time'; when both points share a
  timestamp the previous velocity is repeated instead.
  """
  velocities = [0]
  for idx in range(1, len(swipe)):
    prev_pt, cur_pt = swipe[idx-1], swipe[idx]
    dy = cur_pt['Yvalue'] - prev_pt['Yvalue']
    dt = cur_pt['time'] - prev_pt['time']
    velocities.append(dy/dt if dt != 0 else velocities[-1])
  return velocities

In [None]:
def get_pairwise_accelerations(swipe, velocities):
  """Return per-point accelerations derived from a velocity series.

  Element 0 is always 0. Element i (i >= 1) is the difference between
  ``velocities[i]`` and ``velocities[i-1]`` divided by the time elapsed between
  swipe points i-1 and i; when that time is 0 the previous acceleration is
  repeated instead.
  """
  accelerations = [0]
  for idx in range(1, len(velocities)):
    dv = velocities[idx] - velocities[idx-1]
    dt = swipe[idx]['time'] - swipe[idx-1]['time']
    accelerations.append(dv/dt if dt != 0 else accelerations[-1])
  return accelerations

In [None]:
def get_average_acceleration(swipe):
    """Return the averaged per-segment acceleration of a swipe.

    For every pair of consecutive points the segment length is divided by the
    squared time difference; segments with a zero time difference are skipped.
    The sum is divided by the number of points (``len(swipe)``), not by the
    number of segments.
    """
    total = 0
    for k in range(1, len(swipe)):
        step = euclidean_distance(swipe[k-1]['Xvalue'], swipe[k-1]['Yvalue'], swipe[k]['Xvalue'], swipe[k]['Yvalue'])
        dt = swipe[k]['time'] - swipe[k-1]['time']
        if dt != 0:
            total += step/(dt ** 2)
    return total/len(swipe)

In [None]:
def get_initial_acceleration(swipe):
    """Return the acceleration over the initial 5 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.05 * len(swipe))``; the result is their distance divided by the
    squared time difference, or 0 when that time difference is 0 (which
    includes every swipe shorter than 20 points).
    """
    first = swipe[0]
    marker = swipe[int(0.05 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/(elapsed ** 2) if elapsed != 0 else 0


In [None]:
def get_acceleration_percentile_25(swipe):
    """Return the acceleration over the first 25 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.25 * len(swipe))``; the result is their distance divided by the
    squared time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.25 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/(elapsed ** 2) if elapsed != 0 else 0

In [None]:
def get_acceleration_percentile_50(swipe):
    """Return the acceleration over the first 50 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.5 * len(swipe))``; the result is their distance divided by the
    squared time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.5 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/(elapsed ** 2) if elapsed != 0 else 0

In [None]:
def get_acceleration_percentile_75(swipe):
    """Return the acceleration over the first 75 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.75 * len(swipe))``; the result is their distance divided by the
    squared time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.75 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/(elapsed ** 2) if elapsed != 0 else 0

In [None]:
def get_final_acceleration(swipe):
    """Return the acceleration over the final 5 percent of a swipe.

    The window spans ``k = int(0.05 * len(swipe))`` segments counted back from
    the last point, i.e. from ``swipe[-1 - k]`` to ``swipe[-1]``. This mirrors
    ``get_initial_acceleration`` and the window used by ``get_final_speed``.
    The previous index ``int(-n)`` selected the first point for swipes shorter
    than 20 points and the last point itself for 20-39 points.

    Returns the distance between the two window ends divided by the squared
    time difference, or 0 when that time difference is 0 (which includes every
    swipe shorter than 20 points).
    """
    k = int(0.05 * len(swipe))
    window_start = swipe[-1 - k]
    window_end = swipe[-1]
    time = window_end['time'] - window_start['time']
    if time == 0:
        return 0
    displacement = euclidean_distance(window_start['Xvalue'], window_start['Yvalue'], window_end['Xvalue'], window_end['Yvalue'])
    return displacement/(time ** 2)

In [None]:
def get_initial_velocity(swipe):
    """Return the velocity over the initial 5 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.05 * len(swipe))``; the result is their distance divided by the
    time difference, or 0 when that time difference is 0 (which includes every
    swipe shorter than 20 points).
    """
    first = swipe[0]
    marker = swipe[int(0.05 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/elapsed if elapsed != 0 else 0


In [None]:
def get_velocity_percentile_25(swipe):
    """Return the velocity over the first 25 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.25 * len(swipe))``; the result is their distance divided by the
    time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.25 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/elapsed if elapsed != 0 else 0

In [None]:
def get_velocity_percentile_50(swipe):
    """Return the velocity over the first 50 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.5 * len(swipe))``; the result is their distance divided by the
    time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.5 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/elapsed if elapsed != 0 else 0

In [None]:
def get_velocity_percentile_75(swipe):
    """Return the velocity over the first 75 percent of a swipe.

    The window runs from the first point to the point at index
    ``int(0.75 * len(swipe))``; the result is their distance divided by the
    time difference, or 0 when that time difference is 0.
    """
    first = swipe[0]
    marker = swipe[int(0.75 * len(swipe))]
    displacement = euclidean_distance(first['Xvalue'], first['Yvalue'], marker['Xvalue'], marker['Yvalue'])
    elapsed = marker['time'] - first['time']
    return displacement/elapsed if elapsed != 0 else 0

In [None]:
def get_final_velocity(swipe):
    """Return the velocity over the final 5 percent of a swipe.

    The window spans ``k = int(0.05 * len(swipe))`` segments counted back from
    the last point, i.e. from ``swipe[-1 - k]`` to ``swipe[-1]``. This mirrors
    ``get_initial_velocity`` and the window used by ``get_final_speed``. The
    previous index ``int(-n)`` selected the first point for swipes shorter than
    20 points and the last point itself for 20-39 points.

    Returns the distance between the two window ends divided by the time
    difference, or 0 when that time difference is 0 (which includes every swipe
    shorter than 20 points).
    """
    k = int(0.05 * len(swipe))
    window_start = swipe[-1 - k]
    window_end = swipe[-1]
    time = window_end['time'] - window_start['time']
    if time == 0:
        return 0
    displacement = euclidean_distance(window_start['Xvalue'], window_start['Yvalue'], window_end['Xvalue'], window_end['Yvalue'])
    return displacement/time

In [None]:
def get_average_velocity(swipe):
    """Return the averaged per-segment velocity of a swipe.

    For every pair of consecutive points the segment length is divided by the
    time difference; segments with a zero time difference are skipped. The sum
    is divided by the number of points (``len(swipe)``), not by the number of
    segments.
    """
    total = 0
    for k in range(1, len(swipe)):
        step = euclidean_distance(swipe[k-1]['Xvalue'], swipe[k-1]['Yvalue'], swipe[k]['Xvalue'], swipe[k]['Yvalue'])
        dt = swipe[k]['time'] - swipe[k-1]['time']
        if dt != 0:
            total += step/dt
    return total/len(swipe)

In [None]:
def speed_of_swipe(swipe):
    """Return the overall speed of a swipe (path length / duration).

    Path length comes from ``length_of_swipe``; duration is the time difference
    between the last and the first point. Returns 0 when the duration is 0.
    """
    path_length = length_of_swipe(swipe)
    elapsed = swipe[-1]['time'] - swipe[0]['time']
    return path_length/elapsed if elapsed != 0 else 0

In [None]:
def get_final_speed(swipe):
    """Return the speed over the final 5 percent of a swipe.

    The last ``int(0.05 * len(swipe))`` segments are walked backwards from the
    end; their lengths and time differences are summed and the speed is total
    length / total time. Returns 0 when the total time is 0 (which includes
    every swipe shorter than 20 points).
    """
    segments = int(0.05 * len(swipe))
    distance = 0
    time = 0
    for back in range(1, segments + 1):
        later, earlier = swipe[-back], swipe[-back-1]
        distance += euclidean_distance(earlier['Xvalue'], earlier['Yvalue'], later['Xvalue'], later['Yvalue'])
        time += later['time'] - earlier['time']
    return distance/time if time != 0 else 0

In [None]:
def get_initial_speed(swipe):
    """Return the speed over the initial 5 percent of a swipe.

    The first ``int(0.05 * len(swipe))`` segments are walked; their lengths and
    time differences are summed and the speed is total length / total time.
    Returns 0 when the total time is 0 (which includes every swipe shorter than
    20 points).
    """
    segments = int(0.05 * len(swipe))
    distance = 0
    time = 0
    for start in range(segments):
        earlier, later = swipe[start], swipe[start + 1]
        distance += euclidean_distance(earlier['Xvalue'], earlier['Yvalue'], later['Xvalue'], later['Yvalue'])
        time += later['time'] - earlier['time']
    return distance/time if time != 0 else 0

In [None]:
def get_speed_percentile_25(swipe):
    """Return the speed over the first 25 percent of a swipe.

    The first ``int(0.25 * len(swipe))`` segments are walked; their lengths and
    time differences are summed and the speed is total length / total time.
    Returns 0 when the total time is 0.
    """
    segments = int(0.25 * len(swipe))
    distance = 0
    time = 0
    for start in range(segments):
        earlier, later = swipe[start], swipe[start + 1]
        distance += euclidean_distance(earlier['Xvalue'], earlier['Yvalue'], later['Xvalue'], later['Yvalue'])
        time += later['time'] - earlier['time']
    return distance/time if time != 0 else 0

In [None]:
def get_speed_percentile_50(swipe):
    """Return the speed over the first 50 percent of a swipe.

    The first ``int(0.50 * len(swipe))`` segments are walked; their lengths and
    time differences are summed and the speed is total length / total time.
    Returns 0 when the total time is 0.
    """
    segments = int(0.50 * len(swipe))
    distance = 0
    time = 0
    for start in range(segments):
        earlier, later = swipe[start], swipe[start + 1]
        distance += euclidean_distance(earlier['Xvalue'], earlier['Yvalue'], later['Xvalue'], later['Yvalue'])
        time += later['time'] - earlier['time']
    return distance/time if time != 0 else 0

In [None]:
def get_speed_percentile_75(swipe):
    """Return the speed over the first 75 percent of a swipe.

    The first ``int(0.75 * len(swipe))`` segments are walked; their lengths and
    time differences are summed and the speed is total length / total time.
    Returns 0 when the total time is 0.
    """
    segments = int(0.75 * len(swipe))
    distance = 0
    time = 0
    for start in range(segments):
        earlier, later = swipe[start], swipe[start + 1]
        distance += euclidean_distance(earlier['Xvalue'], earlier['Yvalue'], later['Xvalue'], later['Yvalue'])
        time += later['time'] - earlier['time']
    return distance/time if time != 0 else 0

In [None]:
def get_deviations(swipe):
  """Return each point's distance from the straight line joining the swipe's ends.

  Special cases (checked in this order):
    * first and last point share 'Xvalue' (vertical chord): |x - x_start|
    * first and last point share 'Yvalue' (horizontal chord): |y - y_start|
  Otherwise the perpendicular distance to the chord is used.

  The perpendicular distance is computed from the scalar 2-D cross product.
  ``np.cross`` on 2-element vectors is deprecated as of NumPy 2.0, so it is no
  longer called here.
  """
  x_start, y_start = swipe[0]['Xvalue'], swipe[0]['Yvalue']
  x_end, y_end = swipe[-1]['Xvalue'], swipe[-1]['Yvalue']
  if(x_start == x_end):
    return [abs(point['Xvalue'] - x_start) for point in swipe]
  if(y_start == y_end):
    return [abs(point['Yvalue'] - y_start) for point in swipe]
  dx = x_end - x_start
  dy = y_end - y_start
  chord_length = np.sqrt(dx**2 + dy**2)
  devs = []
  for point in swipe:
    # z-component of (end - start) x (start - point); |z| / |chord| is the distance.
    cross_z = dx*(y_start - point['Yvalue']) - dy*(x_start - point['Xvalue'])
    devs.append(abs(cross_z)/chord_length)
  return devs

# Feature Engineering

In [None]:
def extract_features(swipe, df = None):
  """Return the feature vector of one swipe as a flat list.

  ``swipe`` is a sequence of points exposing 'Xvalue', 'Yvalue', 'time',
  'touchMajor' and 'touchMinor'. ``df`` is accepted but not used.

  47 named features are computed and returned ordered by the sorted feature
  name, so the digit-prefixed names ('25%_acc_x', ...) come first.
  Not included (disabled in the notebook): 'median_area' and 'swipe_count'.
  """
  start, end = swipe[0], swipe[-1]
  vel_x = get_pairwise_velocities_X(swipe)
  vel_y = get_pairwise_velocities_Y(swipe)
  acc_x = get_pairwise_accelerations(swipe, vel_x)
  acc_y = get_pairwise_accelerations(swipe, vel_y)
  deviations = get_deviations(swipe)

  features = {
    # Geometry and timing of the whole swipe
    'duration': end['time'] - start['time'],
    'startX': start['Xvalue'],
    'startY': start['Yvalue'],
    'endX': end['Xvalue'],
    'endY': end['Yvalue'],
    'displacement': euclidean_distance(start['Xvalue'], start['Yvalue'], end['Xvalue'], end['Yvalue']),
    'length': length_of_swipe(swipe),
    'direction': get_direction(start['Xvalue'], start['Yvalue'], end['Xvalue'], end['Yvalue']),
    'area': area_of_swipe(swipe),
    # Velocity (displacement based)
    'velocity': velocity_of_swipe(swipe),
    'initial_velocity': get_initial_velocity(swipe),
    'final_velocity': get_final_velocity(swipe),
    'avg_velocity': get_average_velocity(swipe),
    'velocity_percentile_25': get_velocity_percentile_25(swipe),
    'velocity_percentile_50': get_velocity_percentile_50(swipe),
    'velocity_percentile_75': get_velocity_percentile_75(swipe),
    # Acceleration
    'acceleration': acceleration_of_swipe(swipe),
    'avg_acceleration': get_average_acceleration(swipe),
    'initial_acceleration': get_initial_acceleration(swipe),
    'final_acceleration': get_final_acceleration(swipe),
    'acceleration_percentile_25': get_acceleration_percentile_25(swipe),
    'acceleration_percentile_50': get_acceleration_percentile_50(swipe),
    'acceleration_percentile_75': get_acceleration_percentile_75(swipe),
    # Speed (path-length based)
    'speed': speed_of_swipe(swipe),
    'initial_speed': get_initial_speed(swipe),
    'final_speed': get_final_speed(swipe),
    'speed_percentile_25': get_speed_percentile_25(swipe),
    'speed_percentile_50': get_speed_percentile_50(swipe),
    'speed_percentile_75': get_speed_percentile_75(swipe),
    # Aggregates of the per-point series
    'avg_vel_x': np.mean(vel_x),
    'avg_vel_y': np.mean(vel_y),
    'avg_acc_x': np.mean(acc_x),
    'avg_acc_y': np.mean(acc_y),
    'avg_devs': np.mean(deviations),
    'max_devs': np.max(deviations),
    # Samples of the per-point series taken at 1/4, 1/2 and 3/4 of their length
    '25%_vel_x': vel_x[len(vel_x)//4],
    '50%_vel_x': vel_x[len(vel_x)//2],
    '75%_vel_x': vel_x[len(vel_x)*3//4],
    '25%_vel_y': vel_y[len(vel_y)//4],
    '50%_vel_y': vel_y[len(vel_y)//2],
    '75%_vel_y': vel_y[len(vel_y)*3//4],
    '25%_acc_x': acc_x[len(acc_x)//4],
    '50%_acc_x': acc_x[len(acc_x)//2],
    '75%_acc_x': acc_x[len(acc_x)*3//4],
    '25%_acc_y': acc_y[len(acc_y)//4],
    '50%_acc_y': acc_y[len(acc_y)//2],
    '75%_acc_y': acc_y[len(acc_y)*3//4],
  }

  # Emit the values in a deterministic order: sorted by feature name.
  return [features[name] for name in sorted(features)]

In [None]:
def extract_touches(df, count):
  """Split a BBMAS touch log into swipes and turn each swipe into features.

  Rows are collected until a row with ``actionType == 1`` is seen, which closes
  the current gesture. Gestures with 5 or fewer collected rows are discarded
  and counted; longer ones are passed to ``extract_features``.

  Only rows whose 'time' is a string are collected (its value is converted with
  ``convert_time_to_linux``). Numeric values, including NaN for a missing
  timestamp, and ``None`` are skipped. The previous check overwrote the result
  of ``np.isnan`` and relied on a ``TypeError`` to detect strings, which also
  let ``None`` through to the string conversion.

  Parameters
    df:    DataFrame with at least the columns 'time' and 'actionType' plus the
           columns read by ``extract_features``.
    count: running number of discarded short gestures (threaded through calls).

  Returns
    (swipes, count): list of feature vectors and the updated counter.

  NOTE(review): later sections of this notebook redefine ``extract_touches``
  with a different signature; this version must be the active one when called
  with two arguments.
  """
  swipes = []
  swipe = []

  for _, row in tqdm(df.iterrows(), total=len(df)):
    raw_time = row['time']
    if isinstance(raw_time, str):
      row['time'] = convert_time_to_linux(raw_time)
      swipe.append(row)

    if(row['actionType'] == 1):
      # End of gesture: keep it only if it has more than 5 usable points.
      if(len(swipe) > 5):
        swipes.append(extract_features(swipe))
      else:
        count += 1
      swipe = []
  return swipes, count

In [None]:
!pwd

/content/drive/.shortcut-targets-by-id/1Syra_U__eAe_iKmuKYWf9ykv4tC2_7tW/BBMAS_Touchstrokes


In [None]:
# Run swipe extraction over every tablet recording (Tablet/User1.csv .. Tablet/User116.csv).
# `count` is threaded through extract_touches and accumulates the number of
# gestures discarded for having 5 or fewer points; `result` stores the number
# of swipes kept for each user.
count = 0
result = []
for i in range(1, 117):
    print (i, count)
    df = pd.read_csv('Tablet/User{}.csv'.format(i))
    f, count = extract_touches(df, count)
    result.append(len(f))
print (count)

1 0


4130it [00:01, 3405.18it/s]


2 3


5289it [00:01, 3414.63it/s]


3 8


2788it [00:00, 3277.22it/s]


4 49


3933it [00:01, 3432.69it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-84-79528a6cef9c>", line 6, in <module>
    f, count = extract_touches(df, count)
  File "<ipython-input-82-77f46a4ef978>", line 5, in extract_touches
    for index, row in tqdm(df.iterrows()):
  File "/usr/local/lib/python3.7/dist-packages/tqdm/std.py", line 1185, in __iter__
    for obj in iterable:
  File "/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py", line 1014, in iterrows
    s = klass(v, index=columns, name=k)
  File "/usr/local/lib/python3.7/dist-packages/pandas/core/series.py", line 244, in __init__
    index = ensure_index(index)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 1823, in show

KeyboardInterrupt: ignored

In [None]:
print(result)
print (sum(result))
print(min(result))
print(max(result))
print(sum(result)/len(result))

In [None]:
# Sorted feature names, for reference when reading a feature vector.
# NOTE(review): this list has 35 names, while extract_features also emits the
# twelve '25%_*' / '50%_*' / '75%_*' features (47 in total), which sort before
# every name below - the list looks out of date.
['acceleration', 'acceleration_percentile_25', 'acceleration_percentile_50', 'acceleration_percentile_75', 'area', 'avg_acc_x', 'avg_acc_y', 'avg_acceleration', 'avg_devs', 'avg_vel_x', 'avg_vel_y', 'avg_velocity', 'direction', 'displacement', 'duration', 'endX', 'endY', 'final_acceleration', 'final_speed', 'final_velocity', 'initial_acceleration', 'initial_speed', 'initial_velocity', 'length', 'max_devs', 'speed', 'speed_percentile_25', 'speed_percentile_50', 'speed_percentile_75', 'startX', 'startY', 'velocity', 'velocity_percentile_25', 'velocity_percentile_50', 'velocity_percentile_75']

In [None]:
f[0]

# Data Preprocessing

In [None]:
# Build the per-swipe dataset for the Phone recordings:
#   X - feature vectors, y - gender label of the swipe's user, u - user id (string).
X = []
y = []
u = []
data_dir = 'Phone/'
demographs = pd.read_csv('Demographics.csv')
# Map user id -> gender from the demographics sheet.
id_map = {}
for index, row in demographs.iterrows():
  id_map[row['User ID']] = row['Gender']
user_files = os.listdir(data_dir)
for user_file in tqdm(user_files):
    #user_id = user_file.split('.')[0].replace('Copy of User', '')
    # Skip duplicated uploads: names starting with 'C' ("Copy of ...") or containing '('.
    if (str(user_file)[0] == 'C'):
        continue
    if (str(user_file).find('(') != -1):
        continue
    start = 'User'
    end = '.csv'
    user_id = user_file.split(start)[1].split(end)[0]
    df = pd.read_csv(data_dir + user_file)
    # extract_touches(df, count) returns (swipes, count). Calling it with one
    # argument raised the TypeError that left X, y and u empty.
    temp_X, n_short = extract_touches(df, 0)
    X.extend(temp_X)
    y.extend([id_map[int(user_id)]]*len(temp_X))
    u.extend([user_id]*len(temp_X))

  0%|          | 0/216 [00:00<?, ?it/s]

TypeError: ignored

In [None]:
pickling("features_X.pkl",X)
pickling("features_Y.pkl",y)
pickling("Ids_U.pkl",u)

In [None]:
X = np.array(X)
y = np.array(y)
print (X.shape, y.shape)

(0,) (0,)


In [None]:
X[110]

IndexError: ignored

# Authentication Preprocessing

In [None]:
def create_sliding_window(X, Y, n):
  """Concatenate every ``n`` consecutive feature vectors into one sample.

  For each start index ``i`` in ``range(len(X) - n)`` the vectors
  ``X[i] .. X[i+n-1]`` are joined into one flat list and labelled ``Y[i+n]``.

  NOTE(review): the label is taken from the element that follows the window,
  and windows are not restricted to a single user - confirm both are intended.

  Returns (windows, labels) as two lists.
  """
  windows = []
  labels = []
  for start in range(len(X) - n):
    flat = []
    for offset in range(n):
      flat.extend(X[start + offset])
    windows.append(flat)
    labels.append(Y[start + n])
  return windows, labels


In [None]:
X = unpickling("features_X.pkl")
y = unpickling("features_Y.pkl")
u = unpickling("Ids_U.pkl")

def binarize(u, id):
  """Return a 0/1 list marking which entries of ``u`` belong to user ``id``.

  Both the entries and ``id`` are compared after conversion with ``int``, so
  '2', '02' and 2 all match ``id=2``.
  """
  target = int(id)
  return [1 if int(user) == target else 0 for user in u]

select_user = binarize(u, 2)

# select_user = []
# for i in range(117):
#     val = i+1
#     select_user.append(np.array(binarize(u, val)))
#     break

#print(select_user[0])
print (np.array(X).shape)

(0,)


In [None]:
select_user[0]

IndexError: ignored

In [None]:
## CREATING SLIDING WINDOW
X, y = create_sliding_window(X, select_user, 5)
X = np.array(X)
y = np.array(y)
print (X.shape, y.shape)

(20281, 235) (20281,)


In [None]:
np.unique(y)

array([0, 1])

In [None]:

#New Try:
# X = unpickling("features_X.pkl")
# y = unpickling("features_Y.pkl")
# u = unpickling("Ids_U.pkl")

# def binarize_select(X, u, id):
#   users = {}
#   for i in u:
#     users[i] = []
#   print (len(users))
#   for i in range(len(u)):
#     users[u[i]].append(X[i])
#   legit = users[id]
#   adversary = []
#   for i in users:
#     if i != id:
#       idx = np.random.choice(len(users[i]), 2)
#       adversary.extend(list(np.array(users[i])[idx]))
#   return np.array(legit), np.array(adversary)


# legit, adversary = np.array(binarize_select(X, u, '116'))
# X = np.concatenate((legit, adversary))
# y = np.concatenate((np.ones(legit.shape[0]), np.zeros(adversary.shape[0])))


In [None]:
# Need to try ADASYN as well
X_matrix, y_vector = SMOTE(kind='svm').fit_sample(X, y)
# X_matrix, y_vector = SMOTE(kind='svm').fit_sample(X, select_user)
scaler = preprocessing.StandardScaler()
X_matrix = scaler.fit_transform(X_matrix)

# Split the dataset in two equal parts to remove unseen data

X_train, X_test, y_train, y_test = train_test_split(
X_matrix, y_vector, test_size=0.4, stratify = y_vector, random_state=0)

In [None]:
y_train[0]

1

In [None]:
print(X_train.shape)
print(y_train.shape)

(17456, 235)
(17456,)


# Authentication

In [None]:
X = unpickling("features_X.pkl")
y = unpickling("features_Y.pkl")
u = unpickling("Ids_U.pkl")

In [None]:
np.unique(u)

array(['1', '10', '100', '101', '102', '103', '104', '105', '106', '107',
       '108', '109', '11', '110', '111', '112', '113', '114', '115',
       '116', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20',
       '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30',
       '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40',
       '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50',
       '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60',
       '61', '62', '63', '64', '65', '66', '67', '68', '69', '7', '70',
       '71', '72', '73', '74', '75', '76', '77', '78', '79', '8', '80',
       '81', '82', '83', '84', '85', '86', '87', '88', '89', '9', '90',
       '91', '92', '93', '94', '95', '96', '97', '98', '99'], dtype='<U3')

In [None]:
def binarize_select(X, u, id, n_adversary=2, seed=None):
  """Split samples into the legitimate user's and a random pick from everyone else.

  Parameters
    X:           sequence of feature vectors.
    u:           sequence of user ids, ``u[i]`` being the owner of ``X[i]``.
    id:          id of the legitimate user; must have the same type as the
                 entries of ``u`` (e.g. the string '116').
    n_adversary: number of samples drawn from every other user (default 2).
                 Drawing is done with replacement, so a user with fewer samples
                 still contributes ``n_adversary`` rows.
    seed:        optional seed for a reproducible draw. ``None`` keeps using
                 NumPy's global random state.

  Prints the number of distinct users and returns ``(legit, adversary)`` as two
  NumPy arrays.

  Raises
    KeyError: if ``id`` does not occur in ``u``.
  """
  users = {}
  for i in range(len(u)):
    users.setdefault(u[i], []).append(X[i])
  print (len(users))
  legit = users[id]
  rng = np.random if seed is None else np.random.RandomState(seed)
  adversary = []
  for other, samples in users.items():
    if other != id:
      idx = rng.choice(len(samples), n_adversary)
      adversary.extend(list(np.array(samples)[idx]))
  return np.array(legit), np.array(adversary)

### Build the legitimate / adversary sample sets

In [None]:
# binarize_select already returns two NumPy arrays. Wrapping the pair in
# np.array() tried to build one array from two differently sized arrays, which
# triggers a ragged-array warning on older NumPy and fails on recent versions.
legit, adversary = binarize_select(X, u, '116')

116


  """Entry point for launching an IPython kernel.


In [None]:
y = np.ones(legit.shape[0])

In [None]:
y = np.concatenate((y, np.zeros(adversary.shape[0])))

In [None]:
X = np.concatenate((legit, adversary))

In [None]:
y.shape

(453,)

In [None]:
X.shape

(453, 47)

In [None]:
y

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 0., 0., 0.

# Population Attack

In [None]:
import random
import numpy as np

In [None]:
def population_attack(X, n = 1):
    """Generate ``n`` synthetic feature vectors from population statistics.

    ``X`` is a 2-D NumPy array with one row per swipe. For every column the
    mean and standard deviation are computed; each synthetic value is
    ``mean + z * std`` with ``z`` drawn from ``random.normalvariate(0, 1)``.
    Columns are sampled independently of each other.

    Returns a list of ``n`` lists, each with ``X.shape[1]`` values.
    """
    columns = range(X.shape[1])
    means = [np.mean(X[:, col]) for col in columns]
    stds = [np.std(X[:, col]) for col in columns]

    synthetic = []
    for _ in range(n):
        row = []
        for col in columns:
            row.append(means[col] + random.normalvariate(0, 1)*stds[col])
        synthetic.append(row)
    return synthetic


In [None]:
X = unpickling('features_X.pkl')

In [None]:
pop_data_1000 = population_attack(np.array(X), 1000)

In [None]:
pickling("pop_attack_data_1000.pkl", pop_data_1000)

In [None]:
def attack(pkl, model):
  """Evaluate ``model`` against attack samples stored in the pickle file ``pkl``.

  Every attack sample is labelled 0, so a prediction other than 0 counts as a
  wrong decision. Prints accuracy, FAR, FRR and HTER.

  Fixes over the previous version:
    * ``accuracy_score`` was called with the undefined name ``y_true``; the
      local label list ``y`` is used instead.
    * ``model.predict`` received the labels as a second argument; it is now
      called with the samples only, as scikit-learn style estimators expect.

  NOTE(review): ``HTER`` and ``accuracy_score`` are not defined or imported in
  the visible part of the notebook - confirm they are in scope.
  """
  x = unpickling(pkl)
  y = [0]*len(x)
  y_pred = model.predict(x)
  far, frr, hter = HTER(y, y_pred)
  print ("Accuracy:", accuracy_score(y, y_pred))
  print ("FAR:", far)
  print ("FRR", frr)
  print ("HTER", hter)

# BTAS Serwadda

In [None]:
def extract_touches(df):
  """Group the Serwadda touch log into swipes per user and extract features.

  Rows are expected to be ordered so that all rows of one swipe are adjacent.
  A swipe ends when 'SwipeID' or 'UserID' changes; swipes with more than 5
  rows are passed to ``extract_features``, shorter ones are dropped.

  Returns a dict mapping each 'UserID' value to its list of feature vectors.

  Fixes over the previous version:
    * the swipe still being collected when the user changed was discarded;
      it is now flushed to the user it belongs to.
    * no placeholder entry (key -1 with an empty list) is created.
    * the unused NaN check on 'time' was removed.

  NOTE: this redefines ``extract_touches`` from the Feature Engineering
  section with a different signature and return type.
  """
  users = {}
  swipes = []
  swipe = []
  prev_ID = None
  prev_user_ID = None

  def _flush(points, bucket):
    # Keep a finished swipe only if it has more than 5 points.
    if(len(points) > 5):
      bucket.append(extract_features(points))

  for _, row in tqdm(df.iterrows(), total=len(df)):
    if prev_user_ID is None or prev_user_ID != row['UserID']:
      if prev_user_ID is not None:
        _flush(swipe, swipes)
        users.setdefault(prev_user_ID, []).extend(swipes)
      swipes = []
      swipe = []
      prev_user_ID = row['UserID']
      # Force a new swipe even if the next user's first SwipeID repeats.
      prev_ID = None
    if prev_ID is None or prev_ID != row['SwipeID']:
      _flush(swipe, swipes)
      swipe = []
      prev_ID = row['SwipeID']
    swipe.append(row)

  if prev_user_ID is not None:
    _flush(swipe, swipes)
    users.setdefault(prev_user_ID, []).extend(swipes)
  return users

In [None]:
df = pd.read_csv('BTAS2013Serwadda/PortraitSession1.csv')

In [None]:
# Alias the Serwadda columns to the names the feature helpers read
# ('Xvalue', 'Yvalue', 'time', 'touchMajor', 'touchMinor').
df['Xvalue'] = df['X']
df['Yvalue'] = df['Y']
df['time'] = df['EventTime']
df['touchMajor'] = df['Area']
# Only 'Area' is aliased from the source columns. With touchMinor = 1/pi the
# product pi * touchMajor * touchMinor used by area_of_swipe equals 'Area'.
df['touchMinor'] = [1/np.pi]*len(df['Area'])

In [None]:
df

Unnamed: 0,UserID,SwipeID,X,Y,Pressure,Area,EventTime,Xvalue,Yvalue,time,touchMajor,touchMinor
0,1,1,366.56250,500.00000,0.7500,0.266667,5294067,366.56250,500.00000,5294067,0.266667,0.31831
1,1,1,374.53125,457.81250,0.7500,0.233333,5294098,374.53125,457.81250,5294098,0.233333,0.31831
2,1,1,382.96875,420.31250,0.7500,0.266667,5294126,382.96875,420.31250,5294126,0.266667,0.31831
3,1,1,385.78125,404.68750,0.7500,0.233333,5294141,385.78125,404.68750,5294141,0.233333,0.31831
4,1,1,390.93750,388.28125,0.7500,0.200000,5294156,390.93750,388.28125,5294156,0.200000,0.31831
...,...,...,...,...,...,...,...,...,...,...,...,...
727164,138,199,229.68750,672.65625,0.6250,0.233333,842659,229.68750,672.65625,842659,0.233333,0.31831
727165,138,199,206.71875,671.87500,0.6250,0.266667,842674,206.71875,671.87500,842674,0.266667,0.31831
727166,138,199,182.81250,675.00000,0.6250,0.200000,842689,182.81250,675.00000,842689,0.200000,0.31831
727167,138,199,160.78125,673.43750,0.6250,0.266667,842704,160.78125,673.43750,842704,0.266667,0.31831


In [None]:
X = extract_touches(df)

727169it [04:25, 2736.71it/s]


In [None]:
X[1]

In [None]:
# NOTE(review): X is the dict returned by the extract_touches defined in this
# section (user id -> list of feature vectors). np.asarray on a dict gives a
# 0-d object array, not a feature matrix, and the later `for i in f` loop over
# the pickled object expects a dict - this cell should probably be skipped or
# removed.
X = np.asarray(X)

In [None]:
X.shape

In [None]:
pickling('serwadda_features.pkl', X)

In [None]:
f = unpickling('serwadda_features.pkl')

In [None]:
ans = 0
for i in f:
  ans += len(f[i])
print (ans)

37768


# **HMOG**


In [None]:
PATH = '/content/drive/MyDrive/hmog/public_dataset/'

In [None]:
def pickup_user(u, n_sessions=24):
  """Load the HMOG touch events of user ``u`` for sessions 1..``n_sessions``.

  The file read for session ``k`` is
  ``PATH + u + '/' + u + '_session_' + str(k) + '/TouchEvent.csv'``.
  It is assumed the user id exists and ``PATH`` points at the dataset root.

  Returns a dict ``{session_number: {'touch': DataFrame}}``. A session that
  cannot be loaded maps to an empty dict. Each frame has a constant
  'touchMinor' column (1/pi) inserted and the columns 'etime', 'actid' and
  'pressure' removed.

  Loading is best effort, but only ``Exception`` is caught. The previous bare
  ``except`` also swallowed KeyboardInterrupt, so a long run could not be
  interrupted. The frame is stored only after all steps succeeded.
  """
  columns = ['time', 'etime', 'actid', 'ptrcnt', 'ptrid', 'actionid', 'Xvalue', 'Yvalue', 'pressure', 'touchMajor', 'orientation']
  d = {}
  for session in range(1, n_sessions + 1):
    d[session] = {}
    ss = PATH + u + '/' + u+'_' + 'session' + '_' + str(session)
    try:
      touch = pd.read_csv(ss + '/' + 'TouchEvent.csv', header = None, usecols = list(range(11)), names=columns)
      touch.insert(2, 'touchMinor', 1/np.pi, allow_duplicates=True)
      touch = touch.drop(columns = ['etime', 'actid', 'pressure'])
    except Exception:
      # Missing or unreadable session: leave its entry empty.
      continue
    d[session]['touch'] = touch
  return d

In [None]:
def extract_touches(df):
  """Split one HMOG session's touch events into swipes and extract features.

  Row handling, in order:
    * 'orientation' != 0 or 'ptrcnt' != 1: the current swipe is discarded.
    * 'actionid' 0 or 5: the row is appended to the current swipe.
    * 'actionid' 2: the row is appended unless it repeats the previous timestamp.
    * 'actionid' 1 or 6: the swipe ends. The row is appended unless it repeats
      the previous timestamp; swipes with 5 or fewer rows are dropped, others
      are passed to ``extract_features``.

  Returns the list of feature vectors. A swipe whose feature extraction raises
  is skipped (best effort). Only ``Exception`` is caught, so KeyboardInterrupt
  still stops the run; the previous bare ``except`` swallowed it.

  NOTE: this redefines ``extract_touches`` from earlier sections.
  """
  swipes = []
  swipe = []
  for _, row in df.iterrows():
    if(row['orientation'] != 0):
      swipe = []
      continue
    if(row['ptrcnt'] != 1):
      swipe = []
      continue
    if(row['actionid'] in [0, 5]):
      swipe.append(row)
      continue
    if(row['actionid'] == 2):
      if(len(swipe) > 0 and row['time'] == swipe[-1]['time']):
        continue
      swipe.append(row)
    if(row['actionid'] in [1, 6]):
      if len(swipe) == 0:
        continue
      if(row['time'] != swipe[-1]['time']):
        swipe.append(row)
      if(len(swipe) <= 5):
        swipe = []
        continue
      try:
        swipes.append(extract_features(swipe))
      except Exception:
        # Skip swipes whose features cannot be computed.
        pass
      swipe = []
  return swipes

In [None]:
def get_features(u):
  """Return the feature vectors of all swipes of HMOG user ``u``.

  Loads the user's sessions with ``pickup_user`` and runs ``extract_touches``
  on each of the 24 sessions that has touch data. Sessions without a 'touch'
  entry are skipped explicitly. A session whose extraction raises is skipped
  as well, but only ``Exception`` is caught, so KeyboardInterrupt still stops
  the run.
  """
  sessions = pickup_user(u)
  # u = unpickling('saved_objects/' + u + '_data.pkl')
  swipes = []
  for session_no in range(1, 25):
    touch = sessions.get(session_no, {}).get('touch')
    if touch is None:
      continue
    try:
      swipes.extend(extract_touches(touch))
    except Exception:
      continue
  return swipes

In [None]:
# # CREATING USER ID AND LIST
# from os import listdir

# PATH = '/content/drive/MyDrive/hmog/public_dataset/'
# ll = listdir(PATH)
# fll = []
# for i in ll:
#   if '.zip' in i or '.pdf' in i or 'saved' in i or '__' in i:
#     continue
#   fll.append(i)
# pickling(PATH + 'saved_objects/users.pkl', fll)

In [None]:
# LOADING USERS
users = unpickling(PATH + 'saved_objects/users.pkl')

In [None]:
d = {}
for i in tqdm(users):
  d[i] = get_features(i)

100%|██████████| 100/100 [53:50<00:00, 32.30s/it]


In [None]:
pickling('hmog_features.pkl', d)

In [None]:
a = pickup_user(users[1])

In [None]:
# List the sessions of this user for which touch data was loaded, with the
# size of each frame. The cell previously ended in a dangling `if`, which is a
# syntax error.
for i in a:
  if 'touch' in a[i]:
    print(i, a[i]['touch'].shape)

{1: {'touch':                time  touchMinor  ptrcnt  ...  Yvalue  touchMajor  orientation
  0     1396999272449     0.31831       1  ...   111.0    0.025490            1
  1     1396999272470     0.31831       1  ...   111.0    0.025490            1
  2     1396999272502     0.31831       1  ...   111.0    0.025490            1
  3     1396999272505     0.31831       1  ...   111.0    0.025490            1
  4     1396999278178     0.31831       1  ...    85.0    0.027451            1
  ...             ...         ...     ...  ...     ...         ...          ...
  5497  1396999988955     0.31831       1  ...   103.0    0.033333            1
  5498  1396999988959     0.31831       1  ...   103.0    0.033333            1
  5499  1396999990138     0.31831       1  ...   457.0    0.023529            1
  5500  1396999990142     0.31831       1  ...   457.0    0.025490            1
  5501  1396999990160     0.31831       1  ...   457.0    0.025490            1
  
  [5502 rows x 9 columns]

In [None]:
f = unpickling('hmog_features.pkl')

In [None]:
ans = 0
for i in f:
  ans += len(f[i])
print (ans)

227464


# **UMDAA-02**

In [None]:
cd /content/drive/MyDrive/BBMAS_Touchstrokes/umdaa02-touch/

/content/drive/.shortcut-targets-by-id/0B7efnfTm8DxkeUlETFRkTFlJSkU/umdaa02-touch


In [None]:
!ls

readme.gdoc	    TestEventDictionary_70.csv	TrainEventDictionary_70.csv
readme_touchevents  TestEventDictionary_70.pkl	TrainEventDictionary_70.pkl


In [None]:
df = pd.read_csv('TrainEventDictionary_70.csv')

In [None]:
df2 = pd.read_csv('TestEventDictionary_70.csv')

In [None]:
df = pd.concat([df, df2])

In [None]:
df = df.drop(columns=['SESSION', 'tag', 'eventPressure', 'eventType'])

In [None]:
df = df.rename(columns= {'eventTime': 'time', 'positionX': 'Xvalue', 'positionY': 'Yvalue'})

In [None]:
# Fill both touch axes with 0 (presumably because this dataset has no contact
# size columns - confirm). area_of_swipe computes pi * touchMajor * touchMinor,
# so the 'area' feature is 0 for every UMDAA-02 swipe.
df['touchMajor'] = [0]*len(df['time'])
df['touchMinor'] = [0]*len(df['time'])

In [None]:
df

Unnamed: 0,index1,index2,time,USER,Xvalue,Yvalue,touchMajor,touchMinor
0,0,0,112632,Ph01USER001,981,976,0,0
1,0,1,112640,Ph01USER001,980,976,0,0
2,0,2,112648,Ph01USER001,976,976,0,0
3,0,3,112657,Ph01USER001,970,976,0,0
4,0,4,112665,Ph01USER001,960,976,0,0
...,...,...,...,...,...,...,...,...
833260,18,1,4188292,Ph10USER004,1485,681,0,0
833261,18,2,4188309,Ph10USER004,1485,679,0,0
833262,18,3,4188326,Ph10USER004,1485,677,0,0
833263,18,4,4188343,Ph10USER004,1485,675,0,0


In [None]:
def extract_swipes(df):
  """Group UMDAA-02 touch rows into swipes per user and extract features.

  Consecutive rows belong to the same swipe while both 'USER' and 'index1'
  stay the same. Swipes with more than 5 rows are passed to
  ``extract_features``; shorter ones are dropped.

  Returns a ``defaultdict(list)`` mapping 'USER' to the list of feature vectors.

  Fixes over the previous version:
    * the last swipe of the frame is flushed after the loop (it was lost).
    * a change of 'USER' also ends the swipe, so two users whose adjacent
      swipes share an 'index1' value are no longer merged.
    * the default factory is ``list`` instead of a lambda, so the result can be
      pickled.
  """
  swipes = defaultdict(list)
  swipe = []
  prev_key = None
  for _, row in tqdm(df.iterrows(), total=len(df)):
    key = (row['USER'], row['index1'])
    if key == prev_key:
      swipe.append(row)
      continue
    if len(swipe) > 5:
      swipes[prev_key[0]].append(extract_features(swipe))
    swipe = [row]
    prev_key = key
  # Flush the swipe that was still open when the rows ran out.
  if len(swipe) > 5:
    swipes[prev_key[0]].append(extract_features(swipe))
  return swipes

In [None]:
oo = extract_swipes(df)

3587980it [20:17, 2947.54it/s]


In [None]:
oo.keys()

dict_keys(['Ph01USER001', 'Ph01USER002', 'Ph01USER003', 'Ph01USER004', 'Takeout', 'Ph02USER001', 'Ph02USER002', 'Ph02USER003', 'Ph02USER004', 'Ph02USER005', 'Ph03USER001', 'Ph03USER002', 'Ph03USER003', 'Ph04USER001', 'Ph04USER002', 'Ph04USER003', 'Ph04USER004', 'Ph05USER001', 'Ph05USER002', 'Ph05USER003', 'Ph05USER004', 'Ph06USER001', 'Ph06USER002', 'Ph06USER003', 'Ph06USER005', 'Ph08USER001', 'Ph08USER002', 'Ph08USER003', 'Ph09USER001', 'Ph09USER002', 'Ph09USER003', 'Ph09USER004', 'Ph10USER001', 'Ph10USER002', 'Ph10USER003', 'Ph10USER004'])

In [None]:
# Go back to the project folder before saving. The previous path was truncated
# ('/content/drive/Mydr').
os.chdir('/content/drive/MyDrive/BBMAS_Touchstrokes/')

/content/drive/.shortcut-targets-by-id/1Syra_U__eAe_iKmuKYWf9ykv4tC2_7tW/BBMAS_Touchstrokes


In [None]:
# `oo` is the result of extract_swipes above; `ooo` is not defined in any
# visible cell. Converting to a plain dict keeps the file loadable without the
# defaultdict's factory.
pickling('umdaa_features.pkl', dict(oo))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-103-d6e06be4eecc>", line 1, in <module>
    pickling('umdaa_features.pkl', ooo)
  File "<ipython-input-40-75fce63ccefb>", line 2, in pickling
    f = open(fname, "wb")
OSError: [Errno 30] Read-only file system: 'umdaa_features.pkl'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 1823, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'OSError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinn

OSError: ignored

In [None]:
f = unpickling('umdaa_features.pkl')