In [1]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from behalearn.preprocessing import columns
from behalearn.authentication import authentication_metrics
from behalearn.authentication import authentication_results
from behalearn.estimators import VotingClassifier
from behalearn.features import FeatureExtractor
from behalearn.features import temporal
from behalearn.metrics import fmr_score
from behalearn.metrics import fnmr_score
from behalearn.metrics import hter_score
from behalearn.preprocessing import StartEndSegmentSplitter
from behalearn.preprocessing import SegmentSplitter
from behalearn.preprocessing.segment import criteria
from behalearn.visualization import initialize_notebook_output
from behalearn.visualization import label_touches
from behalearn.visualization import plot_fmr_fnmr
from behalearn.visualization import visualize_mobile_data
from behalearn.visualization import visualize_mouse_data
from behalearn.visualization import visualize_custom_data

In [2]:
# Names of the data-frame columns that the cells below refer to.
segment_column = 'segment'
user_column = 'user'
user_name_column = 'username'

# Notebook switches.
user_to_visualize = 3
# When True the segment becomes part of each recording's `id` and the
# results are written to the "segments" output folder.
compute_features_for_segment = True

# Metadata columns that are merged back onto the computed touch features.
columns_to_identificate_features = [
    'id',
    'pattern_id',
    'device',
    'scenario',
    user_name_column,
    user_column,
]

In [3]:
# Load the raw touch, linear-accelerometer and gyroscope recordings.
path = '../login_datasets/2019-01-08_FIIT_-2-poschodie_po_skuske_KPAIS_correct_patterns_only/'
touch_data = pd.read_csv(path + 'touch.csv', sep=',')
acc_data = pd.read_csv(path + 'linear_accelerometer.csv', sep=',')
gyro_data = pd.read_csv(path + 'gyroscope.csv', sep=',')

# Build a string key per recording: pattern id followed by the device name.
touch_data["id"] = touch_data["pattern_id"].astype(str) + touch_data["device"]
acc_data["id"] = acc_data["pattern_id"].astype(str) + acc_data["device"]
gyro_data["id"] = gyro_data["pattern_id"].astype(str) + gyro_data["device"]

if compute_features_for_segment:
    # Per-segment mode: the segment number becomes part of the key.
    touch_data["id"] += touch_data[segment_column].astype(str)
    acc_data["id"] += acc_data[segment_column].astype(str)
    gyro_data["id"] += gyro_data[segment_column].astype(str)
    # Guarded so that re-running this cell does not append the name again.
    if segment_column not in columns_to_identificate_features:
        columns_to_identificate_features.append(segment_column)
else:
    # Per-pattern mode: the segment column is not needed any further.
    touch_data = touch_data.drop(columns=segment_column)
    acc_data = acc_data.drop(columns=segment_column)
    gyro_data = gyro_data.drop(columns=segment_column)

In [None]:
# Preview the first rows of the touch recordings.
touch_data.head()

In [5]:
# Turn every distinct `id` string into an integer code stored in `user_column`.
# The encoder is fitted on the ids of all three frames together, so an id that
# occurs in several frames receives the same code in each of them.
# NOTE(review): despite the column name, the code identifies an `id` value
# rather than a person - confirm this is intended.
le = LabelEncoder().fit(pd.concat([touch_data['id'], acc_data['id'], gyro_data['id']]))
gyro_data[user_column] = le.transform(gyro_data['id'])
acc_data[user_column] = le.transform(acc_data['id'])
touch_data[user_column] = le.transform(touch_data['id'])

In [None]:
# Preview the touch recordings again, now with the encoded `user_column`.
touch_data.head()

In [6]:
def get_columns_combinations(col_names, combinations=None):
    """Return the column combinations that contain more than one column.

    The candidates come from ``columns._get_column_combinations``; every
    candidate whose length is 1 or less is discarded.

    Parameters
    ----------
    col_names
        Column names, forwarded unchanged to the behalearn helper.
    combinations
        Optional second argument, forwarded unchanged to the behalearn helper.

    Returns
    -------
    list
        The remaining combinations, in the order the helper produced them.
    """
    return [
        candidate
        for candidate in columns._get_column_combinations(col_names, combinations)
        if len(candidate) > 1
    ]

In [7]:
def calculate_maginute_to_df(df, columns_name):
    """Add a magnitude column to ``df`` for every multi-column combination.

    For each combination returned by ``get_columns_combinations(columns_name)``
    the square root of the sum of the squared member columns is stored in a
    new column named after the members joined with ``'_'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns listed in ``columns_name``. It is modified
        in place: one column is added per combination.
    columns_name
        Names of the columns to combine.

    Returns
    -------
    list of str
        Names of the magnitude columns that were added, in creation order.
    """
    final_combinations = []
    for combination in get_columns_combinations(columns_name):
        dimensions = list(combination)
        # Accumulate with plain Series arithmetic. The original started from
        # a Python list bound to the name `sum`, which shadowed the builtin
        # and only worked through an implicit list-to-Series coercion.
        squared_total = df[dimensions[0]] ** 2
        for dimension in dimensions[1:]:
            squared_total = squared_total + df[dimension] ** 2

        new_column = '_'.join(combination)
        df[new_column] = squared_total ** (1 / 2)
        final_combinations.append(new_column)

    return final_combinations

In [8]:
def compute_features(df, features, prefix=None):
    """Run a ``FeatureExtractor`` on ``df`` and return the cleaned result.

    Parameters
    ----------
    df
        Input frame handed to ``FeatureExtractor.fit_transform``.
    features
        Feature specification handed to ``FeatureExtractor``.
    prefix : str, optional
        When given, every occurrence of ``"velocity"`` in the resulting
        column names is replaced by this text.

    Returns
    -------
    The extracted features without any row that holds an infinite or
    missing value.
    """
    # `[user_column]` is the extractor's second positional argument -
    # presumably the grouping columns; confirm against behalearn.
    extracted = FeatureExtractor(features, [user_column]).fit_transform(df)

    # Infinite values are turned into NaN first so that dropna removes them too.
    cleaned = extracted.replace([np.inf, -np.inf], np.nan).dropna()

    if prefix is None:
        return cleaned

    cleaned.columns = cleaned.columns.str.replace(r"velocity", prefix)
    return cleaned

In [9]:
def renaming_condition(x, columns_name, prefix):
    """Return ``prefix + x`` when ``x`` is in ``columns_name``, otherwise ``x``."""
    return prefix + x if x in columns_name else x


def add_prefix_to_columns(df, columns_name, prefix):
    """Prefix the listed columns of ``df`` in place and return their new names.

    Columns of ``df`` that are not listed in ``columns_name`` keep their
    name. The returned list is built from ``columns_name`` alone, so it has
    one entry per requested name whether or not ``df`` contains it.
    """
    renamed_labels = []
    for label in df.columns:
        renamed_labels.append(renaming_condition(label, columns_name, prefix))
    df.columns = renamed_labels

    prefixed_names = []
    for name in columns_name:
        prefixed_names.append(prefix + name)
    return prefixed_names

In [10]:
def compute_statistics(df, columns_to_compute_statistic, prefix):
    """Compute per-group descriptive statistics of raw columns and magnitudes.

    Steps:
    1. add magnitude columns for every multi-column combination of
       ``columns_to_compute_statistic`` (``calculate_maginute_to_df``),
    2. prefix the raw and magnitude columns with ``prefix``
       (``add_prefix_to_columns``),
    3. group by ``user_column`` and call ``describe()`` on those columns,
    4. flatten the two-level column labels with ``'_'`` and drop every
       column whose name matches ``'count'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. Modified in place: magnitude columns are added and the
        processed columns are renamed with ``prefix``.
    columns_to_compute_statistic : list of str
        Raw column names. The list is no longer extended in place.
    prefix : str
        Text put in front of every processed column name.

    Returns
    -------
    pandas.DataFrame
        One row per ``user_column`` value with the flattened statistics.
    """
    # Work on a copy: the original used `+=`, which grew the caller's list.
    base_columns = list(columns_to_compute_statistic)
    magnitude_columns = calculate_maginute_to_df(df, base_columns)
    prefixed_columns = add_prefix_to_columns(df, base_columns + magnitude_columns, prefix)

    statistics = df.groupby([user_column])[prefixed_columns].describe()
    # describe() yields (column, statistic) label pairs; join each pair.
    statistics.columns = ['_'.join(pair) for pair in statistics.columns.to_flat_index()]

    count_columns = list(statistics.filter(regex='count'))
    return statistics[statistics.columns.drop(count_columns)]

In [11]:
def normalize_columns_names(df):
    """Rewrite the percentile suffixes in the column names of ``df`` in place.

    ``25%``, ``50%`` and ``75%`` become ``_lower_q``, ``_median`` and
    ``_upper_q``; afterwards every ``__`` is collapsed to ``_``. Returns
    nothing.
    """
    # Order matters: the underscore collapse has to run last.
    substitutions = (
        (r"25%", "_lower_q"),
        (r"50%", "_median"),
        (r"75%", "_upper_q"),
        (r"__", "_"),
    )
    for old_text, new_text in substitutions:
        df.columns = df.columns.str.replace(old_text, new_text)

In [12]:
# Features extracted from the touch trajectory (x/y screen coordinates).
features = [
    'duration',
    ('length', {
        'columns': ['x', 'y'],
    }),
    ('start', {
        'columns': ['x', 'y'],
    }),
    ('velocity', {
        'columns': ['x', 'y'],
    }),
    ('acceleration', {
        'columns': ['x', 'y'],
    }),
    ('jerk', {
        'columns': ['x', 'y'],
    }),
    ('angular_velocity', {
        'columns': ['x', 'y'],
    }),
    ('angular_acceleration', {
        'columns': ['x', 'y'],
    }),
]
# The timestamps printed between the stages show how long each one takes.
print(time.strftime("%Y-%m-%d %H:%M"))
touch_features = compute_features(touch_data, features)
# Attach the identifying metadata; the merge repeats it for every raw row of
# a group, so duplicates are dropped afterwards.
touch_features = touch_features.merge(touch_data[columns_to_identificate_features], on=[user_column]).drop_duplicates()

# The sensor frames only get the 'velocity' feature over x/y/z; its column
# names are re-labelled through the prefix passed to compute_features.
features = [
    ('velocity', {
        'columns': ['x', 'y', 'z'],
    }),
]
print(time.strftime("%Y-%m-%d %H:%M"))

# compute_features has to run before compute_statistics: the latter renames
# the raw x/y/z columns of the frame it is given.
acc_features = compute_features(acc_data, features, "accelerometer_jerk")
acc_statistics_from_raw_data = compute_statistics(acc_data,['x','y','z'],"accelerometer_")
acc_features = acc_features.merge(acc_statistics_from_raw_data, on=user_column, how='inner').drop_duplicates()
print(time.strftime("%Y-%m-%d %H:%M"))

gyro_features = compute_features(gyro_data, features, "gyro_jerk")
gyro_statistics_from_raw_data = compute_statistics(gyro_data,['x','y','z'],"gyro_")
gyro_features = gyro_features.merge(gyro_statistics_from_raw_data, on=user_column, how='inner').drop_duplicates()
print(time.strftime("%Y-%m-%d %H:%M"))

# Inner joins keep only the groups that have touch, accelerometer and
# gyroscope features. The key is `user_column` everywhere instead of a
# hard-coded 'user'.
all_features = touch_features.merge(acc_features, on=user_column, how='inner').merge(gyro_features, on=user_column, how='inner')
normalize_columns_names(all_features)

2020-10-12 12:52
2020-10-12 13:00
2020-10-12 13:04
2020-10-12 13:09


In [None]:
# Plain-text dump of the four feature tables, one after another.
print(touch_features, acc_features, gyro_features, all_features, sep='\n')

In [22]:
# The output folder depends on whether features were computed per segment.
# NOTE(review): "paterns" and "feautures" are misspelled but left unchanged,
# because other code may read these exact paths.
features_path = "../login_features/" + ("segments" if compute_features_for_segment else "paterns") + "/"
# features_path already ends with "/", so the file names carry no leading
# slash (the original produced ".../segments//acc_feautures.csv").
touch_features.to_csv(features_path + "touch_feautures.csv", encoding='utf-8', index=False)
acc_features.to_csv(features_path + "acc_feautures.csv", encoding='utf-8', index=False)
gyro_features.to_csv(features_path + "gyro_feautures.csv", encoding='utf-8', index=False)
all_features.to_csv(features_path + "all_feautures.csv", encoding='utf-8', index=False)

In [14]:
# Number of distinct encoded ids in the touch data. `user_column` replaces the
# hard-coded 'user'; dropna=False keeps the count equal to len(unique()).
touch_data[user_column].nunique(dropna=False)

2685

In [13]:
# Show the merged touch + accelerometer + gyroscope feature table.
all_features

Unnamed: 0,user,duration,length,start_x,start_y,velocity_x_mean,velocity_x_std,velocity_x_min,velocity_x_max,velocity_x_abs_min,...,gyro_y_z_median,gyro_y_z_upper_q,gyro_y_z_max,gyro_x_y_z_mean,gyro_x_y_z_std,gyro_x_y_z_min,gyro_x_y_z_lower_q,gyro_x_y_z_median,gyro_x_y_z_upper_q,gyro_x_y_z_max
0,0,1.914000e+09,1953.448304,291.484070,273.423280,2.059684e-08,6.293817e-07,-1.611801e-06,0.000001,0.0,...,0.279530,0.425458,0.764908,0.425006,0.185709,0.029498,0.285405,0.410518,0.551356,0.929948
1,1,1.575000e+09,1875.100193,311.456330,285.414980,-1.435685e-08,7.851818e-07,-1.632002e-06,0.000002,0.0,...,0.284676,0.443941,1.541725,0.573025,0.389474,0.046580,0.312189,0.460535,0.756365,2.263248
2,2,5.739999e+08,783.814266,127.711520,88.551700,5.998682e-07,5.976641e-07,-6.462221e-07,0.000001,0.0,...,0.228385,0.306700,0.483323,0.262840,0.102345,0.072232,0.176461,0.269539,0.327641,0.483553
3,3,4.010002e+08,754.005772,131.705960,93.548220,8.908311e-07,1.192497e-06,-5.875831e-07,0.000003,0.0,...,0.248920,0.369822,0.683190,0.552535,0.273762,0.126262,0.323754,0.577645,0.737675,1.113737
4,4,3.899999e+08,842.420105,80.776700,85.553770,1.169427e-06,1.374986e-06,-4.715962e-07,0.000004,0.0,...,0.171603,0.247182,0.593592,0.281890,0.161749,0.082268,0.173068,0.223527,0.340505,0.796646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2523,2680,4.100001e+08,732.438572,113.730930,99.544070,1.132385e-06,1.026150e-06,0.000000e+00,0.000003,0.0,...,0.133703,0.191458,0.248938,0.185137,0.074143,0.044164,0.123506,0.186727,0.245160,0.301092
2524,2681,2.034000e+09,1904.710461,281.497920,271.424680,8.736028e-09,5.945933e-07,-1.274080e-06,0.000001,0.0,...,0.275025,0.379883,0.817026,0.355778,0.188340,0.052777,0.215953,0.324144,0.443239,0.960478
2525,2682,5.909998e+08,855.132522,101.747574,88.551700,6.394731e-07,5.979873e-07,-8.279983e-07,0.000002,0.0,...,0.250844,0.333195,0.788714,0.316916,0.159154,0.054409,0.204718,0.281668,0.405611,0.809738
2526,2683,3.910001e+08,702.758724,160.665740,78.558655,7.712831e-07,9.866407e-07,-4.123821e-07,0.000003,0.0,...,0.198725,0.376625,0.652517,0.455888,0.316789,0.094401,0.241629,0.388837,0.522512,1.544065
