In [15]:
import numpy as np
import pandas as pd
import random
from scipy.io import arff
from itertools import combinations
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_validate
from math import sqrt

In [16]:
def compute_eer_from_paper(labels, prediction):
    """Estimate the equal error rate (EER) from a ROC curve.

    The ROC curve is computed with ``sklearn.metrics.roc_curve`` and the
    operating point where the false-negative rate (``1 - tpr``) is closest to
    the false-positive rate is selected.

    Args:
        labels: Ground-truth binary labels, passed straight to ``roc_curve``.
        prediction: Scores (or predicted labels) for the same samples.

    Returns:
        The false-positive rate at the selected operating point.
    """
    from sklearn.metrics import roc_curve

    fprs, tprs, _ = roc_curve(labels, prediction)
    # nanargmin skips NaN entries in the rate arrays instead of letting them
    # win the comparison.
    crossing = np.nanargmin(np.absolute((1 - tprs) - fprs))
    # The value is returned without being printed: callers decide whether to
    # display it, which keeps loops over many models from flooding the output.
    return fprs[crossing]

In [17]:
def compute_eer_changjiang(labels, prediction):
    """Estimate the equal error rate by root-finding on an interpolated ROC curve.

    The ROC points from ``sklearn.metrics.roc_curve`` are linearly
    interpolated (tpr as a function of fpr) and ``brentq`` searches [0, 1]
    for the false-positive rate ``x`` satisfying ``1 - x == tpr(x)``.

    Args:
        labels: Ground-truth binary labels, passed straight to ``roc_curve``.
        prediction: Scores (or predicted labels) for the same samples.

    Returns:
        The false-positive rate at which it equals the false-negative rate.
    """
    from scipy.interpolate import interp1d
    from scipy.optimize import brentq
    from sklearn.metrics import roc_curve

    false_positive_rate, true_positive_rate, _thresholds = roc_curve(labels, prediction)
    roc = interp1d(false_positive_rate, true_positive_rate)

    def miss_rate_minus_false_alarm(rate):
        # Zero exactly where the false-negative rate equals the false-positive rate.
        return 1. - rate - roc(rate)

    return brentq(miss_rate_minus_false_alarm, 0., 1.)

In [18]:
def calculate_eer(y_true, y_score):
    """Compute the equal error rate (EER) for boolean ground truth.

    The EER is the operating point where the false-positive rate equals the
    false-negative rate (``1 - tpr``). The ROC point whose two rates are
    closest is selected and their mean is returned.

    Args:
        y_true: Ground-truth labels; ``True`` is treated as the positive class.
        y_score: Scores (or predicted labels) for the same samples.

    Returns:
        The mean of the false-positive and false-negative rates at the
        selected operating point.
    """
    from sklearn.metrics import roc_curve

    fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=True)
    # Compare fpr against fnr, not tpr: |fpr - tpr| is 0 at the ROC origin,
    # which would always select the first point and yield an EER of 0.
    fnr = 1 - tpr
    crossing = np.nanargmin(np.abs(fpr - fnr))
    return (fpr[crossing] + fnr[crossing]) / 2

In [19]:
# Feature columns removed from every sensor frame. The names include literal
# double quotes because the drop happens before processed_dataframe strips
# quotes from the column names.
columns_to_be_dropped = [
    column
    for axis in ['X', 'Y', 'Z']
    # Per axis: the variance column followed by the 13 MFCC columns.
    for column in [f'"{axis}VAR"', *(f'"{axis}MFCC{index}"' for index in range(13))]
]
# Per axis pair (XY, XZ, YZ): the COS column followed by the COR column.
for axis_pair in combinations(['X', 'Y', 'Z'], 2):
    joined_axes = "".join(axis_pair)
    columns_to_be_dropped += [f'"{joined_axes}COS"', f'"{joined_axes}COR"']

# Keys under which each person's sensor frames are stored.
sensors = ["phone_accel", "watch_accel", "phone_gyro", "watch_gyro"]
# Activity codes A..S; note that 'N' is not part of the list.
activities = list("ABCDEFGHIJKLMOPQRS")

In [20]:
def processed_dataframe(raw: pd.DataFrame, cols_to_drop: list[str]) -> pd.DataFrame:
    """Return a cleaned copy of a freshly loaded sensor frame.

    Args:
        raw: Frame to clean; it is not modified.
        cols_to_drop: Column names (as they appear in ``raw``) to remove.

    Returns:
        A new frame without ``cols_to_drop``, with double quotes stripped from
        the remaining column names and the byte-string ``ACTIVITY`` and
        ``class`` columns decoded as UTF-8.
    """
    trimmed = raw.drop(columns=cols_to_drop)
    cleaned = trimmed.rename(columns=lambda name: name.replace('"', ""))
    for text_column in ('ACTIVITY', 'class'):
        cleaned[text_column] = cleaned[text_column].str.decode('utf-8')
    return cleaned

In [21]:
def pick_impostors(guid: str, people: list[dict[str, pd.DataFrame]]) -> list[dict[str, pd.DataFrame]]:
    return random.choices([i for i in people if i['guid'] != guid], k=18)


def biometric_train_test_split(person, impostors, activity, sensors):
    """Build train/test frames for authenticating one person on one activity.

    For the genuine person and for every impostor, the rows of ``activity``
    are taken from each sensor frame, the ``ACTIVITY`` and ``class`` columns
    are removed, the remaining columns get the sensor name as suffix, and the
    per-sensor frames are joined on their index. Impostors contribute only
    the first three rows of each sensor frame.

    Args:
        person: Mapping from sensor name to the genuine person's frame.
        impostors: Sequence of mappings with the same layout as ``person``.
        activity: Activity code to select.
        sensors: Sensor names to combine; must contain at least one entry.

    Returns:
        ``(training_set, testing_set)``. Both carry a boolean ``class`` column
        (True for the genuine person, False for impostors). Training holds the
        first half of the genuine rows and the first half of the impostors;
        testing holds the remainder.
    """
    activity_filter = f"`ACTIVITY`=='{activity}'"

    def sensor_features(subject, sensor, row_limit=None):
        # Rows of the requested activity for one sensor, columns tagged with the sensor name.
        features = subject[sensor].query(activity_filter).drop(
            columns=['ACTIVITY', 'class'])
        if row_limit is not None:
            features = features[:row_limit]
        return features.add_suffix(sensor)

    def combined_features(subject, row_limit=None):
        # Index-aligned join of the selected sensors for one subject.
        combined = sensor_features(subject, sensors[0], row_limit)
        for sensor in sensors[1:]:
            combined = combined.merge(
                sensor_features(subject, sensor, row_limit),
                left_index=True, right_index=True)
        return combined

    genuine = combined_features(person)
    genuine['class'] = True

    impostor_frames = []
    for impostor in impostors:
        frame = combined_features(impostor, 3)
        frame['class'] = False
        impostor_frames.append(frame)

    genuine_cut = len(genuine) // 2
    impostor_cut = len(impostor_frames) // 2
    training_set = pd.concat([genuine[:genuine_cut], *impostor_frames[:impostor_cut]])
    testing_set = pd.concat([genuine[genuine_cut:], *impostor_frames[impostor_cut:]])

    return training_set, testing_set

In [29]:
def person_classification_df_builder(people: list[dict[str, int | pd.DataFrame]], sensors: list[str]) -> pd.DataFrame:
    result_df: pd.DataFrame = None
    for person in people:
        df: pd.DataFrame = None
        for sensor in sensors:
            df = person[sensor].drop(columns=['class']).add_suffix(sensor) if df is None else df.merge(
                person[sensor].drop(columns=['class']).add_suffix(sensor), left_index=True, right_index=True)
            df = pd.get_dummies(df)
        df['class']=person['guid']
        result_df = df if result_df is None else pd.concat([result_df, df])
    return result_df.dropna()

def activity_classification_df_builder(people: list[dict[str, int | pd.DataFrame]], sensors: list[str]) -> pd.DataFrame:
    result_df: pd.DataFrame = None
    for person in people:
        df: pd.DataFrame = None
        for sensor in sensors:
            df = person[sensor].drop(columns=['class','ACTIVITY']).add_suffix(sensor) if df is None else df.merge(
                person[sensor].drop(columns=['class','ACTIVITY']).add_suffix(sensor), left_index=True, right_index=True)
        result_df = df if result_df is None else pd.concat([result_df, df])
    return result_df.dropna()

In [23]:
def _load_sensor_frame(path: str) -> pd.DataFrame:
    """Read one ARFF file and clean it with processed_dataframe."""
    records, _meta = arff.loadarff(path)
    return processed_dataframe(pd.DataFrame(records), columns_to_be_dropped)


# One record per subject id in 1600..1650: the id under 'guid' plus the four
# cleaned sensor frames.
people: list[dict[str, pd.DataFrame]] = []
for guid in range(1600, 1651):
    if guid == 1614:  # person 1614 missing in files
        continue
    people.append({
        'guid': guid,
        'phone_accel': _load_sensor_frame(f"arff_files/phone/accel/data_{guid}_accel_phone.arff"),
        'phone_gyro': _load_sensor_frame(f"arff_files/phone/gyro/data_{guid}_gyro_phone.arff"),
        'watch_accel': _load_sensor_frame(f"arff_files/watch/accel/data_{guid}_accel_watch.arff"),
        'watch_gyro': _load_sensor_frame(f"arff_files/watch/gyro/data_{guid}_gyro_watch.arff"),
    })

In [24]:
# Sensor subsets to evaluate: every sensor on its own, every pair of sensors,
# and finally the complete sensor list itself.
every_combination = [
    *([sensor] for sensor in sensors),
    *map(list, combinations(sensors, 2)),
    sensors,
]

### Authentication

In [25]:
# NOTE(review): disabled draft of the per-person / per-activity / per-sensor-set
# authentication experiment. Before re-enabling it, address the following:
#   * `train_target` is a single column (`train['class']`), so
#     `train_target.columns` fails; the feature count would be `len(train.columns)`.
#   * `classifier.fit(...)` uses a name that is not defined in the visible cells;
#     the model created just above is `biometry_classifier`.
#   * `biometry_classifier.predict(test)` yields hard labels, so the ROC curve
#     behind the EER has a single operating point; class probabilities
#     (`predict_proba`) would give a proper curve.
#   * the initial dict is filled with the type `tuple[RandomForestClassifier,float]`
#     as a placeholder; combinations skipped by `continue` keep that placeholder.
# biometry_models = {person['guid']:{activity: {'+'.join(combination): tuple[RandomForestClassifier,float] for combination in every_combination} for activity in activities } for person in people}
# for person in people:
#     impostors = pick_impostors(person['guid'], people)
#     for activity in activities:
#         for combination in every_combination:
#             train, test = biometric_train_test_split(
#                 person, impostors, activity, combination)
#             if len(train) == 0 or len(test)==0:
#                 continue
#             train_target = train['class']
#             train = train.drop(columns=['class'])
#             test_target = test['class']
#             test = test.drop(columns=['class'])
#             biometry_classifier = RandomForestClassifier(
#                 10, max_features=int(sqrt(len(train_target.columns))))
#             classifier.fit(train, train_target)
#             biometry_models[person['guid']][activity]['+'.join(combination)] = (biometry_classifier, compute_eer_from_paper(test_target,biometry_classifier.predict(test)))

### People classification

In [26]:
# Fixed seed so the fold assignment and the forest are reproducible.
RANDOM_STATE = 42

# person_classification_df_builder stacks one block of rows per person, so the
# table is grouped by class. Unshuffled K-fold would cut it into contiguous
# blocks whose test folds consist mostly of people absent from the matching
# training folds; shuffling spreads every person across the folds.
kf = KFold(n_splits=10, shuffle=True, random_state=RANDOM_STATE)
people_df = person_classification_df_builder(people, sensors)
people_target = people_df['class']
people_df = people_df.drop(columns=['class'])
people_classifier = RandomForestClassifier(
    n_estimators=10,
    max_features=int(sqrt(len(people_df.columns))),
    random_state=RANDOM_STATE,
)
# No explicit fit is needed: cross_validate fits the estimator on each training fold.
score = cross_validate(people_classifier, people_df, people_target, cv=kf, scoring='accuracy')
score

In [62]:
# Inspect the cleaned phone-accelerometer frame of the last loaded subject:
# ids 1600..1650 without 1614 give 50 records, so position 49 is guid 1650.
people[49]['phone_accel']

Unnamed: 0,ACTIVITY,X0,X1,X2,X3,X4,X5,X6,X7,X8,...,YPEAK,ZPEAK,XABSOLDEV,YABSOLDEV,ZABSOLDEV,XSTANDDEV,YSTANDDEV,ZSTANDDEV,RESULTANT,class
0,A,0.230,0.235,0.385,0.085,0.055,0.01,0.0,0.0,0.0,...,89.5238,71.4815,3.797170,4.201190,3.606970,0.466331,0.458116,0.347447,11.51220,1650
1,A,0.975,0.020,0.005,0.000,0.000,0.00,0.0,0.0,0.0,...,65.3333,71.8519,2.978590,2.154440,3.166530,0.255185,0.193401,0.289781,10.66480,1650
2,A,1.000,0.000,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,67.2414,79.5455,2.561740,1.712110,2.756330,0.221399,0.152390,0.245360,10.11240,1650
3,A,0.995,0.005,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,76.2500,71.5385,2.651430,1.869280,2.512110,0.230854,0.158261,0.228532,10.19920,1650
4,A,0.990,0.010,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,77.3913,105.0000,2.577570,1.228350,2.000750,0.217810,0.113445,0.185843,9.78553,1650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,S,1.000,0.000,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,83.9130,81.7391,0.534025,0.637936,2.034310,0.049052,0.053133,0.170775,9.40653,1650
799,S,1.000,0.000,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,75.6000,85.9091,0.470651,0.645209,0.774987,0.043370,0.054753,0.070894,9.41056,1650
800,S,1.000,0.000,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,76.8000,78.7500,0.413499,0.973854,1.381650,0.036307,0.075720,0.122339,9.44613,1650
801,S,1.000,0.000,0.000,0.000,0.000,0.00,0.0,0.0,0.0,...,76.8000,77.2727,0.820190,0.877469,1.827780,0.070485,0.075312,0.158120,9.44593,1650
