In [1]:
import os
import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import seaborn as sns
import math
import statistics as stat
import json
from scipy import interp

In [2]:
from sklearn.model_selection import LeaveOneOut, LeaveOneGroupOut
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.preprocessing import LabelEncoder

In [3]:
from sklearn.svm import LinearSVC
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# from sklearn.preprocessing import LabelEncoder
# import metrics
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_auc_score, roc_curve, auc

from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MinMaxScaler

In [4]:
def get_repo_path_in_notebook():
    """
    Finds path of repo from Notebook.
    Start running this once to correctly find
    other modules/functions
    """
    path = os.getcwd()
    repo_name = 'ultraleap_analysis'

    while path[-len(repo_name):] != 'ultraleap_analysis':

        path = os.path.dirname(path)

    return path

In [5]:
repo_path = get_repo_path_in_notebook()
code_path = os.path.join(repo_path, 'code')
os.chdir(code_path)

In [6]:
import sig_processing.extract_scores as sc_ext
import sig_processing.predict as pred
import plotting.boxplotting as bxplt
import statistic.stat_analysis as statis

In [26]:
## Creates box plots with seaborn
def box_plot(feature, boxplot_label, X_df, task):

    path = os.path.join(repo_path, 'camera_comparison', task, 'boxplots')
    if not os.path.exists(path):
        os.makedirs(path)

    if boxplot_label == 'grouped_scores':
        ls_scores = [(0, 1), (2, 3, 4)]
    elif boxplot_label == 'scores':
        ls_scores = np.unique(y)
    elif boxplot_label == 'condition':
        ls_scores = ['m1s0', 'm1s1', 'm0s0', 'm0s1', 'm1']
    elif boxplot_label == 'camera':
        ls_scores = ['vr','dt','st']
        # X_df['condition'] = X_df['file'].apply(lambda x: x.split('_')[2] if len(x.split('_')) > 2 else None)
    else:
        raise ValueError("Invalid boxplot_label")

    # Create a list to store the data for each score
    data_by_score = []

    for score in ls_scores:
        if boxplot_label == 'scores':
            data = X_df[X_df['score'] == score][feature] # Wrap score in a list
        elif boxplot_label == 'grouped_scores':
            # for i in range(len(score)):
            data = X_df[X_df['score'].isin(score)][feature]
        elif boxplot_label == 'condition':
            # Extract the condition from the 'file' column and create a new 'condition' column
            # X_df['condition'] = X_df['file'].apply(lambda x: x.split('_')[2] if len(x.split('_')) > 2 else None)
            data = X_df[X_df['condition'] == score][feature]
        elif boxplot_label == 'camera':
            data = X_df[X_df['camera'] == score][feature]

        df = pd.DataFrame({
            boxplot_label: [score] * len(data),
            feature: data
        })
        data_by_score.append(df)

    # concatenate all dataframes
    data_df = pd.concat(data_by_score)

    # create the boxplot with seaborn
    plt.figure(figsize=(10,8))
    sns.boxplot(x=boxplot_label, y=feature, data=data_df, palette='summer')
    plt.title(f'Boxplot: {boxplot_label}')
    plt.savefig(os.path.join(path, f'box_plot_{feature}'), dpi = 300)
    plt.close()

    return

In [7]:
# define task & folder
task = 'ft'
folder  = 'control'
# feat_file = 'new_features'
feat_file = 'new_features'

feat_df_path = os.path.join(
    repo_path,
    'EDA',
    folder,
    task, 
    f'{feat_file}.csv'
    )

X_df = pd.read_csv(feat_df_path, index_col=0)

In [8]:
X_df

Unnamed: 0,file,num_events,mean_max_dist,sd_max_dist,coef_var_max_dist,slope_max_dist,decr_max_dist,mean_max_vel,sd_max_vel,coef_var_max_vel,...,mean_tap_dur,sd_tap_dur,coef_var_tap_dur,slope_tap_dur,mean_rms,sd_rms,slope_rms,sum_rms,jerkiness,entropy
0,b1_control001_m0_vr_ft_right.json,24.0,0.117761,0.007544,0.064065,-0.000664,-0.185735,1.960155,0.323416,0.164995,...,0.470056,0.200349,0.426223,0.000533,0.05433,0.005162,-0.000239,1.24959,6.184543,6.519147
1,b2_control001_m0_vr_ft_right.json,28.0,0.111317,0.004117,0.036983,-0.000218,-0.137997,2.467647,0.585288,0.237185,...,0.39344,0.151688,0.385542,0.004185,0.055711,0.004798,-4.1e-05,1.504197,8.214953,6.753438
2,b1_control001_m0_dt_ft_right.json,19.0,0.10657,0.007279,0.068303,-0.000986,-0.165012,1.529153,0.234664,0.15346,...,0.61511,0.254656,0.414001,0.005128,0.055213,0.005375,8e-06,0.993828,8.907881,6.458338
3,b2_control001_m0_dt_ft_right.json,26.0,0.111947,0.006547,0.058482,-0.000811,-0.177505,2.632114,0.444506,0.168878,...,0.415245,0.131324,0.316257,0.006793,0.054676,0.004332,-0.000365,1.366908,14.180783,6.738152
4,b1_control001_m0_st_ft_right.json,27.0,0.082891,0.004774,0.057593,-0.000555,-0.135484,0.707252,0.183831,0.259922,...,0.443324,0.102314,0.230789,-0.001171,0.061339,0.002529,-0.000162,1.594813,9.052591,6.575933
5,b2_control001_m0_st_ft_right.json,29.0,0.085344,0.00366,0.042887,-7.4e-05,-0.266883,0.858769,0.253132,0.294761,...,0.365675,0.043477,0.118894,-0.001969,0.063036,0.003758,9.6e-05,1.765007,12.574305,6.778785
6,b1_control001_m0_vr_ft_left.json,23.0,0.103154,0.006662,0.064584,-2.2e-05,0.048467,1.652921,0.361207,0.218526,...,0.47147,0.186444,0.395453,0.001546,0.050417,0.00491,0.000132,1.109163,8.330426,6.683361
7,b2_control001_m0_vr_ft_left.json,29.0,0.116613,0.005073,0.043499,-0.000279,-0.130014,2.581341,0.517185,0.200355,...,0.394291,0.166799,0.423036,0.00183,0.056785,0.005351,-0.000101,1.589978,12.252777,6.865891
8,b1_control001_m0_dt_ft_left.json,24.0,0.083211,0.008896,0.106912,-0.000691,-0.277371,1.128565,0.416032,0.368638,...,0.499637,0.198833,0.397954,0.002216,0.047473,0.006477,5.3e-05,1.091886,9.052591,6.577861
9,b2_control001_m0_dt_ft_left.json,24.0,0.086092,0.012385,0.143857,-0.000941,-0.391214,1.47557,0.364248,0.246852,...,0.439959,0.155885,0.354317,-0.001439,0.039137,0.00639,-0.000299,0.900147,11.053852,6.682109


In [54]:
ls_ft = X_df['file'].to_list()
ls_ft

['b1_control001_m0_vr_ft_right.json',
 'b2_control001_m0_vr_ft_right.json',
 'b1_control001_m0_dt_ft_right.json',
 'b2_control001_m0_dt_ft_right.json',
 'b1_control001_m0_st_ft_right.json',
 'b2_control001_m0_st_ft_right.json',
 'b1_control001_m0_vr_ft_left.json',
 'b2_control001_m0_vr_ft_left.json',
 'b1_control001_m0_dt_ft_left.json',
 'b2_control001_m0_dt_ft_left.json',
 'b1_control001_m0_st_ft_left.json',
 'b2_control001_m0_st_ft_left.json',
 'b1_control002_m0_vr_ft_right.json',
 'b2_control002_m0_vr_ft_right.json',
 'b1_control002_m0_dt_ft_right.json',
 'b2_control002_m0_dt_ft_right.json',
 'b1_control002_m0_st_ft_right.json',
 'b2_control002_m0_st_ft_right.json',
 'b1_control002_m0_vr_ft_left.json',
 'b2_control002_m0_vr_ft_left.json',
 'b1_control002_m0_dt_ft_left.json',
 'b2_control002_m0_dt_ft_left.json',
 'b1_control002_m0_st_ft_left.json',
 'b2_control002_m0_st_ft_left.json',
 'b1_control003_m0_vr_ft_right.json',
 'b2_control003_m0_vr_ft_right.json',
 'b1_control003_m0_dt_ft

In [73]:
ls_ft_without_cam = [file[:19]+file[22:] for file in ls_ft]
ls_ft_without_cam

['b1_control001_m0_vr_right.json',
 'b2_control001_m0_vr_right.json',
 'b1_control001_m0_dt_right.json',
 'b2_control001_m0_dt_right.json',
 'b1_control001_m0_st_right.json',
 'b2_control001_m0_st_right.json',
 'b1_control001_m0_vr_left.json',
 'b2_control001_m0_vr_left.json',
 'b1_control001_m0_dt_left.json',
 'b2_control001_m0_dt_left.json',
 'b1_control001_m0_st_left.json',
 'b2_control001_m0_st_left.json',
 'b1_control002_m0_vr_right.json',
 'b2_control002_m0_vr_right.json',
 'b1_control002_m0_dt_right.json',
 'b2_control002_m0_dt_right.json',
 'b1_control002_m0_st_right.json',
 'b2_control002_m0_st_right.json',
 'b1_control002_m0_vr_left.json',
 'b2_control002_m0_vr_left.json',
 'b1_control002_m0_dt_left.json',
 'b2_control002_m0_dt_left.json',
 'b1_control002_m0_st_left.json',
 'b2_control002_m0_st_left.json',
 'b1_control003_m0_vr_right.json',
 'b2_control003_m0_vr_right.json',
 'b1_control003_m0_dt_right.json',
 'b2_control003_m0_dt_right.json',
 'b1_control003_m0_st_right.json

In [75]:
compare_ls_ft_ls_oc = [i!=j for i,j in zip(ls_ft,ls_oc)]
len(compare_ls_ft_ls_oc)

54

In [79]:
len(ls_ft_without_cam)

54

In [85]:
l = list(set(ls_ft_without_cam).symmetric_difference(set(ls_oc_without_cam)))
print(l)

['b2_control005_m0_st_left.json', 'b2_control005_m0_dt_left.json', 'b2_control004_m0_vr_left.json', 'b1_control005_m0_st_left.json', 'b2_control004_m0_vr_right.json', 'b1_control005_m0_dt_left.json']


In [74]:
ls_oc_without_cam = [file[:19]+file[22:] for file in ls_oc]
ls_oc_without_cam

['b1_control001_m0_vr_right.json',
 'b2_control001_m0_vr_right.json',
 'b1_control001_m0_dt_right.json',
 'b2_control001_m0_dt_right.json',
 'b1_control001_m0_st_right.json',
 'b2_control001_m0_st_right.json',
 'b1_control001_m0_vr_left.json',
 'b2_control001_m0_vr_left.json',
 'b1_control001_m0_dt_left.json',
 'b2_control001_m0_dt_left.json',
 'b1_control001_m0_st_left.json',
 'b2_control001_m0_st_left.json',
 'b1_control002_m0_vr_right.json',
 'b2_control002_m0_vr_right.json',
 'b1_control002_m0_dt_right.json',
 'b2_control002_m0_dt_right.json',
 'b1_control002_m0_st_right.json',
 'b2_control002_m0_st_right.json',
 'b1_control002_m0_vr_left.json',
 'b2_control002_m0_vr_left.json',
 'b1_control002_m0_dt_left.json',
 'b2_control002_m0_dt_left.json',
 'b1_control002_m0_st_left.json',
 'b2_control002_m0_st_left.json',
 'b1_control003_m0_vr_right.json',
 'b2_control003_m0_vr_right.json',
 'b1_control003_m0_dt_right.json',
 'b2_control003_m0_dt_right.json',
 'b1_control003_m0_st_right.json

In [51]:
ls_oc = X_df['file'].to_list()
ls_oc

['b1_control001_m0_vr_oc_right.json',
 'b2_control001_m0_vr_oc_right.json',
 'b1_control001_m0_dt_oc_right.json',
 'b2_control001_m0_dt_oc_right.json',
 'b1_control001_m0_st_oc_right.json',
 'b2_control001_m0_st_oc_right.json',
 'b1_control001_m0_vr_oc_left.json',
 'b2_control001_m0_vr_oc_left.json',
 'b1_control001_m0_dt_oc_left.json',
 'b2_control001_m0_dt_oc_left.json',
 'b1_control001_m0_st_oc_left.json',
 'b2_control001_m0_st_oc_left.json',
 'b1_control002_m0_vr_oc_right.json',
 'b2_control002_m0_vr_oc_right.json',
 'b1_control002_m0_dt_oc_right.json',
 'b2_control002_m0_dt_oc_right.json',
 'b1_control002_m0_st_oc_right.json',
 'b2_control002_m0_st_oc_right.json',
 'b1_control002_m0_vr_oc_left.json',
 'b2_control002_m0_vr_oc_left.json',
 'b1_control002_m0_dt_oc_left.json',
 'b2_control002_m0_dt_oc_left.json',
 'b1_control002_m0_st_oc_left.json',
 'b2_control002_m0_st_oc_left.json',
 'b1_control003_m0_vr_oc_right.json',
 'b2_control003_m0_vr_oc_right.json',
 'b1_control003_m0_dt_oc

In [35]:
feats = X_df.columns[1:]
X_df['camera'] = X_df['file'].apply(lambda x: x.split('_')[3] if len(x.split('_')) > 2 else None)
bx_label = 'camera'
for feat in feats:
    if feat != 'camera':
        box_plot(feat, bx_label, X_df, task)