In [1]:
# Basic imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import glob

# preprocessing and split
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import StandardScaler

# models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier


# Scores and reports
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, plot_roc_curve
from sklearn.decomposition import PCA

In [2]:
# Build one DataFrame out of every participant's recordings.
# NOTE: glob.glob returns entries in arbitrary (filesystem-dependent) order, so the
# integer stored in "Name" for each participant folder depends on the machine.
Names = glob.glob(r'/Users/ortrabelsi/Desktop/מטלה דימות נתונים/סופי 1/Training' + "/*")
labels = ["Spontan", "Sync", "Alone"]  # position in this list is the numeric "Label"
li = []
for i, name in enumerate(Names):
    all_files = glob.glob(name + "/*.csv")

    for filename in all_files:
        df = pd.read_csv(filename, index_col=None, header=0)
        for j, label in enumerate(labels):
            if label in filename:
                df["Label"] = j
                df["Name"] = i
                # Remove the first 7 sec from each test.
                df = df[df["Time"] > df["Time"].iloc[0] + 7]
                # Stop at the first matching label so a path that happens to contain
                # two label strings is not trimmed (and relabelled) a second time.
                break
        # Files whose path matches no label are still appended (without Label/Name).
        li.append(df)

# Merge data frames.
frame = pd.concat(li, axis=0, ignore_index=True)

# Encode hand type as 0 (left) or 1 (right).
frame.loc[frame[' Hand Type'] == 'left', " Hand Type"] = 0
frame.loc[frame[' Hand Type'] == 'right', " Hand Type"] = 1
# np.int was only an alias of the builtin int and was removed in NumPy 1.24.
frame[' Hand Type'] = frame[' Hand Type'].astype(int)

### We index all the names because we want all the information to be numerical:
  
### Names:  
0 Oriya Social_Nuero  
1 Evyatar Cohen  
2 Shelly Social_Nuero  
3 Revital Marbel  
4 Nofar Social_Nuero  
5 Yoel Raz  
6 Orya Kalmanovitz  
7 Shahar Terner  
8 Yael Hagai  
  
### Labels:    
0 Spontan  
1 Sync  
2 Alone  
  
### Hand Type:    
0 left  
1 right

In [3]:
# Show each column's dtype and non-null count for the merged frame.
frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172916 entries, 0 to 172915
Data columns (total 24 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Time             172916 non-null  float64
 1    Frame ID        172916 non-null  int64  
 2    Hand Type       172916 non-null  int64  
 3    # hands         172916 non-null  int64  
 4    Position X      172916 non-null  float64
 5    Position Y      172916 non-null  float64
 6    Position Z      172916 non-null  float64
 7    Velocity X      172916 non-null  float64
 8    Velocity Y      172916 non-null  float64
 9    Velocity Z      172916 non-null  float64
 10   Pitch           172916 non-null  float64
 11   Roll            172916 non-null  float64
 12   Yaw             172916 non-null  float64
 13   Wrist Pos X     172916 non-null  float64
 14   Wrist Pos Y     172916 non-null  float64
 15   Wrist Pos Z     172916 non-null  float64
 16   Elbow pos X     172916 non-null  floa

### We can see that all the data is numerical!

In [4]:
# Count missing values per column.
frame.isna().sum()

Time               0
 Frame ID          0
 Hand Type         0
 # hands           0
 Position X        0
 Position Y        0
 Position Z        0
 Velocity X        0
 Velocity Y        0
 Velocity Z        0
 Pitch             0
 Roll              0
 Yaw               0
 Wrist Pos X       0
 Wrist Pos Y       0
 Wrist Pos Z       0
 Elbow pos X       0
 Elbow Pos Y       0
 Elbow Pos Z       0
 Grab Strenth      0
 Grab Angle        0
 Pinch Strength    0
Label              0
Name               0
dtype: int64

In [5]:
# Count rows per (Frame ID, Label, Name) combination and display the
# combinations that occur more than twice.
g = frame.groupby(by=[' Frame ID', 'Label', 'Name']).count()
g.loc[g["Time"] > 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Time,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,...,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength
Frame ID,Label,Name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
47482,0,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
50542,0,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
51624,0,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
52368,0,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
55239,1,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
59326,1,6,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
68032,0,3,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
83476,1,3,4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4


In [6]:
# (rows, columns) before filtering on the number of hands.
frame.shape

(172916, 24)

In [7]:
# Keep only the rows whose ' # hands' value is greater than 1, then show the new size.
frame = frame.loc[frame[' # hands'] > 1]
frame.shape

(137042, 24)

In [94]:
def distance3D(x1, y1, z1, x2, y2, z2):
    """Return the Euclidean distance between (x1, y1, z1) and (x2, y2, z2).

    Only arithmetic operators and ``np.sqrt`` are used, so the arguments may be
    scalars or NumPy arrays; with arrays the distance is computed element-wise.
    """
    dx = x2 - x1
    dy = y2 - y1
    dz = z2 - z1
    return np.sqrt(dx ** 2 + dy ** 2 + dz ** 2)
def distance2D(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2).

    Only arithmetic operators and ``np.sqrt`` are used, so the arguments may be
    scalars or NumPy arrays; with arrays the distance is computed element-wise.
    """
    dx = x2 - x1
    dy = y2 - y1
    return np.sqrt(dx ** 2 + dy ** 2)

In [9]:
# Split the recordings by hand, leaving out Label 2:
#   z -> rows with Hand Type 1, x -> rows with Hand Type 0.
z = frame.loc[frame[' Hand Type'].eq(1) & frame['Label'].ne(2)]
x = frame.loc[frame[' Hand Type'].eq(0) & frame['Label'].ne(2)]

In [10]:
# Index both per-hand frames by (Time, Name) so they can be aligned on those keys.
# NOTE: set_index moves 'Time' and 'Name' out of the columns, so re-running this
# cell without first re-creating x and z raises a KeyError.
x = x.set_index(['Time', 'Name'])
z = z.set_index(['Time', 'Name'])

In [51]:
# Put both hands of the same (Time, Name) key on one row.
# Column names are derived from the source frames instead of a hand-typed list, so
# they cannot drift out of sync with the CSV schema:
#   * columns coming from z keep their name, minus the leading space of the CSV header;
#   * columns coming from x get the same cleaned name with a '_' prefix.
result = pd.concat(
    [
        z.rename(columns=str.strip),
        x.rename(columns=lambda col: '_' + col.strip()),
    ],
    axis=1,
)

In [108]:
# For each triple of columns, add the 3-D Euclidean distance between the
# un-prefixed columns and their '_'-prefixed counterparts as a new feature.
# Keys are the new column names; values are the three component columns.
for dis_col, xyz_cols in {
    'Position Dis': ('Position X', 'Position Y', 'Position Z'),
    'Pitch Roll Yaw Dis': ('Pitch', 'Roll', 'Yaw'),
    'Wrist Pos Dis': ('Wrist Pos X', 'Wrist Pos Y', 'Wrist Pos Z'),
    'Elbow pos Dis': ('Elbow pos X', 'Elbow Pos Y', 'Elbow Pos Z'),
}.items():
    first_xyz = [result[col].to_numpy() for col in xyz_cols]
    second_xyz = [result['_' + col].to_numpy() for col in xyz_cols]
    result[dis_col] = distance3D(*first_xyz, *second_xyz)



In [111]:
# Display the table without the bookkeeping columns and the raw position / angle /
# wrist / elbow components of both hands.
# NOTE: the returned frame is only displayed, not assigned; `result` keeps every column.
result.drop(columns=[
    # bookkeeping
    'Frame ID', '_Frame ID',
    'Hand Type', '_Hand Type',
    '# hands', '_# hands',
    'Label',
    # palm position
    'Position X', 'Position Y', 'Position Z',
    '_Position X', '_Position Y', '_Position Z',
    # orientation
    'Pitch', 'Roll', 'Yaw',
    '_Pitch', '_Roll', '_Yaw',
    # wrist position
    'Wrist Pos X', 'Wrist Pos Y', 'Wrist Pos Z',
    '_Wrist Pos X', '_Wrist Pos Y', '_Wrist Pos Z',
    # elbow position
    'Elbow pos X', 'Elbow Pos Y', 'Elbow Pos Z',
    '_Elbow pos X', '_Elbow Pos Y', '_Elbow Pos Z',
])

Unnamed: 0_level_0,Unnamed: 1_level_0,Velocity X,Velocity Y,Velocity Z,Grab Strenth,Grab Angle,Pinch Strength,_Velocity X,_Velocity Y,_Velocity Z,_Grab Strenth,_Grab Angle,_Pinch Strength,_Label,dis,Position Dis,Pitch Roll Yaw Dis,Elbow pos Dis,Wrist Pos Dis
Time,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
706.2290,0,102.422200,131.220600,-248.5983,0.0,0.087518,0.0,-86.600180,129.33130,-76.362170,0.0,0.000000,0.00000,1,230.435822,230.435822,3.070053,697.853169,295.720614
706.2448,0,71.367130,45.372480,-249.5586,0.0,0.081127,0.0,-115.173900,159.14350,-57.412770,0.0,0.000000,0.00000,1,232.658995,232.658995,3.058918,696.187240,295.365209
706.2614,0,49.867860,6.072713,-275.5513,0.0,0.050581,0.0,-129.385300,163.29210,-31.097620,0.0,0.000000,0.00000,1,234.537496,234.537496,3.047360,691.620639,294.772709
706.2780,0,37.896940,-1.944623,-260.2235,0.0,0.040162,0.0,-139.472800,172.40940,-20.110030,0.0,0.000000,0.00000,1,235.536693,235.536693,3.043426,687.871261,294.487124
706.2947,0,47.227250,-12.433580,-265.6293,0.0,0.033259,0.0,-141.824900,182.79920,-13.664930,0.0,0.000000,0.00000,1,237.610009,237.610009,3.042171,682.034998,294.390023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250.4559,8,337.593200,409.950100,379.7256,0.0,0.000000,0.0,8.349696,-24.89295,22.597310,0.0,2.316369,1.00000,0,390.669440,390.669440,2.657662,491.862333,433.907106
250.4725,8,58.345050,235.841100,311.2217,0.0,0.000000,0.0,38.420360,24.12613,-0.512933,0.0,2.329724,1.00000,0,394.276903,394.276903,2.785604,487.184573,434.815028
250.4891,8,2.671451,233.980000,228.4638,0.0,0.000000,0.0,-35.421390,116.10120,-69.644100,0.0,2.386132,1.00000,0,395.882170,395.882170,2.839269,488.695741,435.354342
250.5056,8,-78.759040,41.621510,152.6175,0.0,0.000000,0.0,-68.142180,149.32880,-127.051700,0.0,2.399800,1.00000,0,395.991784,395.991784,2.832885,498.165670,435.849251


0