In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "dim_reduction"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    Parameters
    ----------
    fig_id : str
        File name without extension.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str
        File extension, also passed to ``savefig`` as the output format.
    resolution : int
        DPI passed to ``savefig``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Show the value of every top-level expression in a cell, not only the last one
# (several cells below rely on this to display multiple results).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.utils import io

# Output produced inside this block is collected in `captured` instead of being shown.
with io.capture_output() as captured:
    print('no prints in here')

import time
import pandas as pd

In [2]:
# Integer class labels written to the `state` column of every recording.
# DataLoader picks one of them from the marker found in the CSV file name.
Alone = 0
Spontan = 1
Sync = 2

First, let's load the right-hand solo recording.
From the get-go, we will merge it into the solo ("Alone") hand recordings.

In [3]:
# Build the path with os.path.join: the former literal "Unity Data\HandRight.csv"
# contained the invalid escape sequence "\H" and only worked on Windows.
HandRight = pd.read_csv(os.path.join("Unity Data", "HandRight.csv"))
#HandRight.head(10)

In [4]:
# Positional indices of the raw-CSV columns removed before any processing.
# Per the original note, the hands-count column is among them: keeping it
# would leak the label to the model ("cheating").
drop_indecies = [1,2,3]

In [5]:
# Remove the excluded columns, then keep rows 500..3999 of the recording.
# NOTE: this rebinds HandRight, so running the cell twice trims it twice.
HandRight = HandRight.drop(columns=HandRight.columns[drop_indecies]).iloc[500:4000]

In [6]:
# Sanity check: expect 3500 rows (the 500:4000 slice) and 19 remaining columns.
HandRight.shape

#HandRight.info()

(3500, 19)

In [69]:
# Experiment: out of every 4 rows, put the 1st and the 2nd side by side in one row.
k = pd.concat(
    [HandRight.iloc[offset::4].reset_index(drop=True) for offset in (0, 1)],
    axis=1,
)
k.shape


(875, 38)

In [24]:
#HandRight.iloc[1:5*3+1:3,:]

Unnamed: 0,Time,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength
501,136.6354,2.990307,163.0106,14.22045,63.82683,-27.71448,91.91269,-0.166827,0.079468,-0.760001,60.39677,173.5084,62.45371,257.0284,134.3396,242.4251,0.0,0.405177,0.0
504,136.6873,6.809369,163.075,18.31746,64.54992,-20.25474,60.26554,-0.162684,0.0714,-0.747182,63.8033,173.8077,67.11089,263.0048,144.4967,246.1332,0.0,0.299372,0.0
507,136.7356,9.854824,164.674,21.33324,48.95504,50.64619,53.4698,-0.138619,0.050727,-0.728599,66.04675,174.0629,71.30888,260.6205,138.9874,254.3455,0.0,0.332174,0.0
510,136.7852,11.72584,165.0486,23.11185,43.53115,-0.453021,43.8352,-0.134189,0.040016,-0.717533,67.10747,174.2701,73.99036,252.3753,140.8113,266.7354,0.0,0.404081,0.0
513,136.8351,13.71593,161.7758,25.69214,20.70159,-149.3925,45.11576,-0.099915,0.021586,-0.69519,67.89079,169.6017,78.132,244.3223,140.6269,279.704,0.0,0.508077,0.0


In [8]:
def TransformData(df, type):
    """Trim one raw recording and tag every row with its class label.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw recording as read from CSV.
    type : int
        Label stored in the new ``state`` column (``Alone``, ``Spontan`` or
        ``Sync``). The name shadows the builtin but is kept for callers.

    Returns
    -------
    pandas.DataFrame
        The trimmed recording with an added ``state`` column.

        * ``Alone``: rows 500..3999 are kept and interleaved row by row with
          the module-level ``HandRight`` frame (HandRight row first).
        * otherwise: rows 1000..8999 are kept, cut to an even number of rows
          (presumably so that rows stay in complete two-hand pairs).
    """
    df = df.drop(df.columns[drop_indecies], axis=1)
    if type == Alone:
        df = df.iloc[500:4000]
        # hstack needs the same number of rows on both sides, so use the
        # shorter of the two recordings.
        maxX = min(df.shape[0], HandRight.shape[0])
        # [right | left] side by side, then reshaped back to the original
        # width -> rows alternate right, left, right, left, ...
        combine = np.hstack([HandRight[:maxX].values, df.values[:maxX]]).reshape(-1, df.shape[1])
        df = pd.DataFrame(combine, columns=df.columns)
    else:
        df = df.iloc[1000:9000]
        maxX = (df.shape[0] // 2) * 2
        # Copy the slice so adding the label column below does not write into
        # a view of the caller's frame (avoids SettingWithCopyWarning).
        df = df.iloc[:maxX].copy()
    df["state"] = type
    return df

In [9]:
import glob

class DataLoader:
    """Load and label every CSV recording found one folder level below ``path``.

    Each ``<path>/<folder>/*.csv`` file is labelled from the marker in its name
    ("Alone", "Sync" or "Spontan"), passed through ``TransformData`` and stored.

    Attributes
    ----------
    dataRaw : list of pandas.DataFrame
        One transformed frame per loaded file, in sorted folder/file order.
    dataMerged : pandas.DataFrame
        Row-wise concatenation of ``dataRaw``.

    Raises
    ------
    ValueError
        If no labelled CSV file is found under ``path``.
    """

    @staticmethod
    def _label_from_name(name):
        """Return the label whose marker occurs in ``name``, or None if none does."""
        if "Alone" in name:
            return Alone
        if "Sync" in name:
            return Sync
        if "Spontan" in name:
            return Spontan
        return None

    def __init__(self, path):
        li = []
        v = 0
        # glob returns entries in arbitrary order; sorting keeps dataRaw indices
        # (used by later cells) reproducible across runs and machines.
        for folder in sorted(glob.glob(path + "/*")):
            print("\nloading in" ,folder, ':')
            for filename in sorted(glob.glob(folder + "/*.csv")):
                # Prefer the file name itself so a marker that happens to be in
                # a directory name cannot override it; fall back to the full
                # path to stay compatible with the previous matching.
                label = self._label_from_name(os.path.basename(filename))
                if label is None:
                    label = self._label_from_name(filename)
                if label is None:
                    # Previously such files were silently labelled None.
                    print('skipped ', filename, '(no Alone/Sync/Spontan marker in its name)')
                    continue
                df = pd.read_csv(filename, index_col=None, header=0)
                df = TransformData(df, label)
                v += df.shape[0]
                li.append(df)
                print('loaded ', filename, label)
        if not li:
            raise ValueError("no labelled CSV recordings found under %r" % (path,))
        self.dataRaw = li
        self.dataMerged = pd.concat(li, axis=0, sort=False)
        # Merged shape next to the summed row count, as a quick consistency check.
        print(self.dataMerged.shape, v)

In [10]:
# os.path.join avoids the invalid "\T" escape of the former literal
# 'Unity Data\Training' and picks the right separator on every platform.
training = DataLoader(os.path.join('Unity Data', 'Training'))


loading in Unity Data\Training\Evyatar Cohen :
loaded  Unity Data\Training\Evyatar Cohen\Evyatar636771052727603804Spontan.csv 1
loaded  Unity Data\Training\Evyatar Cohen\Evyatar636771053639929594Sync.csv 2
loaded  Unity Data\Training\Evyatar Cohen\Evyatar636771054555711409Alone.csv 0

loading in Unity Data\Training\Nofar Social_Nuero :
loaded Unity Data\Training\Nofar Social_Nuero\Nofar636759795182793299Spontan.csv 1
loaded  Unity Data\Training\Nofar Social_Nuero\Nofar636759796290435160Alone.csv 0
loaded  Unity Data\Training\Nofar Social_Nuero\Nofar636759797397919664Sync.csv 2

loading in Unity Data\Training\Oriya Social_Nuero :
loaded  Unity Data\Training\Oriya Social_Nuero\Oriya636759804404113837Spontan.csv 1
loaded  Unity Data\Training\Oriya Social_Nuero\Oriya636759805268396661Alone.csv 0
loaded  Unity Data\Training\Oriya Social_Nuero\Oriya636759806131350399Sync.csv 2

loading in Unity Data\Training\Orya Kalmanovitz :
loaded  Unity Data\Training\Orya Kalmanovitz\OryaB63677108273660

In [11]:
# All labelled recordings stacked into one DataFrame.
data = training.dataMerged

`dataRaw` holds all the DataFrames that have already been set up for us. What we need now is to generate, from all of them,

our actual training data.

In [12]:
# Peek at one transformed recording (index 4 has state 0 in the recorded output,
# with rows alternating between the two merged sources).
training.dataRaw[4].head(5)

Unnamed: 0,Time,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength,state
0,136.6184,1.854658,163.3199,12.51509,67.3389,-6.28195,101.4422,-0.155174,0.078463,-0.764891,59.46534,173.0233,60.61472,254.6348,127.2725,240.6239,0.0,0.401652,0.0,0
1,504.3504,-20.80859,226.086,17.0886,49.71883,-341.2516,-14.95178,1.199416,0.119953,1.519325,-92.19111,218.0329,12.57041,-309.3688,85.11283,-24.33749,0.0,0.603028,0.0,0
2,136.6354,2.990307,163.0106,14.22045,63.82683,-27.71448,91.91269,-0.166827,0.079468,-0.760001,60.39677,173.5084,62.45371,257.0284,134.3396,242.4251,0.0,0.405177,0.0,0
3,504.3669,-20.06171,220.5413,17.05443,36.02574,-309.4118,0.574942,1.089347,0.092826,1.515014,-91.69262,214.4963,13.19102,-314.42,89.30956,-17.0896,0.0,0.313147,0.0,0
4,136.6524,4.197042,162.9949,15.69018,69.52037,12.69234,67.46247,-0.172697,0.079392,-0.758363,61.55299,174.0561,63.92721,259.6933,142.5166,243.7431,0.0,0.378955,0.0,0


Let's generate our NumPy arrays. First of all, let's create a new class.

The class takes a set of settings: the columns it should drop,

the jump in time between frames, the number of frames per row, and the overall skip over the dataset.

In [45]:
# Label of one transformed recording (read from its first row).
training.dataRaw[4]['state'][0]

# The same recording without the label column, i.e. the feature columns only.
training.dataRaw[4].drop('state', axis=1)

0

Unnamed: 0,Time,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength
0,136.6184,1.854658,163.3199,12.51509,67.33890,-6.28195,101.442200,-0.155174,0.078463,-0.764891,59.46534,173.0233,60.61472,254.6348,127.27250,240.62390,0.000000,0.401652,0.000000
1,504.3504,-20.808590,226.0860,17.08860,49.71883,-341.25160,-14.951780,1.199416,0.119953,1.519325,-92.19111,218.0329,12.57041,-309.3688,85.11283,-24.33749,0.000000,0.603028,0.000000
2,136.6354,2.990307,163.0106,14.22045,63.82683,-27.71448,91.912690,-0.166827,0.079468,-0.760001,60.39677,173.5084,62.45371,257.0284,134.33960,242.42510,0.000000,0.405177,0.000000
3,504.3669,-20.061710,220.5413,17.05443,36.02574,-309.41180,0.574942,1.089347,0.092826,1.515014,-91.69262,214.4963,13.19102,-314.4200,89.30956,-17.08960,0.000000,0.313147,0.000000
4,136.6524,4.197042,162.9949,15.69018,69.52037,12.69234,67.462470,-0.172697,0.079392,-0.758363,61.55299,174.0561,63.92721,259.6933,142.51660,243.74310,0.000000,0.378955,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,561.4893,-73.616340,242.2852,69.19761,-188.33350,-50.49825,29.529870,1.857212,0.791704,1.893267,-119.80380,195.0393,45.57323,-299.4263,41.95221,-32.44111,0.163370,1.244664,0.201331
6996,194.5654,3.230397,169.1729,-4.81981,224.79620,-538.24620,-463.401900,0.351476,0.213730,-0.117169,22.59152,148.0959,66.18939,158.3324,41.64271,276.20630,0.000000,0.514317,0.000000
6997,561.5054,-75.451450,242.0424,69.58482,-215.89570,-28.57200,45.553780,1.852507,0.801023,1.897196,-120.83340,194.0461,45.91526,-299.5178,40.43322,-33.21206,0.236677,1.400911,0.336688
6998,194.5826,6.434268,161.4118,-12.09352,194.23230,-373.04310,-408.729000,0.309624,0.220371,-0.108358,25.10394,143.9342,60.14394,156.3949,60.15020,282.87230,0.000000,0.383218,0.000000


In [165]:
class DataToNP:
    """Turn labelled recordings into fixed-width NumPy samples.

    Every two consecutive rows of a recording are first joined into one frame.
    Frames are then sampled with a stride of ``jumps``, and ``combine`` such
    strided frames are joined into one output row. Rows that end up with
    missing values (not enough frames left) are dropped.

    Parameters
    ----------
    label_index : hashable
        Name of the label column; its first value labels the whole recording.
    jumps : int
        Stride, in joined frames, between the frames of one sample (>= 1).
    combine : int
        Number of strided frames concatenated into one output row (>= 1).
    skips : int
        Step between the start offsets ``0 .. jumps-1`` that are sampled.
        A negative value is reduced modulo ``jumps``; values below 1 are
        treated as 1 when transforming.
    drop_indecies : sequence of int, optional
        Positional indices of columns to drop before processing.

    Raises
    ------
    ValueError
        If ``jumps`` or ``combine`` is smaller than 1.
    """

    def __init__(self, label_index, jumps, combine = 4, skips = 1, drop_indecies = None):
        # Fail early: with jumps/combine < 1 every transform would only fail
        # later with an opaque "No objects to concatenate".
        if jumps < 1:
            raise ValueError("jumps must be >= 1, got %r" % (jumps,))
        if combine < 1:
            raise ValueError("combine must be >= 1, got %r" % (combine,))
        # None instead of a shared mutable [] default; each instance owns its list.
        self.drop_indecies = [] if drop_indecies is None else list(drop_indecies)
        self.label_index = label_index
        self.jumps = jumps
        self.combine = combine
        if skips < 0:
            skips = skips % self.jumps
        self.skips = skips

    def transform(self, data, skips = -1):
        """Convert one labelled DataFrame into ``(X, y)``.

        Parameters
        ----------
        data : pandas.DataFrame
            Recording containing the label column.
        skips : int
            Overrides the instance-level ``skips`` when non-negative.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            ``X`` with ``2 * combine * n_feature_columns`` columns and ``y`` of
            shape ``(len(X), 1)`` filled with the recording's label.

        Raises
        ------
        ValueError
            If ``data`` has no rows.
        """
        if skips < 0:
            skips = self.skips
        if skips < 1:
            skips = 1
        if len(data) == 0:
            raise ValueError("cannot transform an empty DataFrame")

        # A scalar label (and the column's dtype) instead of a one-element Series.
        labels = data[self.label_index].to_numpy()
        label_value = labels[0]

        features = data
        if len(self.drop_indecies) > 0:
            features = features.drop(data.columns[self.drop_indecies], axis=1)
        features = features.drop(self.label_index, axis=1)

        # Join row 2i and row 2i+1 side by side into a single frame.
        paired = pd.concat(
            [features.iloc[::2, :].reset_index(drop=True),
             features.iloc[1::2, :].reset_index(drop=True)],
            axis=1,
        )

        samples = []
        for offset in range(0, self.jumps, skips):
            strided = paired.iloc[offset::self.jumps, :]
            parts = [
                strided.iloc[j::self.combine].reset_index(drop=True)
                for j in range(self.combine)
            ]
            # Rows with a missing part (ran out of frames) contain NaN -> dropped.
            samples.append(pd.concat(parts, axis=1).dropna())

        X = pd.concat(samples, axis=0, sort=False).to_numpy()
        y = np.full((X.shape[0], 1), label_value, dtype=labels.dtype)
        return (X, y)

    def transform_arr(self, df_arr, skips = -1):
        """Apply ``transform`` to every DataFrame and stack the results row-wise.

        Prints the number of DataFrames received, then returns ``(X, y)``.

        Raises
        ------
        ValueError
            If ``df_arr`` is empty.
        """
        print(len(df_arr))
        if len(df_arr) == 0:
            raise ValueError("df_arr must contain at least one DataFrame")
        Xl = []
        yl = []
        for df in df_arr:
            X_part, y_part = self.transform(df, skips)
            Xl.append(X_part)
            yl.append(y_part)
        X = np.concatenate(Xl, axis=0)
        y = np.concatenate(yl, axis=0)
        return (X, y)
        
            

A small test to check that the class actually works.

It does, so cool.

We want to combine every 2 rows (right and left hand) into one; then we want

to sample with jumps (here the jump is 2), and we combine every 2 jumped samples into one row.

Also, if we get a row that has missing properties, i.e. we combined a row with a non-existent one,

we drop that row.

In [166]:
test_data = {'Name':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Age':[20, 21, 19, 18, 20, 21, 19, 18, 17, 16],
        'label':[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
test_data2 = {'Name':[11, 22, 33, 44, 55, 66, 77, 88, 99, 1010],
        'Age':[20, 21, 19, 18, 20, 21, 19, 18, 17, 16],
        'label':[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]}

test_df = pd.DataFrame(test_data)
test_df2 = pd.DataFrame(test_data2)
li = [test_df, test_df2]

# Show the input, then the result for one frame and for the list of frames.
test_df
combinerTest = DataToNP('label', 2, combine=2)
single_result = combinerTest.transform(test_df)
single_result
merged_result = combinerTest.transform_arr(li)
merged_result

# Pin the expected values so a regression fails loudly instead of having to
# be spotted by eye in the displayed arrays.
expected_single = np.array([[1, 20, 2, 21, 5, 20, 6, 21],
                            [3, 19, 4, 18, 7, 19, 8, 18]])
expected_second = np.array([[11, 20, 22, 21, 55, 20, 66, 21],
                            [33, 19, 44, 18, 77, 19, 88, 18]])
assert np.array_equal(single_result[0], expected_single)
assert np.array_equal(single_result[1], np.array([[1], [1]]))
assert np.array_equal(merged_result[0], np.vstack([expected_single, expected_second]))
assert np.array_equal(merged_result[1], np.array([[1], [1], [2], [2]]))

Unnamed: 0,Name,Age,label
0,1,20,1
1,2,21,1
2,3,19,1
3,4,18,1
4,5,20,1
5,6,21,1
6,7,19,1
7,8,18,1
8,9,17,1
9,10,16,1


(array([[ 1., 20.,  2., 21.,  5., 20.,  6., 21.],
        [ 3., 19.,  4., 18.,  7., 19.,  8., 18.]]),
 array([[1],
        [1]], dtype=int64))

2


(array([[ 1., 20.,  2., 21.,  5., 20.,  6., 21.],
        [ 3., 19.,  4., 18.,  7., 19.,  8., 18.],
        [11., 20., 22., 21., 55., 20., 66., 21.],
        [33., 19., 44., 18., 77., 19., 88., 18.]]),
 array([[1],
        [1],
        [2],
        [2]], dtype=int64))

In [167]:
import math

# Sampling settings used for the real training data.
jumps, combine, skip = 15, 5, 3
label = 'state'

MyCombiner = DataToNP(label_index=label, jumps=jumps, combine=combine, skips=skip)

# If the dimensions below agree with the transformed shape, the combiner works.

# 1) actual shape of one transformed recording
MyCombiner.transform(training.dataRaw[4])[0].shape
# 2) expected width: feature columns (all but the label) x frames per row x 2 joined rows
2 * combine * (training.dataRaw[4].shape[1] - 1)
# 3) expected number of rows
rows_afterJoining = training.dataRaw[4].shape[0] / 2
int(int(int(rows_afterJoining / jumps) / combine) * math.ceil(jumps / skip))

(230, 190)

190

230

Looks good, so the combiner works as expected!

There should be no problems with multiple DataFrames, as it just applies the same function to each one and combines the end results.

In [184]:
# Build the full training set from every loaded recording; y holds the `state`
# label of each sample. transform_arr also prints how many DataFrames it received.
X_train_full, y_train_full = MyCombiner.transform_arr(training.dataRaw)

27


array([[1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
        0, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 2,
        2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0,
        0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 2, 2]],
      dtype=int64)