In [1]:
# Import libraries
## --- Libraries   ---  ##
# File imports and aggregates data from multiple databases
import os
import fnmatch
import pandas as pd
import numpy as np
import random

import tensorflow as tf
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

### Importing data and labels
#### The data samples are stored in separate text files, each containing a single sample. Each sample has 76 features (19 for each of the four column groups `m_l`, `m_r`, `s_l`, `s_r`), and the samples are of uneven lengths. The labels are given in a separate text file and have multiple categories based on experience and performance scores.

In [2]:
# Import data
# Locate the kinematics samples of the selected surgical task.
# NOTE: every path fragment keeps its leading/trailing separator on purpose,
# because file paths are assembled by plain string concatenation
# (path + surgical_action + path_cont + <file name>).
# os.path.join / os.sep are used instead of hard-coded '\\' so the notebook
# also runs on non-Windows systems.
path = os.path.join(os.getcwd(), '..', '..', 'Data', 'JIGSAWS', '')
surgical_action = 'Knot_Tying'
path_cont = os.sep + os.path.join('kinematics', 'AllGestures') + os.sep
file_list = os.listdir(path + surgical_action + path_cont)

if len(file_list) > 0:
    # '%' actually interpolates the count; passing it as a second print()
    # argument printed the literal "%d" followed by the number.
    print("Number of files: %d" % len(file_list))
    print("Found samples sucessfully !!")

# Import labels
# The meta file is whitespace-separated and has no header row.
label_txt = os.sep + "meta_file_Knot_Tying.txt"
# Raw string: "\s" is not a valid escape sequence in a normal string literal.
labels = pd.read_csv(path + surgical_action + label_txt, sep=r"\s+", header=None)
if len(labels) > 0:
    print("Labels imported")
labels.columns = ['sample_name', "experience", "total_score", "ROT", "H", "TAM", "FOO", "OP", "QFP"]

# Last expression of the cell: summary statistics of the numeric label columns.
labels.describe()


Number of files: %d 40
Found samples sucessfully !!
Labels imported


Unnamed: 0,total_score,ROT,H,TAM,FOO,OP,QFP
count,36.0,36.0,36.0,36.0,36.0,36.0,36.0
mean,14.416667,2.416667,2.25,2.027778,2.583333,2.277778,2.861111
std,5.106718,1.052209,0.937321,0.877858,0.937321,0.974272,0.960737
min,6.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,10.0,1.75,1.75,1.0,2.0,1.0,2.0
50%,14.5,2.5,2.0,2.0,3.0,2.0,3.0
75%,19.0,3.0,3.0,3.0,3.0,3.0,4.0
max,22.0,4.0,4.0,3.0,4.0,4.0,4.0


In [None]:
# Import data
def is_file_empty(file_path):
    """Tell whether `file_path` points to an existing file of 0 bytes.

    Returns False when the path does not exist, and otherwise True only if
    the reported size is exactly zero.
    """
    if not os.path.exists(file_path):
        return False
    return os.path.getsize(file_path) == 0

# Column names of one kinematics sample: four groups of 19 features each
# (76 columns in total), in the order the columns appear in the files.
# NOTE(review): the prefixes presumably stand for master/slave and left/right
# manipulators -- confirm against the dataset documentation.
arm_prefixes = ['m_l', 'm_r', 's_l', 's_r']
arm_features = (
    ['x', 'y', 'z']
    + ['r%d%d' % (row, col) for row in range(3) for col in range(3)]
    + ['vx', 'vy', 'vz', 'avx', 'avy', 'avz', 'ang']
)
kinematic_columns = [prefix + '_' + feature
                     for prefix in arm_prefixes
                     for feature in arm_features]

# Guarded so an empty directory does not raise IndexError.
if file_list:
    print(file_list[0])

samples = []
for file in file_list:
    print("File: " + file)
    file_path = path + surgical_action + path_cont + file
    if is_file_empty(file_path):
        # An empty sample file carries no data, so its label row is removed.
        # The previous code called labels.drop([file[-4]]): file[-4] is the
        # single character '.', drop() looks up *index* labels rather than
        # sample names, and its return value was discarded -- so no row was
        # ever removed. Filtering on 'sample_name' is a no-op when the
        # sample has no label row.
        if file.endswith('.txt'):
            labels = labels[labels['sample_name'] != file[:-4]]
    else:
        # Whitespace-separated values without a header row; the raw string
        # avoids the invalid "\s" escape sequence.
        sample = pd.read_csv(file_path, sep=r"\s+", header=None)
        sample.columns = kinematic_columns

        samples.append(sample)

# samples[0].head()

### Label selection
#### The three main classes of labels are Novice, Intermediate and Expert. However, there are multiple ways to label the samples:
#### 1. Based purely on experience (number of hours on the DaVinci)
#### 2. Based purely on the GRS performance score (define a range of performance values for each label)
#### 3. Consider the average of the two above.
#### 
#### Note:  For our scenario we will select the 2nd method.
