In [9]:
import pandas as pd
import os, re
from helpers import *
import datetime

In [10]:
# ------------------------------------------------------------------
# Inputs: values a user is expected to edit per recording session.
PERSON = "taylor"
TRYS = 1
VALID_LABELS = [
    # spoken digits
    'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine',
    # commands and gestures
    'left', 'right', 'stop', 'go', 'up', 'oneFinger', 'twoFinger',
    'down',
]
NUM_CHANNELS = 4
FNAME = "arm_01_04"

# ------------------------------------------------------------------
# Constants derived from the inputs above.
ROOT = os.getcwd()
IMG_ROOT = os.path.join(ROOT, "images_scaled", FNAME)
OUTPUT = os.path.join(ROOT, "train_csv", FNAME + ".csv")
DATE_PATTERN = "[0-9]{2}_[0-9]{2}"  # two digits, underscore, two digits
IMG_EXT = ".png"
VERBOSITY = 100

# Module-level column accumulators, one independent list per column.
CATS = []
MONTHS = []
DAYS = []
LABELS = []
SEQ = []
SETS = []

# One module-level list per channel: PATH1 .. PATH<NUM_CHANNELS>.
for i in range(1, NUM_CHANNELS + 1):
    globals()["PATH{}".format(i)] = []

# ------------------------------------------------------------------
# Echo the resolved locations so a wrong working directory is obvious.
print(
    '-' * 42,
    "IMG Directory:      {}".format(IMG_ROOT),
    "OUTPUT Directory:   {}".format(OUTPUT),
    '-' * 42,
    sep="\n",
)

------------------------------------------
IMG Directory:      /Users/kyy/cerebro_train/images_scaled/arm_01_04
OUTPUT Directory:   /Users/kyy/cerebro_train/train_csv/arm_01_04.csv
------------------------------------------


In [11]:
def make_df_from_images(image_root):
    """Index the spectrogram images under ``image_root`` into a DataFrame.

    Directory layout walked by this function::

        image_root/<category containing "voice">/<MM_DD>/<label>/ch<k>/<seq><IMG_EXT>

    The files found in a label's ``ch1`` folder define which sequences exist.
    For every channel 1..NUM_CHANNELS the same file name is looked up under
    ``ch<k>``; the path is recorded when the file exists and NaN otherwise.
    Each sequence is assigned to a split from the last digit of its sequence
    number: 0-7 -> "Training", 8 -> "Validation", 9 -> "Testing".

    Rows are collected in local lists, so calling the function again (for
    example by re-running the notebook cell) returns a fresh frame instead of
    one that still contains the rows of the previous call. The module-level
    accumulator lists are not modified.

    Entries that do not fit the layout are skipped rather than crashing the
    walk: non-directories, date folders whose whole name does not match
    DATE_PATTERN, labels not in VALID_LABELS, and labels without a ``ch1``
    folder.

    Reads the module-level settings DATE_PATTERN, VALID_LABELS, NUM_CHANNELS
    and IMG_EXT.

    Args:
        image_root: Directory that contains the category folders.

    Returns:
        pandas.DataFrame with the columns Category, Day, Month, Label,
        SequenceNumber, Set and Path1..Path<NUM_CHANNELS>; one row per image
        found in a label's ``ch1`` folder.

    Raises:
        OSError: if ``image_root`` cannot be listed.
        ValueError: if an image file name without IMG_EXT is not an integer.
    """
    channels = range(1, NUM_CHANNELS + 1)
    cats, days, months, labels, seqs, sets = [], [], [], [], [], []
    paths = {i: [] for i in channels}

    for cat in os.listdir(image_root):
        cat_path = os.path.join(image_root, cat)
        if "voice" not in cat or not os.path.isdir(cat_path):
            continue
        for date in os.listdir(cat_path):
            date_path = os.path.join(cat_path, date)
            # fullmatch: a prefix match would accept e.g. "01_04_backup" and
            # then fail when the day part is converted to int.
            if not re.fullmatch(DATE_PATTERN, date) or not os.path.isdir(date_path):
                continue
            print("\tProcessing {}".format(date))
            month = int(date[:2])
            day = int(date[3:])
            date_count = 0
            for label in os.listdir(date_path):
                if label not in VALID_LABELS:
                    continue
                label_path = os.path.join(date_path, label)
                # ch1 is the reference channel that enumerates the sequences.
                placeholder = os.path.join(label_path, "ch1")
                if not os.path.isdir(placeholder):
                    continue
                for image in os.listdir(placeholder):
                    if not image.endswith(IMG_EXT):
                        continue
                    # Strip the extension by its real length, not a fixed 4.
                    sequence_number = int(image[:-len(IMG_EXT)])
                    date_count += 1
                    for i in channels:
                        p = os.path.join(label_path, "ch{}".format(i), image)
                        paths[i].append(p if os.path.exists(p) else float('nan'))
                    cats.append(cat)
                    days.append(day)
                    months.append(month)
                    labels.append(label)
                    seqs.append(sequence_number)
                    # Split on the last digit: 0-7 / 8 / 9.
                    basenum = sequence_number % 10
                    if basenum < 8:
                        sets.append("Training")
                    elif basenum < 9:
                        sets.append("Validation")
                    else:
                        sets.append("Testing")
            print("\t\tProcessed {} sequences".format(date_count))

    d = {
        "Category": cats,
        "Day": days,
        "Month": months,
        "Label": labels,
        "SequenceNumber": seqs,
        "Set": sets,
    }
    for i in channels:
        d["Path{}".format(i)] = paths[i]
    return pd.DataFrame(d)

In [12]:
####################################
# turn spectrogram paths to csv file

# `timer` comes from `helpers`; presumably it calls the function with the given
# argument, reports the run time, and passes the result through -- TODO confirm.
df = timer(make_df_from_images, IMG_ROOT)

# to_csv does not create missing directories, so make sure the target folder
# exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
df.to_csv(OUTPUT, index=False)

Start: 2019-01-04 06:51:39.266891
	Processing 01_04
		Processed 202 sequences
End: 2019-01-04 06:51:39.280481
Finished in 00:00:00:00.0133


In [13]:
from collections import Counter

# Sanity check on the split: tally the rows per set, then show which sets
# occur followed by their sizes.
c = Counter(df['Set'])
print(set(df['Set']))

print(c.items())

{'Validation', 'Training', 'Testing'}
dict_items([('Validation', 20), ('Training', 162), ('Testing', 20)])


In [14]:
# Last five rows; left as the cell's final expression so the notebook renders
# the frame with its rich table display instead of plain printed text.
df.tail(5)

     Category  Day  Month      Label  SequenceNumber       Set  \
197  no_voice    4      1  twoFinger              46  Training   
198  no_voice    4      1  twoFinger              52  Training   
199  no_voice    4      1  twoFinger               0  Training   
200  no_voice    4      1  twoFinger              85  Training   
201  no_voice    4      1  twoFinger              91  Training   

                                                 Path1  Path2  Path3  Path4  
197  /Users/kyy/cerebro_train/images_scaled/arm_01_...    NaN    NaN    NaN  
198  /Users/kyy/cerebro_train/images_scaled/arm_01_...    NaN    NaN    NaN  
199  /Users/kyy/cerebro_train/images_scaled/arm_01_...    NaN    NaN    NaN  
200  /Users/kyy/cerebro_train/images_scaled/arm_01_...    NaN    NaN    NaN  
201  /Users/kyy/cerebro_train/images_scaled/arm_01_...    NaN    NaN    NaN  


In [15]:
# Summary statistics of the numeric columns, shown via the notebook's rich
# display rather than print().
df.describe()

         Day  Month  SequenceNumber  Path2  Path3  Path4
count  202.0  202.0      202.000000    0.0    0.0    0.0
mean     4.0    1.0       50.000000    NaN    NaN    NaN
std      0.0    0.0       29.227194    NaN    NaN    NaN
min      4.0    1.0        0.000000    NaN    NaN    NaN
25%      4.0    1.0       25.000000    NaN    NaN    NaN
50%      4.0    1.0       50.000000    NaN    NaN    NaN
75%      4.0    1.0       75.000000    NaN    NaN    NaN
max      4.0    1.0      100.000000    NaN    NaN    NaN


In [16]:
# Show a bounded preview instead of dumping the whole frame into the output.
df.head(10)

Unnamed: 0,Category,Day,Month,Label,SequenceNumber,Set,Path1,Path2,Path3,Path4
0,no_voice,4,1,oneFinger,88,Validation,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
1,no_voice,4,1,oneFinger,77,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
2,no_voice,4,1,oneFinger,63,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
3,no_voice,4,1,oneFinger,62,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
4,no_voice,4,1,oneFinger,76,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
5,no_voice,4,1,oneFinger,89,Testing,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
6,no_voice,4,1,oneFinger,60,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
7,no_voice,4,1,oneFinger,74,Training,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
8,no_voice,4,1,oneFinger,48,Validation,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
9,no_voice,4,1,oneFinger,49,Testing,/Users/kyy/cerebro_train/images_scaled/arm_01_...,,,
