In [84]:
import pandas as pd
import os, re
from helpers import *
import datetime

In [85]:
############
# input VARS

PERSON = "taylor"
TRYS = 1
VALID_LABELS = ["lights-on", "turn-off","---"]
NUM_CHANNELS = 4
FNAME = "vocal_12_19"

############
# const VARS
ROOT = os.getcwd()
IMG_ROOT = os.path.join(*[ROOT, "images_scaled", FNAME])
OUTPUT = os.path.join(ROOT, "train_csv", FNAME + ".csv")
DATE_PATTERN = "[0-9]{2}_[0-9]{2}"
IMG_EXT = ".png"
VERBOSITY = 100
CATS, MONTHS, DAYS, LABELS, SEQ, SETS = [], [], [], [], [], []
for i in range(1, NUM_CHANNELS+1):
    globals()["PATH{}".format(i)] = []
    
###########
# check dirs

print('-' * 42)
print("IMG Directory:      {}".format(IMG_ROOT))
print("OUTPUT Directory:   {}".format(OUTPUT))
print('-' * 42)

------------------------------------------
IMG Directory:      /Users/kyy/cerebro_train/images_scaled/vocal_12_19
OUTPUT Directory:   /Users/kyy/cerebro_train/train_csv/vocal_12_19.csv
------------------------------------------


In [86]:
def make_df_from_images(image_root):
    for cat in [d for d in os.listdir(image_root) if "voice" in d]:
        cat_path = os.path.join(image_root, cat)
        for date in [d for d in os.listdir(cat_path) if re.match(DATE_PATTERN, d)]:
            print("\tProcessing {}".format(date))
            date_path = os.path.join(cat_path, date)
            month = int(date[:2])
            day = int(date[3:])
            date_count = 0
            for label in [d for d in os.listdir(date_path) if d in VALID_LABELS]:
                label_path = os.path.join(date_path, label)
                placeholder = os.path.join(label_path, "ch1")
                for image in [f for f in os.listdir(placeholder) if f.endswith(IMG_EXT)]:
                    date_count += 1
                    for i in range(1, NUM_CHANNELS+1):
                        p = os.path.join(label_path, "ch{}".format(i), image)
                        if os.path.exists(p):
                            globals()["PATH{}".format(i)].append(p)
                        else:
                            globals()["PATH{}".format(i)].append(float('nan'))
                    CATS.append(cat)
                    DAYS.append(day)
                    MONTHS.append(month)
                    LABELS.append(label)
                    sequence_number = int(image[:-4])
                    basenum = sequence_number % 10
                    SEQ.append(sequence_number)
                    if basenum < 8:
                        SETS.append("Training")
                    elif basenum < 9:
                        SETS.append("Validation")
                    else:
                        SETS.append("Testing")
            print("\t\tProcessed {} sequences".format(date_count))
    d = {
            "Category":CATS,
            "Day":DAYS,
            "Month":MONTHS,
            "Label":LABELS,
            "SequenceNumber":SEQ,
            "Set":SETS
        }
    for i in range(1, NUM_CHANNELS+1):
        d["Path{}".format(i)] = globals()["PATH{}".format(i)]
    return pd.DataFrame(d)

In [87]:
####################################
# turn spectrogram paths to csv file

df = timer(make_df_from_images, IMG_ROOT)
df.to_csv(OUTPUT, index=False)

Start: 2018-12-19 11:42:29.099764
	Processing 12_19
		Processed 5300 sequences
End: 2018-12-19 11:42:29.505706
Finished in 00:00:00:00.406


In [88]:
print(set(df['Set']))
from collections import Counter
c = Counter(df['Set'])

print( c.items() )

{'Validation', 'Testing', 'Training'}
dict_items([('Testing', 529), ('Training', 4242), ('Validation', 529)])


In [89]:
print(df.tail(5))

      Category  Day  Month     Label  SequenceNumber       Set  \
5295  no_voice   19     12  turn-off             361  Training   
5296  no_voice   19     12  turn-off             407  Training   
5297  no_voice   19     12  turn-off            1719   Testing   
5298  no_voice   19     12  turn-off            1094  Training   
5299  no_voice   19     12  turn-off            1080  Training   

                                                  Path1  \
5295  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5296  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5297  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5298  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5299  /Users/kyy/cerebro_train/images_scaled/vocal_1...   

                                                  Path2  \
5295  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5296  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5297  /Users/kyy/cerebro_train/images_scaled/vocal_1...   
5298  /Users

In [90]:
print(df.describe())

          Day   Month  SequenceNumber
count  5300.0  5300.0     5300.000000
mean     19.0    12.0      883.837358
std       0.0     0.0      510.047755
min      19.0    12.0        1.000000
25%      19.0    12.0      442.000000
50%      19.0    12.0      884.000000
75%      19.0    12.0     1325.250000
max      19.0    12.0     1772.000000


In [91]:
df

Unnamed: 0,Category,Day,Month,Label,SequenceNumber,Set,Path1,Path2,Path3,Path4
0,no_voice,19,12,---,1099,Testing,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
1,no_voice,19,12,---,387,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
2,no_voice,19,12,---,393,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
3,no_voice,19,12,---,1700,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
4,no_voice,19,12,---,378,Validation,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
5,no_voice,19,12,---,1066,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
6,no_voice,19,12,---,1072,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
7,no_voice,19,12,---,1714,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
8,no_voice,19,12,---,422,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
9,no_voice,19,12,---,344,Training,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...,/Users/kyy/cerebro_train/images_scaled/vocal_1...
