In [27]:
import pandas as pd
import os, re
from helpers import *
import datetime

In [29]:
############
# input VARS

PERSON = "taylor"
TRYS = 1
VALID_LABELS = ["lights-on", "turn-off","---"]
NUM_CHANNELS = 4
FNAME = "osc_test"

############
# const VARS
ROOT = os.getcwd()
IMG_ROOT = os.path.join(*[ROOT, "images_scaled", FNAME])
OUTPUT = os.path.join(ROOT, "train_csv", FNAME + ".csv")
DATE_PATTERN = "[0-9]{2}_[0-9]{2}"
IMG_EXT = ".png"
VERBOSITY = 100
CATS, MONTHS, DAYS, LABELS, SEQ, SETS = [], [], [], [], [], []
for i in range(1, NUM_CHANNELS+1):
    globals()["PATH{}".format(i)] = []
    
###########
# check dirs

print('-' * 42)
print("IMG Directory:      {}".format(IMG_ROOT))
print("OUTPUT Directory:   {}".format(OUTPUT))
print('-' * 42)

------------------------------------------
IMG Directory:      /Users/kyy/cerebro_train/images_scaled/osc_test
OUTPUT Directory:   /Users/kyy/cerebro_train/train_csv/osc_test.csv
------------------------------------------


In [30]:
def make_df_from_images(image_root):
    for cat in [d for d in os.listdir(image_root) if "voice" in d]:
        cat_path = os.path.join(image_root, cat)
        for date in [d for d in os.listdir(cat_path) if re.match(DATE_PATTERN, d)]:
            print("\tProcessing {}".format(date))
            date_path = os.path.join(cat_path, date)
            month = int(date[:2])
            day = int(date[3:])
            date_count = 0
            for label in [d for d in os.listdir(date_path) if d in VALID_LABELS]:
                label_path = os.path.join(date_path, label)
                placeholder = os.path.join(label_path, "ch1")
                for image in [f for f in os.listdir(placeholder) if f.endswith(IMG_EXT)]:
                    date_count += 1
                    for i in range(1, NUM_CHANNELS+1):
                        p = os.path.join(label_path, "ch{}".format(i), image)
                        if os.path.exists(p):
                            globals()["PATH{}".format(i)].append(p)
                        else:
                            globals()["PATH{}".format(i)].append(float('nan'))
                    CATS.append(cat)
                    DAYS.append(day)
                    MONTHS.append(month)
                    LABELS.append(label)
                    sequence_number = int(image[:-4])
                    basenum = sequence_number % 10
                    SEQ.append(sequence_number)
                    if basenum < 8:
                        SETS.append("Training")
                    elif basenum < 9:
                        SETS.append("Validation")
                    else:
                        SETS.append("Testing")
            print("\t\tProcessed {} sequences".format(date_count))
    d = {
            "Category":CATS,
            "Day":DAYS,
            "Month":MONTHS,
            "Label":LABELS,
            "SequenceNumber":SEQ,
            "Set":SETS
        }
    for i in range(1, NUM_CHANNELS+1):
        d["Path{}".format(i)] = globals()["PATH{}".format(i)]
    return pd.DataFrame(d)

In [31]:
####################################
# turn spectrogram paths to csv file

df = timer(make_df_from_images, IMG_ROOT)
df.to_csv(OUTPUT, index=False)

Start: 2018-12-16 08:08:13.485227
	Processing 12_11
		Processed 5280 sequences
	Processing 12_13
		Processed 5220 sequences
	Processing 12_14
		Processed 5298 sequences
	Processing 12_12
		Processed 5274 sequences
End: 2018-12-16 08:08:17.216321
Finished in 00:00:00:03.731


In [32]:
print(set(df['Set']))
from collections import Counter
c = Counter(df['Set'])

print( c.items() )

{'Validation', 'Testing', 'Training'}
dict_items([('Training', 16860), ('Testing', 2106), ('Validation', 2106)])


In [33]:
print(df.tail(5))

       Category  Day  Month     Label  SequenceNumber       Set  \
21067  no_voice   12     12  turn-off            1094  Training   
21068  no_voice   12     12  turn-off            2589   Testing   
21069  no_voice   12     12  turn-off            1080  Training   
21070  no_voice   12     12  turn-off            1916  Training   
21071  no_voice   12     12  turn-off            1902  Training   

                                                   Path1  \
21067  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21068  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21069  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21070  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21071  /Users/kyy/cerebro_train/images_scaled/osc_tes...   

                                                   Path2  \
21067  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21068  /Users/kyy/cerebro_train/images_scaled/osc_tes...   
21069  /Users/kyy/cerebro_train/images_scaled/osc_tes...

In [34]:
print(df.describe())

                Day    Month  SequenceNumber
count  21072.000000  21072.0    21072.000000
mean      12.500000     12.0     1317.541002
std        1.119842      0.0      760.459106
min       11.000000     12.0        1.000000
25%       11.000000     12.0      659.000000
50%       12.000000     12.0     1317.500000
75%       14.000000     12.0     1976.000000
max       14.000000     12.0     2652.000000


In [35]:
df

Unnamed: 0,Category,Day,Month,Label,SequenceNumber,Set,Path1,Path2,Path3,Path4
0,no_voice,11,12,lights-on,1927,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
1,no_voice,11,12,lights-on,1099,Testing,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
2,no_voice,11,12,lights-on,387,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
3,no_voice,11,12,lights-on,2590,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
4,no_voice,11,12,lights-on,2584,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
5,no_voice,11,12,lights-on,393,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
6,no_voice,11,12,lights-on,1933,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
7,no_voice,11,12,lights-on,2209,Testing,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
8,no_voice,11,12,lights-on,1700,Training,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
9,no_voice,11,12,lights-on,378,Validation,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...,/Users/kyy/cerebro_train/images_scaled/osc_tes...
