In [21]:
import os, shutil, re
import pandas as pd
from helpers import *
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import librosa
import librosa.display
from helpers import *
%matplotlib inline

In [53]:
# replace this with your root directory
ROOT = os.getcwd() + "/openbci_data/"
IMG_ROOT = os.getcwd() + "/images_scaled/no_voice/09_20/"
pattern = "[0-9]{2}_[0-9]{2}"

# Build the whole output tree (<IMG_ROOT>/<label>/<channel>) up front.
# os.makedirs creates the missing parents of IMG_ROOT as well, and
# exist_ok=True keeps the cell re-runnable: folders that already exist are
# left alone while any that are missing are still created.
for _label in ("yes", "no"):
    for _channel in ("ch1", "ch3"):
        os.makedirs(os.path.join(IMG_ROOT, _label, _channel), exist_ok=True)

"""
Expected directory structure:
[INSIDE ROOT DIRECTORY]
---- [category] voice, no_voice
-------- [date] 07_02, 07_09, ...
------------ [label] down, go, ...
---------------- [channel] ch1, ch2, ...
-------------------- [wave files] *.wav
"""
# VARS

# Folder names that are accepted as class labels when walking the data tree.
VALID_LABELS = [
    "yes", "no",
    "stop", "go",
    "right", "left",
    "down", "up",
    "on", "off",
    "test",
]

# Columns dropped from the raw recording right after it is loaded.
USELESS = [
    "ch2",
    "ch4",
    "na1",
    "na2",
    "na3",
]

# Extension of every generated image file.
IMG_EXT = ".png"

# A progress line is printed once every VERBOSITY processed items.
VERBOSITY = 1000

In [34]:
def timestamp2milsec(timestamp):
    """Convert an ``H:M:S.mmm`` timestamp string to a millisecond count.

    Parameters
    ----------
    timestamp : str
        Text of the form ``hours:minutes:seconds.fraction``.

    Returns
    -------
    int or None
        ``hours*3600000 + minutes*60000 + seconds*1000 + fraction``.
        ``None`` is returned when the value is not a string (for example a
        NaN cell) or does not contain exactly one ``.``, so that callers can
        discard such rows (e.g. with ``dropna``).

    Raises
    ------
    ValueError
        If any component is not an integer literal.
    IndexError
        If the part before the ``.`` has fewer than three ``:`` fields.
    """
    # Missing cells arrive as float NaN rather than text; treat them like any
    # other unparseable timestamp instead of failing on ``.split``.
    if not isinstance(timestamp, str):
        return None

    timesplit = timestamp.split('.')
    if len(timesplit) != 2:
        return None

    # The fractional field is added as-is, i.e. it is read as a whole number
    # of milliseconds.
    # NOTE(review): presumably always three digits -- confirm against the
    # recorder's output format.
    milsec = int(timesplit[1])
    time = timesplit[0].split(':')
    hours, minutes, seconds = int(time[0]), int(time[1]), int(time[2])
    return hours * 60 * 60 * 1000 + minutes * 60 * 1000 + seconds * 1000 + milsec

In [52]:
def make_image(channel_data, ch_name, window=400, y_limit=700, labels=("no", "yes")):
    """Cut one channel into fixed-size windows and save each as a small PNG.

    The column ``channel_data[ch_name]`` is split into consecutive,
    non-overlapping windows of ``window`` samples; a trailing partial window
    is discarded. Window ``i`` is plotted as a bare line (no axes, no margins)
    on a 1.28 in x 1.28 in figure at 100 dpi and written to
    ``IMG_ROOT/<labels[i % len(labels)]>/<ch_name>/NNNN<IMG_EXT>``, where
    ``NNNN`` is a zero-padded counter kept separately for every label. With
    the default labels, even-numbered windows go to "no" and odd-numbered
    windows to "yes".

    Parameters
    ----------
    channel_data : pandas.DataFrame
        Frame holding the signal; must contain the column ``ch_name``.
    ch_name : str
        Column to plot; also used as the output sub-folder name.
    window : int, optional
        Samples per image (default 400).
    y_limit : float, optional
        The y axis is fixed to ``[-y_limit, y_limit]`` (default 700).
    labels : sequence of str, optional
        Label folders assigned to the windows in round-robin order.

    Raises
    ------
    ValueError
        If ``window`` is not positive or ``labels`` is empty.
    """
    if window <= 0:
        raise ValueError("window must be a positive number of samples")
    if not labels:
        raise ValueError("labels must contain at least one label")

    channel = channel_data[ch_name]
    time = range(window)

    # Make sure every destination exists so savefig cannot fail on a missing
    # folder (e.g. when IMG_ROOT already existed but this channel did not).
    out_dirs = [os.path.join(IMG_ROOT, label, ch_name) for label in labels]
    for out_dir in out_dirs:
        os.makedirs(out_dir, exist_ok=True)

    counters = [0] * len(labels)
    for i in range(len(channel) // window):
        # Positional slice of the i-th window; the source series is untouched.
        curr = channel.iloc[i * window:(i + 1) * window]
        slot = i % len(labels)

        fig = plt.figure(figsize=(1.28, 1.28), dpi=100, frameon=False)
        try:
            ax = fig.add_subplot(111)
            ax.plot(time, curr)
            ax.set_ylim(-y_limit, y_limit)
            # Let the axes fill the whole canvas, then hide ticks and frame.
            fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
            ax.axis('off')
            file_name = str(counters[slot]).zfill(4) + IMG_EXT
            fig.savefig(os.path.join(out_dirs[slot], file_name), pad_inches=0)
        finally:
            # Always release the figure; many are created in this loop.
            plt.close(fig)
        counters[slot] += 1

In [46]:
# read csv(txt) file and parse into the dataframe we want

# `sep` is passed by keyword: recent pandas releases accept only the path
# positionally in read_csv.
df = pd.read_csv("./openbci_data/taylor_9_21/9_21.txt", sep=",")
df = df.drop(columns=USELESS)
# Timestamps that cannot be parsed become missing values here ...
df['timestamp'] = df['timestamp'].map(timestamp2milsec)
# ... then every timestamp is re-based on the first row, so the column holds
# milliseconds elapsed since the start of the recording (vectorized).
df['timestamp'] = df['timestamp'] - df['timestamp'].iloc[0]
# Finally discard every row that still contains a missing value.
df = df.dropna()
df.head()

Unnamed: 0,sr_id,ch1,ch3,timestamp
0,0,-53.42,-20.09,0.0
1,1,31.11,29.78,15.0
2,2,53.66,20.54,15.0
3,3,-47.1,-22.9,29.0
4,4,3.03,15.89,29.0


In [54]:
# Render the window images for both retained channels, ch3 first, then ch1.
for ch_name in ('ch3', 'ch1'):
    make_image(df, ch_name)

In [None]:
def preprocess(samples, sample_rate, multiplier=1):
    """Force a signal to exactly ``sample_rate * multiplier`` samples.

    Longer input is cut off at the target length; shorter input is followed
    by zeros. The result is always a freshly allocated array created with
    ``np.zeros`` (numpy's default float dtype), whatever the input dtype.

    Parameters
    ----------
    samples : numpy.ndarray
        Input signal.
    sample_rate : int
        Samples per second of the signal.
    multiplier : int, optional
        Target duration expressed in seconds (default 1).

    Returns
    -------
    numpy.ndarray
        Array of length ``sample_rate * multiplier``.
    """
    target_len = sample_rate * multiplier
    clipped = samples[:target_len]
    out = np.zeros(target_len)
    out[:clipped.shape[0]] = clipped
    return out

def make_dir(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well, and calling this for a
    directory that already exists is a no-op. Using ``exist_ok`` instead of a
    separate existence check also removes the window in which another process
    could create the directory between the check and the creation.

    Raises
    ------
    FileExistsError
        If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [None]:
def _save_melspectrogram(wavpath, imgpath, multiplier, voice):
    """Render the log-scaled mel spectrogram of one wav file to ``imgpath``."""
    sample_rate, samples = wavfile.read(wavpath)
    samples = preprocess(samples, sample_rate, multiplier)

    # Non-voice channels are limited to the band below 512 Hz.
    mel_kwargs = {"sr": sample_rate, "n_mels": 128}
    if not voice:
        mel_kwargs["fmax"] = 512
    # The audio is passed as `y=`: recent librosa releases no longer accept
    # it positionally, while the keyword works on older releases too.
    S = librosa.feature.melspectrogram(y=samples, **mel_kwargs)
    log_S = librosa.power_to_db(S, ref=np.max)

    fig = plt.figure(figsize=(1.28, 1.28), dpi=100, frameon=False)
    try:
        # A single axes object covering the whole canvas, with no decorations.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        # specshow draws on the current axes, which is `ax` after add_axes.
        librosa.display.specshow(log_S)
        fig.savefig(imgpath)
    finally:
        plt.close(fig)


def process(input_dir, output_dir, overwrite=False):
    """Convert a tree of wav files into a mirrored tree of spectrogram images.

    Walks ``input_dir/<date>/<label>/<channel>/*.wav`` where ``<date>``
    matches the module-level ``pattern``, ``<label>`` is in ``VALID_LABELS``
    and ``<channel>`` starts with ``"ch"``. For each wav file an image with
    the same base name and ``IMG_EXT`` is written under the same relative
    path in ``output_dir``; the needed output folders are created on the way.

    Recordings from the dates listed in ``date_mult`` are padded/truncated to
    that many seconds, all others to one second. Channel 4 and channels 9 and
    above are flagged as voice and get an unrestricted mel spectrogram; every
    other channel is capped at ``fmax=512``.

    Interactive plotting is switched off for the duration of the walk and
    switched back on afterwards, even if an error occurs. Progress and a
    final found/created summary are printed.

    Parameters
    ----------
    input_dir : str
        Root of the wav tree for one category.
    output_dir : str
        Root of the image tree for the same category.
    overwrite : bool, optional
        When False (default), images that already exist are counted as found
        and not regenerated.

    Raises
    ------
    ValueError
        If a channel folder name has a non-numeric part after ``"ch"``.
    """
    items = 0
    created = 0
    found = 0
    date_mult = {"08_08": 2, "08_11": 2, "09_20": 2}
    plt.ioff()
    try:
        for date in [x for x in os.listdir(input_dir) if re.match(pattern, x)]:
            multiplier = date_mult.get(date, 1)
            date_path = os.path.join(input_dir, date)
            o_date_path = os.path.join(output_dir, date)
            make_dir(o_date_path)
            for label in [d for d in os.listdir(date_path) if d in VALID_LABELS]:
                label_path = os.path.join(date_path, label)
                o_label_path = os.path.join(o_date_path, label)
                make_dir(o_label_path)
                print("\tProcessing: {}".format(label_path))
                print("\tTime: {}".format(curr_time()))
                for channel in [d for d in os.listdir(label_path) if d.startswith("ch")]:
                    ch = int(channel[2:])
                    voice = ch == 4 or ch >= 9
                    channel_path = os.path.join(label_path, channel)
                    o_channel_path = os.path.join(o_label_path, channel)
                    make_dir(o_channel_path)
                    for file in [f for f in os.listdir(channel_path) if f.endswith(".wav")]:
                        items += 1
                        wavpath = os.path.join(channel_path, file)
                        imgpath = os.path.join(o_channel_path, file[:-4] + IMG_EXT)
                        if overwrite or not os.path.isfile(imgpath):
                            created += 1
                            if items % VERBOSITY == 0:
                                print("\t\tCreated {}th image".format(items))
                            _save_melspectrogram(wavpath, imgpath, multiplier, voice)
                        else:
                            found += 1
                            if items % VERBOSITY == 0:
                                print("\t\tFound {}th image".format(items))
        print("\tFound:\t\t{}\n\tCreated:\t{}".format(found, created))
    finally:
        # Restore interactive mode no matter how the walk ended.
        plt.ion()

In [None]:
# Map each raw-data category folder (key) to the image folder that will
# receive its spectrograms (value).
dir_pairs = {
    ROOT + category: IMG_ROOT + category
    for category in ("voice", "no_voice")
}

In [None]:
# Convert every category: make sure its image folder exists, then run the
# conversion through the `timer` helper.
for input_dir, output_dir in dir_pairs.items():
    output_missing = not os.path.isdir(output_dir)
    if output_missing:
        os.mkdir(output_dir)
    timer(process, input_dir, output_dir)

In [None]:
# replace this with your root directory
ROOT = "{}/images_scaled/".format(os.getcwd())
OUTPUT = "{}/test.csv".format(os.getcwd())
DATE_PATTERN = "[0-9]{2}_[0-9]{2}"
VALID_LABELS = [
    "yes", "no",
    "stop", "go",
    "right", "left",
    "down", "up",
    "on", "off",
    "test",
]
IMG_EXT = ".png"
VERBOSITY = 100
NUM_CHANNELS = 22

# One module-level accumulator list per metadata column of the index.
CATS = []
MONTHS = []
DAYS = []
LABELS = []
SEQ = []
SETS = []

# One module-level path list per channel: PATH1 ... PATH<NUM_CHANNELS>.
for i in range(1, NUM_CHANNELS + 1):
    globals()["PATH{}".format(i)] = []

In [None]:
def make_df_from_images(image_root):
    """Index the generated images into one DataFrame row per sequence.

    Walks ``image_root/<category>/<date>/<label>/`` where ``<category>``
    contains ``"voice"``, ``<date>`` matches ``DATE_PATTERN`` and ``<label>``
    is in ``VALID_LABELS``. The images found in the label's ``ch1`` folder
    define the sequences; for each of them the same file name is looked up in
    ``ch1`` .. ``ch<NUM_CHANNELS>`` and the path (or NaN when the file is
    missing) is recorded.

    Each sequence is assigned to a split from the last decimal digit of its
    sequence number: 0-7 "Training", 8 "Validation", 9 "Testing".

    The module-level accumulators (``CATS``, ``DAYS``, ..., ``PATH1`` ..)
    are emptied first and then refilled, so after the call they describe this
    run only and calling the function again does not duplicate rows.
    Directory listings are sorted so the row order is reproducible.

    Parameters
    ----------
    image_root : str
        Root folder of the image tree.

    Returns
    -------
    pandas.DataFrame
        Columns ``Category``, ``Day``, ``Month``, ``Label``,
        ``SequenceNumber``, ``Set`` and ``Path1`` .. ``Path<NUM_CHANNELS>``.

    Raises
    ------
    ValueError
        If an image's base name is not an integer.
    """
    channel_ids = range(1, NUM_CHANNELS + 1)
    path_lists = {i: globals()["PATH{}".format(i)] for i in channel_ids}

    # Start from empty accumulators so repeated calls do not pile up rows.
    for accumulator in (CATS, MONTHS, DAYS, LABELS, SEQ, SETS):
        accumulator.clear()
    for path_list in path_lists.values():
        path_list.clear()

    for cat in sorted(d for d in os.listdir(image_root) if "voice" in d):
        cat_path = os.path.join(image_root, cat)
        for date in sorted(d for d in os.listdir(cat_path) if re.match(DATE_PATTERN, d)):
            print("\tProcessing {}".format(date))
            date_path = os.path.join(cat_path, date)
            month = int(date[:2])
            day = int(date[3:])
            date_count = 0
            for label in sorted(d for d in os.listdir(date_path) if d in VALID_LABELS):
                label_path = os.path.join(date_path, label)
                # ch1 is the reference channel: its files define the sequences.
                placeholder = os.path.join(label_path, "ch1")
                for image in sorted(f for f in os.listdir(placeholder) if f.endswith(IMG_EXT)):
                    date_count += 1
                    for i in channel_ids:
                        p = os.path.join(label_path, "ch{}".format(i), image)
                        path_lists[i].append(p if os.path.exists(p) else float('nan'))
                    CATS.append(cat)
                    DAYS.append(day)
                    MONTHS.append(month)
                    LABELS.append(label)
                    # Strip the extension by its real length, not a fixed 4.
                    sequence_number = int(image[:-len(IMG_EXT)])
                    basenum = sequence_number % 10
                    SEQ.append(sequence_number)
                    if basenum < 8:
                        SETS.append("Training")
                    elif basenum < 9:
                        SETS.append("Validation")
                    else:
                        SETS.append("Testing")
            print("\t\tProcessed {} sequences".format(date_count))

    d = {
        "Category": CATS,
        "Day": DAYS,
        "Month": MONTHS,
        "Label": LABELS,
        "SequenceNumber": SEQ,
        "Set": SETS,
    }
    for i in channel_ids:
        d["Path{}".format(i)] = path_lists[i]
    return pd.DataFrame(d)

In [None]:
# Build the image index via the `timer` helper, then write it to OUTPUT as
# CSV without the DataFrame's row index.
df = timer(make_df_from_images, ROOT)
df.to_csv(path_or_buf=OUTPUT, index=False)

In [None]:
# Leave the frame as the cell's last expression so the notebook renders it
# as a table instead of printing its plain-text form.
df.head(5)

In [None]:
# Summary statistics, shown through the notebook's rich table display rather
# than printed as plain text.
df.describe()