# Exploratory Data Analysis

In [1]:
import sys
sys.path.append("../")

import pandas as pd
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
import numpy as np

import dataloader

%matplotlib inline

In [2]:
# Load the audio file listing (columns shown below: Index, FileName, set).
audio_list_csv = "../dataset/audio_list.csv"
df_audio = pd.read_csv(audio_list_csv)

In [3]:
# Number of rows (audio files) in the listing.
df_audio.shape[0]

87

Only 87 recordings? That is a small dataset.


In [4]:
# Peek at the first five rows of the listing.
df_audio.head(5)

Unnamed: 0,Index,FileName,set
0,0,WaveDrum02_51#MIX,0
1,1,WaveDrum02_30#MIX,0
2,2,WaveDrum01_02#MIX,0
3,3,WaveDrum02_44#MIX,0
4,4,RealDrum01_05#MIX,0


## What kinds of drum types do we have?

In [5]:
# Drum type = the part of FileName before the first "_" with its trailing two
# characters removed (e.g. "WaveDrum02_51#MIX" -> "WaveDrum").
# The vectorised .str accessor replaces the per-row lambda and propagates
# missing values instead of raising on them.
drum_types = df_audio["FileName"].str.split("_").str[0].str[:-2]
drum_types.value_counts()

WaveDrum      60
RealDrum      15
TechnoDrum    12
Name: FileName, dtype: int64

According to `dataloader.py`, the set numbers are used as follows:

- **0**: validation set (default: `val_set_number=0`)
- **1, 2**: training set

Assuming this split is meant for cross-validation, do we report a cross-validation F1-score?

In [6]:
# Count the rows assigned to each set number to see how the files are split.
clips_per_set = df_audio.groupby("set")["Index"].count()
clips_per_set

set
0    29
1    29
2    29
Name: Index, dtype: int64

# Labels

In [7]:
# Load the pickled labels.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
labels_pkl = "../dataset/labels.pkl"
labels = pd.read_pickle(labels_pkl)

In [8]:
# Wrap the loaded labels in a DataFrame for inspection
# (the display below shows one column per file name).
df_labels = pd.DataFrame(data=labels)

In [9]:
# Show a bounded preview rather than the whole frame; with the three rows seen
# here the display is unchanged, but the cell stays small if the labels grow.
df_labels.head()

Unnamed: 0,RealDrum01_00#MIX,RealDrum01_02#MIX,RealDrum01_04#MIX,RealDrum01_05#MIX,RealDrum01_06#MIX,RealDrum01_08#MIX,RealDrum01_09#MIX,RealDrum01_10#MIX,RealDrum01_12#MIX,RealDrum01_13#MIX,...,WaveDrum02_50#MIX,WaveDrum02_51#MIX,WaveDrum02_53#MIX,WaveDrum02_54#MIX,WaveDrum02_55#MIX,WaveDrum02_56#MIX,WaveDrum02_57#MIX,WaveDrum02_58#MIX,WaveDrum02_59#MIX,WaveDrum02_60#MIX
0,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, ...","[1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ...","[1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, ...","[1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, ...","[1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, ...","[1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...","[1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...","[0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, ...","[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, ...",...,"[1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, ...","[1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, ...","[1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, ...","[1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, ...","[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, ...","[0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, ..."
1,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ...","[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...",...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, ...","[1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, ...","[1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, ...","[1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...","[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...","[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, ...","[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",...,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [10]:
# Length of a single label sequence (first row, first file).
len(df_labels.iat[0, 0])

200

# Dataloader

In [11]:
# Build the project's data loader in training mode, holding out set 0 for validation.
# The positional 2 is presumably the batch size (the batch below has a leading
# dimension of 2) -- TODO confirm against dataloader.py.
training_loader = dataloader.dataLoader(
    '../dataset/audio_list.csv',
    '../dataset/labels.pkl',
    2,
    val_set_number=0,
    is_train_mode=True,
)

In [12]:
# Fetch one batch of inputs and labels; the call also prints the selected
# file names (the "name_list" line in the output).
X, y = training_loader.next_batch()

name_list ['WaveDrum02_57#MIX', 'RealDrum01_16#MIX']


In [13]:
# Shape of the input batch once viewed as a single array.
np.asarray(X).shape

(2, 20, 1723)

In [14]:
# Shape of the label batch. The leading 2 matches the two files in the batch and
# the trailing 200 matches the label-sequence length measured above; the middle 3
# presumably corresponds to the three label rows per file -- TODO confirm.
y.shape

(2, 3, 200)