First, set up code to visualize a sound waveform.

In [None]:
%pylab inline
import IPython.display as ipd
import librosa
from librosa import load, display
import glob
# import matplotlib.pyplot as plt

In [None]:
# Helper method to display the audio as a playable widget in the notebook.
def show_audio_player(PATH):
    """Build an IPython ``Audio`` object from ``PATH`` and display it inline."""
    player = ipd.Audio(PATH)
    ipd.display(player)

In [None]:
# Helper method to show the waveform of the audio.
def show_audio_waveform(PATH):
    """Load the audio at ``PATH``, print basic stats and plot its waveform.

    Prints the number of samples returned by ``librosa.load`` and the
    sampling rate it reports, then draws the waveform on a new 12x4 figure.

    ``librosa.display.waveplot`` was removed in librosa 0.10 in favour of
    ``librosa.display.waveshow``, so the plotting function is resolved at
    call time: ``waveshow`` when the installed librosa provides it,
    otherwise the legacy ``waveplot``.
    """
    data, sampling_rate = librosa.load(PATH)
    print("Length of audio in floating point %d" % len(data))
    print("Sampling rate %d" % sampling_rate)
    plt.figure(figsize=(12, 4))

    # Prefer the current API; fall back for librosa releases that predate it.
    plot_waveform = getattr(librosa.display, "waveshow", None)
    if plot_waveform is None:
        plot_waveform = librosa.display.waveplot
    plot_waveform(data, sr=sampling_rate)

In [None]:
# Sanity check: plot the waveform and embed the player for one training clip
# to verify that the audio is being displayed correctly in the notebook.
PATH_TO_AUDIO = "data/train/wav/10.wav"
show_audio_waveform(PATH_TO_AUDIO)
show_audio_player(PATH_TO_AUDIO)

Now that we have this, let us check the labels a bit.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# You should change these paths according to the location of the files on your system.
# The audio directories keep their trailing "/" because file names are appended
# to them by plain string concatenation.
PATH_TO_TRAIN_LABELS = "data/train/train.csv"  # CSV with the training labels
PATH_TO_TEST_LABELS = "data/test/test.csv"  # CSV describing the test samples
PATH_TO_TRAIN_AUDIO_FILES = "data/train/wav/"  # directory of training .wav files
PATH_TO_TEST_AUDIO_FILES = "data/test/wav/"  # directory of test .wav files
PATH_TO_SUBMISSION = "submission/"  # presumably the output directory for submissions -- confirm

In [None]:
# It is easier to deal with a CSV file if you load it into a structure you can work with.
# A pandas DataFrame is the most convenient way to do that, and pandas ships with
# built-in functionality for reading CSV files.

# Pandas assumes that the first row in your file is the header and not actual values.
# This behavior can be overridden by passing header=None as a parameter.
train = pd.read_csv(PATH_TO_TRAIN_LABELS)
test = pd.read_csv(PATH_TO_TEST_LABELS)

In [None]:
# Let us start with checking the distribution of class labels in the training set.
# Group the rows by 'Class', count them, and sort the classes by their 'ID' count.
class_counts = train.groupby('Class').count().sort_values('ID')
class_counts

In [None]:
# It is easier to follow the distribution through a bar chart visual.
# The colour argument samples the 'Paired' colormap at one index per class;
# the intent appears to be a distinct colour for each bar.
class_counts.plot(kind='bar',    # Plot a bar chart
        legend=False,    # Turn the Legend off
        width=1,      # Bar width of 1: bars fill their slot, so neighbours touch
        figsize=(8,5.8),  # Set size of plot in inches
        color=[plt.cm.Paired(np.arange(len(class_counts)))])

In [None]:
# An alternative way to plot the per-class counts: value_counts() tallies the
# 'Class' column directly, so no explicit groupby is needed. The bar order may
# differ from the chart above because value_counts() applies its own sorting.
train.Class.value_counts().plot(kind='bar')

In [None]:
# Helper method to pick a random sample, show its waveform and play it back.
def show_random_sample(train, path_to_wav_files):
    """Pick one random row of ``train``, report it, then play and plot its audio.

    ``train`` needs ``Class`` and ``ID`` columns. The audio file is looked up
    as ``path_to_wav_files + str(ID) + ".wav"``, so ``path_to_wav_files`` must
    already end with a path separator.
    """
    chosen = random.choice(train.index)

    sample_class = train.Class[chosen]
    print("This sample is a ", sample_class)

    sample_id = train.ID[chosen]
    print("The id of the sample is ", sample_id)

    wav_name = str(sample_id) + ".wav"
    wav_path = path_to_wav_files + wav_name
    show_audio_player(wav_path)
    show_audio_waveform(wav_path)
    

In [None]:
# Try the helper on the training data; re-run this cell to get a different sample.
show_random_sample(train,PATH_TO_TRAIN_AUDIO_FILES )