In [12]:
#https://www.kaggle.com/daisukelab/creating-fat2019-preprocessed-data/output

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import IPython
import IPython.display
import PIL
import pickle


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# Any results you write to the current directory are saved as output.


import librosa
import librosa.display
import random
import os

## Setting the paarameters for Spectrogaram 

In [13]:
#Top Notebook in cometition to preprocess:https://www.kaggle.com/daisukelab/cnn-2d-basic-solution-powered-by-fast-ai

class conf:
    sampling_rate = 44100
    duration = 2 # sec
    hop_length = 347*duration # to make time steps 128
    fmin = 20
    fmax = sampling_rate // 2
    n_mels = 128
    n_fft = n_mels * 20
    padmode = 'constant'
    samples = sampling_rate * duration

# Spectrogram Rows and columns masking function for one of Augumentation 

In [14]:
# Function to augument images by masking random columns and rows, Only applied for CV and Train Data


def timeFreqMasking(conf,X: np.ndarray, num_mask=2, 
                 freq_masking_max_percentage=0.15, time_masking_max_percentage=0.2,debug_display=False):

    X = X.copy()
    all_frames_num, all_freqs_num = X.shape
    time_num,freq_num = X.shape
    for i in range(num_mask):
        
        #### Frequency Masking ####
        freq_percentage = np.random.uniform(0.0, freq_masking_max_percentage)
        
        num_freqs_to_mask = int(freq_percentage * freq_num)
        f0 = int(np.random.uniform(low=0.0, high=freq_num - num_freqs_to_mask))
        #f0 = int(f0)
        X[:, f0:f0 + num_freqs_to_mask] = 0
        
        #### Time Masking ####
        time_percentage = np.random.uniform(0.0, time_masking_max_percentage)
        
        num_frames_to_mask = int(time_percentage * time_num)
        t0 = int(np.random.uniform(low=0.0, high=time_num - num_frames_to_mask))
        #t0 = int(t0)
        X[t0:t0 + num_frames_to_mask, :] =0
    return X

In [15]:
def read_audio(conf, pathname, trim_long_data):
    y, sr = librosa.load(pathname, sr=conf.sampling_rate)
    # trim silence
    if 0 < len(y): # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y) # trim, top_db=default(60)
    # make it unified length to conf.samples
    if len(y) > conf.samples: # long enough
        if trim_long_data:
            y = y[0:0+conf.samples]
    else: # pad blank
        padding = conf.samples - len(y)    # add padding at both ends
        offset = padding // 2
        y = np.pad(y, (offset, conf.samples - len(y) - offset), conf.padmode)
    return y



def audio_to_melspectrogram(conf, audio):
    spectrogram = librosa.feature.melspectrogram(audio, 
                                                 sr=conf.sampling_rate,
                                                 n_mels=conf.n_mels,
                                                 hop_length=conf.hop_length,
                                                 n_fft=conf.n_fft,
                                                 fmin=conf.fmin,
                                                 fmax=conf.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram


def show_melspectrogram(conf, mels, title='Log-frequency power spectrogram'):
    librosa.display.specshow(mels, x_axis='time', y_axis='mel', 
                             sr=conf.sampling_rate, hop_length=conf.hop_length,
                            fmin=conf.fmin, fmax=conf.fmax)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()


def read_as_melspectrogram(conf, pathname, trim_long_data, debug_display=False):
    x = read_audio(conf, pathname, trim_long_data)
    mels = audio_to_melspectrogram(conf, x)
    if debug_display:
        IPython.display.display(IPython.display.Audio(x, rate=conf.sampling_rate))
        show_melspectrogram(conf, mels)
    return mels

def convert_wav_to_image(df, source):
    X = []
    for i, row in tqdm_notebook(df.iterrows()):
        x = read_as_melspectrogram(conf, source/str(row.fname), trim_long_data=False)
        x_color = mono_to_color(x)
        X.append(x_color)
    return X

def get_default_conf():
    return conf


## Creating Directories for processed images

In [16]:
!mkdir .\noisy_cv
!mkdir .\noisy_train
!mkdir .\noisy_test
!mkdir .\noisy_cv_aug
!mkdir .\noisy_train_aug

A subdirectory or file .\noisy_cv already exists.
A subdirectory or file .\noisy_train already exists.
A subdirectory or file .\noisy_test already exists.
A subdirectory or file .\noisy_cv_aug already exists.
A subdirectory or file .\noisy_train_aug already exists.


In [17]:
!mkdir .\curated_cv
!mkdir .\curated_train
!mkdir .\curated_test
!mkdir .\curated_cv_aug
!mkdir .\curated_train_aug

A subdirectory or file .\curated_cv already exists.
A subdirectory or file .\curated_train already exists.
A subdirectory or file .\curated_test already exists.
A subdirectory or file .\curated_cv_aug already exists.
A subdirectory or file .\curated_train_aug already exists.


In [18]:
!mkdir .\sub

A subdirectory or file .\sub already exists.


In [19]:
DATA_DIR="../Applied AI/SC 2 Final/Freesound data/"

In [20]:
train_curated = pd.read_csv(DATA_DIR+"/train_curated.csv")
train_noisy = pd.read_csv(DATA_DIR+"/train_noisy.csv")

In [21]:
train_curated.shape

(4970, 2)

In [11]:
train_noisy.shape

(19815, 2)

## Checking Number of labels for Each 80 Class for Noisy data

In [44]:
labels_train_curated=mlb_train.fit_transform([ i.split(",") for i in list(train_noisy["labels"])])
pd.DataFrame({"labels_num":list(np.sum(labels_train_curated, axis=0)),"labels":list(mlb_train.classes_)}).head(10)

Unnamed: 0,labels_num,labels
0,300,Accelerating_and_revving_and_vroom
1,300,Accordion
2,300,Acoustic_guitar
3,300,Applause
4,300,Bark
5,300,Bass_drum
6,300,Bass_guitar
7,300,Bathtub_(filling_or_washing)
8,300,Bicycle_bell
9,300,Burping_and_eructation


In [22]:
train_noisy.tail()

Unnamed: 0,fname,labels
19810,fffc7128.wav,Accordion
19811,fffcf57b.wav,Acoustic_guitar
19812,fffd1871.wav,"Water_tap_and_faucet,Sink_(filling_or_washing)"
19813,fffe9808.wav,Clapping
19814,ffff6da3.wav,Walk_and_footsteps


## Splting the Data noisy and curated data into train, cv and test

In [34]:
# Curated Data
from sklearn.model_selection import train_test_split

X_train_cv_curated, X_test_curated, y_train_cv_curated, y_test_curated = train_test_split(train_curated["fname"],train_curated["labels"], test_size = 0.15, random_state = 42)
X_train_curated, X_cv_curated, y_train_curated, y_cv_curated = train_test_split(X_train_cv_curated,y_train_cv_curated, test_size = 0.20, random_state = 42)

In [39]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb_train = MultiLabelBinarizer()


labels_train = mlb_train.fit_transform([ i.split(",") for i in list(train_curated["labels"])])

labels_train.shape

(4970, 80)

In [None]:

labels_test = mlb_train.transform([ i.split(",") for i in list(test_df["labels"])])


#mlb_cv = MultiLabelBinarizer()
labels_cv = mlb_train.transform([ i.split(",") for i in list(cv_df["labels"])])

In [37]:
X_train_curated.shape

(3379,)

In [26]:
X_train_curated.shape

(3379,)

In [27]:
X_cv_curated.shape

(845,)

In [28]:
X_test_curated.shape

(746,)

In [29]:
# Noisy Data

X_train_cv_noisy, X_test_noisy, y_train_cv_noisy, y_test_noisy = train_test_split(train_noisy["fname"],train_noisy["labels"], test_size = 0.15, random_state = 42)
X_train_noisy, X_cv_noisy, y_train_noisy, y_cv_noisy = train_test_split(X_train_cv_noisy,y_train_cv_noisy, test_size = 0.20, random_state = 42)

In [30]:
X_train_noisy.shape

(13473,)

In [31]:
X_cv_noisy.shape

(3369,)

In [32]:
X_test_noisy.shape

(2973,)

## Function for creating Spectrogram via librosa lib

In [18]:
def create_spectrogram(x,name,data_type):
    plt.interactive(False)
    #clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    librosa.display.specshow(x, x_axis='time', y_axis='mel', 
                             sr=conf.sampling_rate, hop_length=conf.hop_length,
                            fmin=conf.fmin, fmax=conf.fmax)
    #show_melspectrogram(conf, x)
    filename  = Path('./'+data_type+'/' + name + '.jpg')
    fig.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del name,fig,ax 

In [19]:
def create_spectrogram_aug(x,name,data_type):
    
    ### Aumentation ###
    plt.interactive(False)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    librosa.display.specshow(x, x_axis='time', y_axis='mel', 
                             sr=conf.sampling_rate, hop_length=conf.hop_length,
                            fmin=conf.fmin, fmax=conf.fmax)
    filename  = './'+data_type+'/'+name+'_aug' +'.jpg'
    plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del name,fig,ax 

# Converting Noisy data to Spectrogram images

In [20]:
conf = get_default_conf()

# Will unzip the files so that you can see them..
import zipfile         

In [21]:
list(X_train_noisy)[:10]

['d805ffd8.wav',
 '9620e8ef.wav',
 '7ce5c78d.wav',
 '10b68b30.wav',
 'd29ecb7a.wav',
 '74b7b200.wav',
 '4f694df8.wav',
 '97f96db0.wav',
 'f3540f19.wav',
 '60a0615c.wav']

In [22]:
#https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory-in-python
def zipdir(path, ziph):
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file))

In [23]:
sub_df=pd.read_csv(DATA_DIR+"sample_submission.csv")
sub_df["fname"].head()

0    4260ebea.wav
1    426eb1e0.wav
2    428d70bb.wav
3    4292b1c9.wav
4    429c5071.wav
Name: fname, dtype: object

In [24]:
from tqdm import tqdm 

## SUB

In [25]:
with zipfile.ZipFile(DATA_DIR+"test.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in tqdm(list(sub_df["fname"])):
             z.extract(fileName,"sub/.wav")
             x=read_as_melspectrogram(conf, "./sub/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"sub")
             #!rm -rf ./sub/.wav/*

            
zipf = zipfile.ZipFile('sub.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('./sub/', zipf)
zipf.close()


100%|██████████████████████████████████████████████████████████████████████████████| 3361/3361 [12:36<00:00,  2.90it/s]


## Noisy Train

In [26]:
with zipfile.ZipFile(DATA_DIR+"train_noisy.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in tqdm(list(X_train_noisy)):
             z.extract(fileName,"./noisy_train/.wav")
             x=read_as_melspectrogram(conf, "./noisy_train/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"noisy_train")
             create_spectrogram_aug(timeFreqMasking(conf,x),fileName.split(".")[0],"noisy_train_aug")
             #!rm -rf noisy_train/.wav/*

            
#zipf = zipfile.ZipFile('noisy_train.zip', 'w', zipfile.ZIP_DEFLATED)
#zipdir('/kaggle/working/noisy_train/', zipf)
#zipf.close()




100%|██████████████████████████████████████████████████████████████████████████| 13473/13473 [1:56:25<00:00,  1.86it/s]


# Noisy CV

In [None]:
with zipfile.ZipFile(DATA_DIR+"train_noisy.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in list(X_cv_noisy):
             z.extract(fileName,"./noisy_cv/.wav")
             x=read_as_melspectrogram(conf, "./noisy_cv/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"noisy_cv")
             create_spectrogram_aug(timeFreqMasking(conf,x),fileName.split(".")[0],"noisy_cv_aug")
             #!rm -rf noisy_cv/.wav/*

## Noisy Test

In [None]:
###

In [26]:
with zipfile.ZipFile(DATA_DIR+"train_noisy.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in list(X_test_noisy):
             z.extract(fileName,"./noisy_test/.wav")
             x=read_as_melspectrogram(conf, "./noisy_test/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"noisy_test")
             #!rm -rf noisy_test/.wav/*


# Curated Train

In [None]:
with zipfile.ZipFile(DATA_DIR+"train_curated.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in list(X_train_curated):
             z.extract(fileName,"curated_train/.wav")
             x=read_as_melspectrogram(conf, "./curated_train/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"curated_train")
             create_spectrogram_aug(timeFreqMasking(conf,x),fileName.split(".")[0],"curated_train_aug")
             #!rm -rf curated_train/.wav/*

            

## Curated CV

In [None]:
with  zipfile.ZipFile(DATA_DIR+"train_curated.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in list(X_cv_curated):
             z.extract(fileName,"./curated_cv/.wav")
             x=read_as_melspectrogram(conf, "./curated_cv/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"curated_cv")
             create_spectrogram_aug(timeFreqMasking(conf,x),fileName.split(".")[0],"curated_cv_aug")
            

## Curated test

In [27]:
with zipfile.ZipFile(DATA_DIR+"train_curated.zip","r") as z:
    listOfFileNames = z.namelist()
    for fileName in tqdm(list(X_test_curated)):
             z.extract(fileName,"./curated_test/.wav")
             x=read_as_melspectrogram(conf, "./curated_test/.wav/"+fileName, trim_long_data=False,debug_display=False)
             create_spectrogram(x,fileName.split(".")[0],"curated_test")
             #!rm -rf curated_test/.wav/*

100%|████████████████████████████████████████████████████████████████████████████████| 746/746 [02:09<00:00,  7.04it/s]


## Saving the labels

In [None]:
Y_data={"Y_noisy_train":y_train_noisy,"Y_noisy_cv":y_cv_noisy,"Y_noisy_test":y_test_noisy,
       "Y_curated_train":y_train_curated,"Y_curated_cv":y_cv_curated,"Y_noisy_curated":y_test_curated}

In [None]:
import pickle
pickle.dump( Y_data, open( "data_final_Y.pkl", "wb" ) )