In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import IPython
import IPython.display
import PIL
import time
import sklearn.metrics
import pickle
import random
import cv2
import librosa

In [None]:
# Copy two clips from the train_noisy input directory into the current
# working directory (shell commands run through IPython's `!` escape).
!cp ../input/freesound-audio-tagging-2019/train_noisy/00097e21.wav 00097e21.wav 
!cp ../input/freesound-audio-tagging-2019/train_noisy/000b6cfb.wav 000b6cfb.wav


In [None]:
# Load the competition metadata: curated training labels, noisy training
# labels, and the sample submission. Every submission column after the first
# is treated as one entry of the label vocabulary.
df_train = pd.read_csv("../input/freesound-audio-tagging-2019/train_curated.csv")
df_test = pd.read_csv("../input/freesound-audio-tagging-2019/sample_submission.csv")
df_noise = pd.read_csv("../input/freesound-audio-tagging-2019/train_noisy.csv")
labels = df_test.columns[1:].tolist()


def _add_label_indicators(frame, label_names):
    """Add one boolean column per label to ``frame`` (modified in place).

    The 'labels' field is split into individual tags and each label is matched
    exactly against that tag set. A plain substring test (``label in text``)
    would also fire when a label is embedded in a longer tag.

    Assumes 'labels' holds comma-separated tag names -- TODO confirm against
    the CSV files.
    """
    tag_sets = frame['labels'].apply(
        lambda text: {tag.strip() for tag in text.split(',')}
    )
    for name in label_names:
        # Bind `name` as a default argument so the lambda does not depend on
        # the loop variable's later value.
        frame[name] = tag_sets.apply(lambda tags, name=name: name in tags)


_add_label_indicators(df_train, labels)
_add_label_indicators(df_noise, labels)

print(df_train.shape, df_noise.shape, df_test.shape)
df_train.head(10)

In [None]:
# Class-name lookup table from the auxiliary "freesound-additional" input;
# later cells read its 'en' and 'ja' columns by integer index.
class_table_csv = "../input/freesound-additional/freesound_class.csv"
df_class = pd.read_csv(class_table_csv)
df_class.head()

In [None]:
import librosa
import librosa.display

# Audio / mel-spectrogram settings shared by the helper functions below.
SR = 44100          # sampling rate requested from librosa.load
MELS = 128          # passed as n_mels
HOP = 347           # passed as hop_length
N_FFT = 20 * 128    # passed as n_fft (2560)
FMIN = 20           # passed as fmin
FMAX = SR // 2      # passed as fmax; half the sampling rate (22050)
def read_audio(pathname):
    """Load an audio file with librosa at the notebook-wide sampling rate.

    Args:
        pathname: Path of the audio file to read.

    Returns:
        The sample array returned by ``librosa.load``. The sampling rate that
        ``librosa.load`` also returns is discarded, since ``SR`` was requested.
    """
    samples, _ = librosa.load(pathname, sr=SR)
    return samples

def audio_to_melspectrogram(audio):
    """Convert a waveform into a dB-scaled mel spectrogram.

    Args:
        audio: Waveform samples, e.g. the array returned by ``read_audio``.

    Returns:
        ``np.float32`` array: the mel power spectrogram computed with the
        module-level settings (SR, MELS, HOP, N_FFT, FMIN, FMAX), converted
        with ``librosa.power_to_db``.
    """
    # Pass the waveform as `y=`: newer librosa releases make the arguments of
    # melspectrogram keyword-only, so a positional waveform raises TypeError.
    # The keyword form is accepted by older releases as well.
    mel_power = librosa.feature.melspectrogram(y=audio,
                                               sr=SR,
                                               n_mels=MELS,
                                               hop_length=HOP,
                                               n_fft=N_FFT,
                                               fmin=FMIN,
                                               fmax=FMAX)
    mel_db = librosa.power_to_db(mel_power)
    return mel_db.astype(np.float32)

def show_melspectrogram(mels, title='Log-frequency power spectrogram'):
    """Draw a mel spectrogram with a dB colour bar and show the figure.

    Args:
        mels: Spectrogram array to draw, e.g. from ``audio_to_melspectrogram``.
        title: Text placed above the plot.
    """
    # Axis settings must match the parameters used to compute the spectrogram
    # so that the time and mel axes are labelled correctly.
    specshow_options = dict(x_axis='time', y_axis='mel',
                            sr=SR, hop_length=HOP,
                            fmin=FMIN, fmax=FMAX)
    librosa.display.specshow(mels, **specshow_options)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()
    

def show_wavmel(wav, mels):
    """Plot a waveform and a mel spectrogram side by side in one figure.

    Args:
        wav: Waveform samples; plotted against time computed as index / SR.
        mels: Spectrogram array drawn in the right-hand panel.
    """
    seconds = np.arange(len(wav)) / SR
    plt.figure(figsize=(12, 3))

    # Left panel: amplitude over time with a fixed [-1, 1] vertical range.
    plt.subplot(1, 2, 1)
    plt.plot(seconds, wav)
    plt.ylim([-1, 1])
    plt.title('Waveform')
    plt.xlabel('Time')
    plt.ylabel('Amplitude')

    # Right panel: spectrogram with a dB colour bar.
    plt.subplot(1, 2, 2)
    specshow_options = dict(x_axis='time', y_axis='mel',
                            sr=SR, hop_length=HOP,
                            fmin=FMIN, fmax=FMAX)
    librosa.display.specshow(mels, **specshow_options)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log mel spectrogram')
    plt.show()

def read_as_melspectrogram(pathname, debug_display=False):
    """Read an audio file and return its mel spectrogram.

    Args:
        pathname: Path of the audio file to read.
        debug_display: When true, also display an audio player for the clip
            and plot the spectrogram before returning.

    Returns:
        The array produced by ``audio_to_melspectrogram`` for the file.
    """
    waveform = read_audio(pathname)
    mels = audio_to_melspectrogram(waveform)
    if not debug_display:
        return mels
    IPython.display.display(IPython.display.Audio(waveform, rate=SR))
    show_melspectrogram(mels)
    return mels

In [None]:
# Example from the curated training set.
# A fixed row keeps this cell reproducible; use
# np.random.randint(0, len(df_train)) here instead to browse random clips.
idx = 0
path_idx = "../input/freesound-audio-tagging-2019/train_curated/{}".format(df_train['fname'][idx])
wav_idx = read_audio(path_idx)

# Collect the English / Japanese names of every class tagged on this clip.
# Row i of df_class is assumed to describe labels[i] -- TODO confirm.
class_en = ""
class_ja = ""
for i, label in enumerate(labels):
    if df_train.loc[idx, label]:
        class_en += "{} ".format(df_class['en'][i])
        class_ja += "{} ".format(df_class['ja'][i])
print("file: {}, class: {}/{}".format(df_train['fname'][idx], class_en, class_ja))

mel_idx = read_as_melspectrogram(path_idx)
# Plot the whole clip (amplified x10 for visibility only). wav_idx2 is not
# defined until a later cell, so it must not be referenced here: doing so
# raises NameError on a fresh kernel.
show_wavmel(wav_idx*10, mel_idx)
IPython.display.Audio(data=wav_idx*5, rate=SR)

In [None]:
# Second curated clip, selected by file name instead of by row number.
idx2 = df_train[df_train['fname']=='023935e1.wav'].index.values[0]
print(idx2)
path_idx2 = "../input/freesound-audio-tagging-2019/train_curated/{}".format(df_train['fname'][idx2])
wav_idx2 = read_audio(path_idx2)

# Collect the English / Japanese names of every class tagged on this clip.
# Row i of df_class is assumed to describe labels[i] -- TODO confirm.
class_en = ""
class_ja = ""
for i, label in enumerate(labels):
    if df_train.loc[idx2, label]:
        class_en += "{} ".format(df_class['en'][i])
        class_ja += "{} ".format(df_class['ja'][i])
# Report the file name of *this* clip (idx2), not the one from the previous cell.
print("file: {}, class: {}/{}".format(df_train['fname'][idx2], class_en, class_ja))

mel_idx2 = read_as_melspectrogram(path_idx2)
show_wavmel(wav_idx2, mel_idx2)
IPython.display.Audio(data=wav_idx2, rate=SR)

In [None]:
# Crop the first clip's spectrogram to the second clip's width. Copy it first:
# a NumPy slice is a view, so writing into it would silently modify mel_idx.
mel_idx_tmp = mel_idx[:, :mel_idx2.shape[1]].copy()
# Plant the second clip's peak value in one corner cell -- presumably so the
# colour scale spans the same maximum as the second clip's plot; confirm.
mel_idx_tmp[0, 0] = mel_idx2.max()

# Mix the two waveforms (first clip amplified x10). Both are truncated to the
# shorter length so the sum is well-defined whichever clip is longer.
mix_len = min(len(wav_idx), len(wav_idx2))
wav_mix = wav_idx[:mix_len]*0.6*10 + wav_idx2[:mix_len] * 0.4
show_wavmel(wav_mix, mel_idx_tmp)

In [None]:
# Overwrite rows 40-69 of the second clip's spectrogram and plot the result.
# Work on a copy so that mel_idx2 itself is left untouched for later cells.
mel_idx_tmp = mel_idx2.copy()
# NOTE(review): the right-hand side of this assignment was missing, which made
# the cell a SyntaxError. Filling the band with the spectrogram's minimum
# (i.e. blanking it out) is an assumption -- confirm the intended value.
mel_idx_tmp[40:70] = mel_idx2.min()
show_wavmel(wav_idx2, mel_idx_tmp)

In [None]:
# Mix the two clips in the waveform domain (60% / 40%) and recompute the
# spectrogram from the mixture. Both clips are truncated to the shorter length
# so the sum does not fail when the first clip is shorter than the second.
mix_len = min(len(wav_idx), len(wav_idx2))
wav_mix = wav_idx[:mix_len] * 0.6 + wav_idx2[:mix_len] * 0.4
mel_mix = audio_to_melspectrogram(wav_mix)
# Plant the first clip's minimum in one corner cell -- presumably to pin the
# lower end of the colour scale to that of the first clip; confirm.
mel_mix[0,0] = mel_idx.min()
show_wavmel(wav_mix, mel_mix)
IPython.display.Audio(data=wav_mix, rate=SR)

In [None]:
# Print the array shapes of the two clips' spectrograms for comparison.
print(mel_idx.shape, mel_idx2.shape)

In [None]:
# Example from the noisy training set: a randomly chosen clip.
# Note that idx, path_idx, wav_idx and mel_idx are reassigned here.
idx = np.random.randint(0,len(df_noise))
fname_idx = df_noise['fname'][idx]
path_idx = "../input/freesound-audio-tagging-2019/train_noisy/{}".format(fname_idx)
wav_idx = read_audio(path_idx)

# Positions (within the first 80 labels) of the classes tagged on this clip,
# then their English / Japanese names joined with trailing spaces.
tagged_positions = [i for i in range(80) if df_noise[labels[i]][idx]==1]
class_en = "".join("{} ".format(df_class['en'][i]) for i in tagged_positions)
class_ja = "".join("{} ".format(df_class['ja'][i]) for i in tagged_positions)

print(df_noise['labels'][idx])
print("file: {}, class: {}/{}".format(fname_idx, class_en, class_ja))
mel_idx = read_as_melspectrogram(path_idx)
show_wavmel(wav_idx, mel_idx)
IPython.display.Audio(data=wav_idx, rate=SR)