In [None]:
import numpy as np 
import sys
import librosa , librosa.display
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): this puts the path of a media file (not a package directory) on the
# module search path; nothing visible in this notebook imports from it, so the call
# looks unnecessary — confirm before removing.
sys.path.append('../input/audio-data/wild.wmv')

# Waveform

In [None]:
# Load the clip resampled to 22 050 Hz and draw its amplitude over time.
file = '../input/audio-data/wild.wmv'
Signal , sr = librosa.load(file , sr = 22050) # n_samples = 2.6 * 60 * 22050 (per the original note)
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`; older releases only
# ship `waveplot`, so pick whichever this installation provides.
_plot_waveform = getattr(librosa.display , 'waveshow' , None) or librosa.display.waveplot
_plot_waveform(Signal , sr = sr)
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

# Fast Fourier Transform

In [None]:
# Discrete Fourier transform of the whole signal.
fft = np.fft.fft(Signal) 
fft # the result is complex-valued, so the magnitudes are extracted in the next cell

In [None]:
# Absolute value of each complex FFT coefficient.
magnitude = np.abs(fft)
magnitude # these magnitudes represent the contribution of each frequency within the sound

In [None]:
# Frequency of each FFT bin: bin k of an N-point transform sits at k * sr / N, so the
# axis has to stop one step short of `sr` (endpoint=False). Including `sr` itself, as
# the default does, stretches every bin frequency by a factor of N / (N - 1).
frequency = np.linspace(0 , sr , len(magnitude) , endpoint = False)
frequency

In [None]:
# Magnitude spectrum over the full frequency axis.
plt.plot(frequency, magnitude)
plt.gca().set(xlabel='Frequency', ylabel='Magnitude')
plt.show()

In [None]:
# Keep only the first half of the frequency axis and of the magnitudes.
left_frequency, left_magnitude = (
    values[:len(frequency) // 2] for values in (frequency, magnitude)
)

In [None]:
# Magnitude spectrum restricted to the first half of the frequency axis.
plt.plot(left_frequency, left_magnitude)
plt.gca().set(xlabel='Frequency', ylabel='Magnitude')
plt.show()

# STFT

In [None]:
# Short-time Fourier transform settings, both measured in samples:
# n_fft is the analysis window, hop_length is how far the window shifts each step.
n_fft, hop_length = 2048, 512
stft = librosa.stft(Signal, n_fft=n_fft, hop_length=hop_length)

In [None]:
# Magnitude spectrogram, converted to decibels before plotting.
spectogram = np.abs(stft)
log_spectogram = librosa.amplitude_to_db(spectogram)

librosa.display.specshow(log_spectogram, hop_length=hop_length, sr=sr)
plt.gca().set(xlabel='Time', ylabel='Frequency')
plt.colorbar()
plt.show()

# MFCCs

In [None]:
# 13 MFCCs per frame, using the same window/hop as the STFT above.
# The signal and sample rate are passed by keyword: librosa >= 0.10 rejects a positional
# signal argument, and naming `sr` ties the features to the rate the clip was loaded at
# instead of relying on the function's default.
MFCCs = librosa.feature.mfcc(y = Signal , sr = sr , n_fft = n_fft , hop_length = hop_length , n_mfcc = 13)
librosa.display.specshow(MFCCs , sr = sr , hop_length = hop_length)
plt.xlabel('Time')
plt.ylabel('MFCC')
plt.colorbar()
plt.show()

# Music Genre Classification


In [None]:
import json
import os
import math
import librosa

# Where the genre sub-folders live and where the extracted features are written.
DATASET_PATH = "../input/gtzan-dataset-music-genre-classification/Data/genres_original"
JSON_PATH = "./data_10.json"

# Audio geometry used to cut each track into equal segments.
SAMPLE_RATE = 22050
TRACK_DURATION = 30  # measured in seconds
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE

# File that save_mfcc skips (presumably unreadable — verify against the dataset).
not_allowed = DATASET_PATH + "/jazz/jazz.00054.wav"

def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extracts MFCCs from music dataset and saves them into a json file along with genre labels.

    Every audio file found in a sub-folder of ``dataset_path`` is cut into
    ``num_segments`` equal pieces; each piece that yields the expected number of
    MFCC frames is stored together with the index of its genre in ``mapping``.

        :param dataset_path (str): Path to dataset
        :param json_path (str): Path to json file used to save MFCCs
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param num_segments (int): Number of segments we want to divide sample tracks into
        :return: None. The result is written to ``json_path`` as a dict with the
            keys "mapping", "labels" and "mfcc".
        """

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # os.walk yields the root as a plain string, so normalise before comparing
    root = os.fspath(dataset_path)

    # loop through all genre sub-folders
    for dirpath, dirnames, filenames in os.walk(dataset_path):

        # the root itself is not a genre folder; compare by value, not identity
        if dirpath == root:
            continue

        # save genre label (i.e., sub-folder name) in the mapping
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # label is the genre's position in the mapping, independent of walk order quirks
        label = len(data["mapping"]) - 1
        print("\nProcessing: {}".format(semantic_label))

        # process all audio files in genre sub-dir
        for f in filenames:

            # load audio file, skipping the one explicitly excluded file
            file_path = os.path.join(dirpath, f)
            if file_path == not_allowed:
                continue
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # a track shorter than expected has no samples left for this or
                # any later segment; stop instead of handing librosa an empty array
                if start >= len(signal):
                    break

                # extract mfcc (keyword arguments: librosa >= 0.10 rejects positional ones)
                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                # store only mfcc feature with expected number of vectors
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, d+1))

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        
        
        
# Run the extraction only when this code executes as the main module.
# Uses 10 segments per track instead of the function's default of 5 and writes JSON_PATH.
if __name__ == "__main__":
    save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)

In [None]:
from tensorflow.keras import losses

In [None]:
import json
import numpy as np
# NOTE(review): this rebinds DATASET_PATH — the extraction cell earlier in the notebook
# uses the same name for the audio folder — to the JSON feature file.
DATASET_PATH = "./data_10.json"
def load_data(dataset_path):
    """Read the feature JSON file and return its MFCCs and labels as arrays.

        :param dataset_path (str): Path to a json file with "mfcc" and "labels" entries
        :return: Tuple (inputs, targets) of numpy arrays built from those two entries
        """
    with open(dataset_path, "r") as json_file:
        payload = json.load(json_file)

    return np.array(payload["mfcc"]), np.array(payload["labels"])

# Load the MFCC segments and their labels from the JSON file named by DATASET_PATH.
inputs,targets = load_data(DATASET_PATH)    

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow.keras  as keras

In [None]:
# Hold out 30% of the samples for evaluation. A fixed random_state makes the shuffle —
# and therefore every metric reported below — reproducible across kernel restarts.
# NOTE(review): each sample is one segment of a track, so segments of the same track
# can land on both sides of this split — confirm whether that is acceptable.
inputs_train,inputs_test,targets_train,targets_test = train_test_split(inputs,targets,test_size=0.3,random_state=42)

In [None]:
# Baseline fully connected network: flatten each sample, pass it through three ReLU
# layers of decreasing width, and finish with a 10-way softmax.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])),
    *[keras.layers.Dense(width, activation="relu") for width in (512, 256, 64)],
    keras.layers.Dense(10, activation="softmax"),
])

optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Train the baseline network; the held-out split doubles as validation data.
history = model.fit(
    inputs_train,
    targets_train,
    validation_data=(inputs_test, targets_test),
    epochs=50,
    batch_size=32,
)

In [None]:
import matplotlib.pyplot as plt
def plot_history(history):
    """Plot accuracy (top panel) and loss (bottom panel) per epoch for train and validation data.

        :param history: Object whose ``history`` dict holds the "accuracy",
            "val_accuracy", "loss" and "val_loss" series (as returned by ``model.fit``
            in this notebook)
        :return: None. The figure is shown with ``plt.show()``
        """
    curves = history.history
    _, (accuracy_ax, error_ax) = plt.subplots(2)

    # top panel: accuracy
    accuracy_ax.plot(curves["accuracy"], label="train accuracy")
    accuracy_ax.plot(curves["val_accuracy"], label="test accuracy")
    accuracy_ax.set_title("Accuracy eval")
    accuracy_ax.set_ylabel("Accuracy")
    accuracy_ax.legend(loc='lower right')

    # bottom panel: loss
    error_ax.plot(curves["loss"], label="train error")
    error_ax.plot(curves["val_loss"], label="test error")
    error_ax.set_xlabel("Epoch")
    error_ax.set_ylabel("Error")
    error_ax.legend(loc='upper right')

    plt.show()
    
    

In [None]:
# Learning curves of the network trained in the previous cell.
plot_history(history)

In [None]:
# Same fully connected layout as the baseline, but every hidden layer carries an L2
# weight penalty (0.001) and is followed by 30% dropout.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])),
    *[
        layer
        for width in (512, 256, 64)
        for layer in (
            keras.layers.Dense(
                width,
                activation="relu",
                kernel_regularizer=keras.regularizers.l2(0.001),
            ),
            keras.layers.Dropout(0.3),
        )
    ],
    keras.layers.Dense(10, activation="softmax"),
])

optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Train the regularised network with the same data and schedule as the baseline.
history = model.fit(
    inputs_train,
    targets_train,
    validation_data=(inputs_test, targets_test),
    epochs=50,
    batch_size=32,
)

In [None]:
# Learning curves of the regularised network trained in the previous cell.
plot_history(history)

# CNN 

In [None]:
import json
import numpy as np

In [None]:
# JSON feature file read by load_dataset / prepare_datasets below.
DATA_PATH = "./data_10.json"

def load_dataset(data_path):
    """Load MFCC features and labels from a JSON file.

        :param data_path (str): Path to a json file with "mfcc" and "labels" entries
        :return: Tuple (X, y) of numpy arrays built from those two entries
        """
    with open(data_path, "r") as json_file:
        contents = json.load(json_file)

    features = np.array(contents["mfcc"])
    labels = np.array(contents["labels"])
    return features, labels
    


In [None]:
from sklearn.model_selection import train_test_split

def prepare_datasets(test_size,val_size,random_state=None):
    """Load the dataset and split it into train, validation and test sets for the CNN.

        :param test_size: Share of the whole dataset held out as the test set
            (forwarded to ``train_test_split``)
        :param val_size: Share of the remaining training data held out for validation
        :param random_state: Optional seed forwarded to both splits so they can be
            reproduced; the default ``None`` keeps the previous unseeded behaviour
        :return: Tuple (X_train, X_validation, X_test, y_train, y_validation, y_test);
            every X array carries one extra trailing axis
        """

    X , y = load_dataset(DATA_PATH)
    X_train,X_test,y_train,y_test = train_test_split(X , y , test_size = test_size , random_state = random_state)
    X_train,X_validation,y_train,y_validation = train_test_split(X_train , y_train , test_size = val_size , random_state = random_state)
    # add a trailing channel axis for the convolutional layers,
    # e.g. (num_samples,130,13) -> (num_samples,130,13,1)
    X_train = X_train[...,np.newaxis]
    X_validation = X_validation[...,np.newaxis]
    X_test = X_test[...,np.newaxis]

    return X_train,X_validation,X_test,y_train,y_validation,y_test

    

In [None]:
# 25% of the data becomes the test set; 20% of what remains becomes the validation set.
X_train,X_validation,X_test,y_train,y_validation,y_test = prepare_datasets(0.25,0.2)

In [None]:
import tensorflow.keras as keras


def build_model(input_shape):
    """Build the (uncompiled) convolutional classifier.

    Three Conv2D -> MaxPooling2D -> BatchNormalization stages feed a 64-unit dense
    layer with 30% dropout and a 10-way softmax output.

        :param input_shape (tuple): Shape of one input sample, passed to the first Conv2D
        :return: keras.Sequential model
        """
    layers = keras.layers
    return keras.Sequential([
        # stage 1
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        # stage 2
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        # stage 3
        layers.Conv2D(120, (2, 2), activation='relu'),
        layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        # classifier head
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ])

In [None]:
# Build the CNN for the per-sample shape of the training data and compile it.
input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
model = build_model(input_shape)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the CNN, monitoring the validation split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=30,
)

In [None]:
# Score the trained CNN on the untouched test split.
test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(
    "Accuracy on test is {}".format(test_accuracy),
    "Test error is {}".format(test_error),
    sep="\n",
)

In [None]:
def predict(model , X , y):
    """Print the expected label next to the model's prediction for one sample.

        :param model: Object with a ``predict`` method that accepts a batch of samples
        :param X: A single sample; a leading batch axis of size 1 is added before predicting
        :param y: Expected label, only used in the printed message
        :return: None
        """
    single_sample_batch = X[np.newaxis, ...]
    scores = model.predict(single_sample_batch)
    # highest-scoring class of the only row in the batch
    best_class = np.argmax(scores, axis=1)[0]
    print("Expected index : {} predicted index : {}".format(y, best_class))
    

In [None]:
# Spot-check one test sample (index 100) against its expected label.
X, y = X_test[100], y_test[100]
predict(model, X, y)

# RNN

In [None]:
from sklearn.model_selection import train_test_split

def prepare_datasets(test_size,val_size,random_state=None):
    """Load the dataset and split it into train, validation and test sets for the RNN.

    This redefines the ``prepare_datasets`` used in the CNN section; unlike that
    version, the arrays are returned as loaded, without an extra trailing axis.

        :param test_size: Share of the whole dataset held out as the test set
            (forwarded to ``train_test_split``)
        :param val_size: Share of the remaining training data held out for validation
        :param random_state: Optional seed forwarded to both splits so they can be
            reproduced; the default ``None`` keeps the previous unseeded behaviour
        :return: Tuple (X_train, X_validation, X_test, y_train, y_validation, y_test)
        """

    X , y = load_dataset(DATA_PATH)
    X_train,X_test,y_train,y_test = train_test_split(X , y , test_size = test_size , random_state = random_state)
    X_train,X_validation,y_train,y_validation = train_test_split(X_train , y_train , test_size = val_size , random_state = random_state)

    return X_train,X_validation,X_test,y_train,y_validation,y_test

    

In [None]:
# Re-split the data for the RNN; this overwrites the arrays prepared for the CNN.
X_train,X_validation,X_test,y_train,y_validation,y_test = prepare_datasets(0.25,0.2)

In [None]:
# Display the shape of the training array produced by the split above.
X_train.shape

In [None]:
def build_model(input_shape):
    """Build the (uncompiled) recurrent classifier.

    Two stacked 64-unit LSTM layers feed a 64-unit dense layer with 30% dropout and
    a 10-way softmax output.

        :param input_shape (tuple): Shape of one input sample, passed to the first LSTM
        :return: keras.Sequential model
        """
    layers = keras.layers
    return keras.Sequential([
        # 64 is the number of units of the layer, not the number of time steps;
        # return_sequences=True hands the whole output sequence to the next LSTM
        layers.LSTM(64, input_shape=input_shape, return_sequences=True),
        layers.LSTM(64),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ])

In [None]:
# Build the RNN for the per-sample shape of the training data and compile it.
input_shape = tuple(X_train.shape[axis] for axis in (1, 2))
model = build_model(input_shape)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the RNN built in the previous cell.
model.summary()

In [None]:
# Train the RNN, monitoring the validation split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=30,
)

In [None]:
# Score the trained RNN on the untouched test split.
test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(
    "Accuracy on test is {}".format(test_accuracy),
    "Test error is {}".format(test_error),
    sep="\n",
)