# Setup
This Jupyter Notebook is meant to be executed directly in Google Colab in a plug-and-play manner.
To enable this, run the code below to set up the notebook.

In [None]:
import os
import sys

!git clone https://github.com/terryluan12/Genrify.git
!cd Genrify && make

sys.path.insert(0, "Genrify/src")

Run the code below if you are using Google Drive to retrieve the test data and the CNN models for the ensemble model.

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive; force_remount=True asks Colab to
# mount again even when a mount already exists.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

## Preprocessing Code
The following code can be run to download and preprocess the datasets.

In [1]:
from datasources import download_datasets, convert_files_to_wav
from preprocessing import preprocess
from cnn.testhandler.TestHandler import TestHandler



In [None]:
# Make sure to change subset_num to the subset which you are using
subset_num = 0
#Possible method values:
#"spec", "mel", "chroma", "mfcc" to retrieve exclusive training datasets
#"create_testing_data" to create testing data for the first time (not needed anymore due to option below)
#"test" to unzip already preprocessed testing data from Google Drive
method = None

if method!="test":
    if method=="create_testing_data":
        !unzip /content/drive/MyDrive/APS360_Team_Project/Test_Data/test_data_mp3.zip
        data_dir = "/content/test_data_mp3"
        output_dir = "/content/Genrify/src/datasources/test_data_wav"
        convert_files_to_wav(data_dir, output_dir)
    elif not os.path.isdir("Genrify/src/datasources/processed_data"):
        download_datasets("Genrify/src")

    preprocess(subset_num, method, "Genrify/src")
    if method=="create_testing_data":
      !zip -r '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_mel.zip' '/content/Genrify/src/datasources/mel'
      !zip -r '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_spectrogram.zip' '/content/Genrify/src/datasources/spectrogram'
      !zip -r '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_chroma.zip' '/content/Genrify/src/datasources/chroma'
      !zip -r '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_mfcc.zip' '/content/Genrify/src/datasources/mfcc'
else:
    #Unzipping already preprocessed testing data
    !unzip -q '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_mel.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_spectrogram.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_chroma.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360 Team Project/Test_Data/test_mfcc.zip' -d '/'
    test_handler = TestHandler(batch_size=1, num_workers=2)

In [2]:
from cnn import mfcc_model, training
from utils import plot
from cnn.datahandler.DataHandler import DataHandler

# Ensemble Model
This code block implements ensemble learning over all of our best models.

Note that the batch size used in TestHandler should divide the total number of samples evenly. This is why batch_size=1 is used.

In [None]:
from ensemble import ensemble

# Fetch the weak learners that make up the ensemble.
weak_learners = ensemble.get_weak_learners()

# Test loaders in the order spec, mfcc, chroma, mel.
# NOTE(review): presumably matched by position with weak learners 0-3 - confirm in ensemble.full_model.
test_loaders = [
    test_handler.spec_test_loader,
    test_handler.mfcc_test_loader,
    test_handler.chroma_test_loader,
    test_handler.mel_test_loader,
]
selected_learners = [weak_learners[idx] for idx in range(4)]

# cuda=True is passed through unchanged, so this cell expects a GPU runtime.
test_accuracy = ensemble.full_model(test_loaders, weak_learners=selected_learners, cuda=True)
print("Test Accuracy:", test_accuracy)

## Example of training a model

In [1]:
# # training example
# This whole cell is disabled; uncomment the lines below to train the MFCC CNN.
# It relies on names imported in earlier cells: os, mfcc_model, training, plot
# and DataHandler.
# mfcc_data_dir = os.path.join("Genrify", "src", "datasources", "mfcc")
# mfcc_data_handler = DataHandler(mfcc_data_dir, batch_size=32, num_workers=4)
# lr = 1e-4
# num_epochs = 30
# mfcc_cnn = mfcc_model.MFCC_CNN()
# print(f"Learning rate: {lr}")
# NOTE(review): the trailing 32 presumably mirrors the DataHandler batch_size above - confirm in training.train.
# training.train(mfcc_cnn, mfcc_data_handler.train_loader, mfcc_data_handler.val_loader, num_epochs, lr, 32)
# model_path = training.get_model_name(mfcc_cnn.name, 32, lr, num_epochs-1)

# # plot in the notebook
# %matplotlib inline
# plot.plot_training_curve(model_path)
# plot.plot_confusion_matrix(model_path, range(10))

## Training Chroma

In [None]:
# training example for chroma
# This whole cell is disabled; uncomment the lines below to train the chroma
# classifier. Besides the import on the next line it relies on DataHandler,
# training and plot imported in earlier cells.
#from cnn import chroma_model, training_chroma

#chroma_data_dir = '/content/Genrify/src/datasources/chroma'
#chroma_data_handler = DataHandler(chroma_data_dir, batch_size=32, num_workers=2)
#lr = 1e-4
#num_epochs = 60
#chroma_cnn = chroma_model.ChromaClassifier()
#print(f"Learning rate: {lr}")

# NOTE(review): the trailing positional arguments (32, 10, 0.1) are not named
# here; check training_chroma.train for their meaning before changing them.
#training_chroma.train(chroma_cnn, chroma_data_handler.train_loader, chroma_data_handler.val_loader, num_epochs, lr, 32, 10, 0.1)
#model_path = training.get_model_name(chroma_cnn.name, 32, lr, num_epochs-1)
# %matplotlib inline
# plot.plot_training_curve(model_path)
# plot.plot_confusion_matrix(model_path, range(10))