# Setup
This Jupyter Notebook is meant to be executed directly from Google Colab in a plug and play manner.
To enable this, run the code cell below to set up the notebook.

In [None]:
import os
import sys

!git clone https://github.com/terryluan12/Genrify.git
!cd Genrify && make

sys.path.insert(0, "Genrify/src")

Run the code below if you are using Google Drive to retrieve the test data and the CNN models for the ensemble model.

In [None]:
# Colab helper used to attach Google Drive to this runtime
from google.colab import drive

# Attach Drive under /content/drive, requesting a remount if it is already attached
drive.mount(mountpoint='/content/drive', force_remount=True)

## Preprocessing Code
The following code can be run to download and preprocess the datasets.

In [1]:
from datasources import download_datasets, convert_files_to_wav
from preprocessing import preprocess



In [None]:
# Make sure to change subset_num to the subset which you are using
subset_num = None
#Possible method values:
#"spec", "mel", "chroma", "mfcc" to retrieve exclusive training datasets
#"create_testing_data" to create testing data for the first time (not needed anymore due to option below)
#"test" to unzip already created testing data from Google Drive
method = None

if method=="create_testing_data":
    from google.colab import drive
    # Mount Google Drive
    drive.mount('/content/drive', force_remount=True)
    !unzip /content/drive/MyDrive/APS360_Team_Project/test_data_mp3.zip
    data_dir = "/content/test_data_mp3"
    output_dir = "/content/Genrify/src/datasources/test_data_wav"
    convert_files_to_wav(data_dir, output_dir)
elif not os.path.isdir("Genrify/src/datasources/processed_data"):
    download_datasets("Genrify/src")

if method!="test":
    preprocess(subset_num, method, "Genrify/src")
else:
    !unzip -q '/content/drive/MyDrive/APS360_Team_Project/test_mel.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360_Team_Project/test_spectrogram.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360_Team_Project/test_chroma.zip' -d '/'
    !unzip -q '/content/drive/MyDrive/APS360_Team_Project/test_mfcc.zip' -d '/'

In [2]:
from cnn import mfcc_model, training
from utils import plot
from cnn.datahandler.DataHandler import DataHandler

# Ensemble Model
This code block implements ensemble learning using all of our best models.

In [None]:
import torch

from ensemble import ensemble

# Ideally, we should get the dataloader for the full dataset
spec_data_dir = '/content/Genrify/src/datasources/spectrogram'
spec_data_handler = DataHandler(spec_data_dir, batch_size=32, num_workers=4)

# Only request GPU execution when a CUDA device is actually present, so the
# cell also works on CPU-only runtimes (behaviour on GPU runtimes is unchanged).
# NOTE(review): assumes `cuda` is a boolean flag selecting GPU execution - confirm
# against ensemble.full_model.
use_cuda = torch.cuda.is_available()

print("Test Accuracy:", ensemble.full_model(spec_data_handler.test_loader, cuda=use_cuda))

## Example of training a model

In [1]:
# # training example (left commented out on purpose; uncomment every line below to run it)
# mfcc_data_dir = os.path.join("Genrify", "src", "datasources", "mfcc")
# mfcc_data_handler = DataHandler(mfcc_data_dir, batch_size=32, num_workers=4)
# lr = 1e-4
# num_epochs = 30
# mfcc_cnn = mfcc_model.MFCC_CNN()
# print(f"Learning rate: {lr}")
# # NOTE(review): the trailing 32 is presumably the batch size and should match DataHandler's batch_size - confirm
# training.train(mfcc_cnn, mfcc_data_handler.train_loader, mfcc_data_handler.val_loader, num_epochs, lr, 32)
# # NOTE(review): num_epochs-1 presumably refers to the last trained epoch - confirm against training.get_model_name
# model_path = training.get_model_name(mfcc_cnn.name, 32, lr, num_epochs-1)

# # plot in the notebook
# %matplotlib inline
# plot.plot_training_curve(model_path)
# plot.plot_confusion_matrix(model_path, range(10))