<a href="https://colab.research.google.com/github/s2t2/ml-music/blob/main/notebooks/youtube/YouTube_Audio_Test_for_GTZAN_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

### Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset and model directories
# configured in the following cells are located under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Directory on the mounted Drive that holds the dataset files.
# You might need to update this path, or create a Drive shortcut that resolves to it.
DATASET_PATH = '/content/drive/MyDrive/Research/DS Research Shared 2023/data/ml_music_2023'

# Show the configured path, then stop here if the directory does not exist.
print(DATASET_PATH)
assert os.path.isdir(DATASET_PATH)

/content/drive/MyDrive/Research/DS Research Shared 2023/data/ml_music_2023


In [7]:
# Directory on the mounted Drive that holds the saved (pickled) models.
# You might need to update this path, or create a Drive shortcut that resolves to it.
# NOTE(review): the path sits inside one user's folder (users/mjr300) — confirm it is shared with you.
MODELS_DIRPATH = '/content/drive/MyDrive/Research/DS Research Shared 2023/users/mjr300/ML Music/models'

# Show the configured path, then stop here if the directory does not exist.
print(MODELS_DIRPATH)
assert os.path.isdir(MODELS_DIRPATH)

/content/drive/MyDrive/Research/DS Research Shared 2023/users/mjr300/ML Music/models


In [None]:
#CHANNELS_DIRPATH = os.path.join(DATASET_PATH, "youtube_channels")
#channel_names = os.listdir(CHANNELS_DIRPATH)
#print(channel_names)

## Load Pre-trained Model (SVC)

Load the pre-trained model from a pickle file. It was trained on the GTZAN dataset, using just the MFCC features provided in that dataset.

In [None]:
import pickle 

def read_model(model_filepath):
    """Load and return the object stored in a local pickle file.

    A status banner and the file path are printed before the file is read.

    Args:
        model_filepath: Path of the pickle file; it is opened in binary mode.

    Returns:
        The object that ``pickle.load`` reconstructs from the file.
    """
    print("READING MODEL FROM LOCAL FILE...", model_filepath, sep="\n")
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(model_filepath, "rb") as model_file:
        model = pickle.load(model_file)
    return model


In [10]:
# Load the saved model object from the models directory and print its type
# as a quick check of what was unpickled.
model_filepath = os.path.join(MODELS_DIRPATH, "best_svc_mfcc_features.gpickle")
gs = read_model(model_filepath)
print(type(gs))

READING MODEL FROM LOCAL FILE...
/content/drive/MyDrive/Research/DS Research Shared 2023/users/mjr300/ML Music/models/best_svc_mfcc_features.gpickle
<class 'sklearn.model_selection._search.GridSearchCV'>


In [36]:
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

# Take the best estimator stored on the loaded search object; the bare
# expression on the next line displays it as the cell output.
clf = gs.best_estimator_
clf
# Uncomment to list the estimator's attributes whose names end in an underscore:
#[m for m in dir(clf) if m.endswith("_")]

In [38]:
# Class labels stored on the classifier, i.e. the genres it can predict.
genres = clf.classes_
print(genres)

['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']


In [35]:
# Feature names stored on the estimator, converted to a plain list.
# This list is reused later to select the matching columns from the YouTube features.
features = clf.feature_names_in_.tolist()
print(features)

['mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var']


In [None]:
# Parameter learned in Platt scaling when probability=True.
#print(clf.probA_.shape)
#print(clf.probB_.shape)

In [42]:
from pandas import DataFrame

# Support vectors are the data points closest to the hyperplane; they influence
# the position and orientation of the hyperplane.

print(clf.n_support_) # Number of support vectors for each class.
#print(clf.support_.shape)
print(clf.support_vectors_.shape)

# Wrap the support-vector matrix in a DataFrame labelled with the model's
# feature names, and preview the first rows.
support_vectors = DataFrame(clf.support_vectors_, columns=features)
support_vectors.head()

[71 34 70 78 61 58 43 41 61 80]
(597,)
(597, 40)


Unnamed: 0,mfcc1_mean,mfcc1_var,mfcc2_mean,mfcc2_var,mfcc3_mean,mfcc3_var,mfcc4_mean,mfcc4_var,mfcc5_mean,mfcc5_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,0.837211,0.074721,0.621591,0.077492,0.37539,0.073283,0.565857,0.174444,0.327267,0.121078,...,0.551128,0.145206,0.284362,0.08842,0.363171,0.092895,0.394124,0.106806,0.565402,0.082737
1,0.786003,0.025381,0.463928,0.020045,0.501916,0.053661,0.784013,0.110283,0.041431,0.045811,...,0.748094,0.077955,0.142308,0.077587,0.581133,0.102502,0.328029,0.078676,0.670893,0.066104
2,0.449832,0.214716,0.566466,0.14842,0.675523,0.488847,0.703801,0.299859,0.810654,0.171346,...,0.65132,0.279802,0.442593,0.316748,0.455556,0.284822,0.73573,0.335508,0.655656,0.325476
3,0.630959,0.339159,0.707757,0.115272,0.429683,0.550379,0.554208,0.23648,0.286354,0.205989,...,0.452007,0.182134,0.418786,0.094423,0.312683,0.154391,0.459207,0.178088,0.488349,0.169776
4,0.546814,0.141598,0.715719,0.105347,0.514289,0.418682,0.827603,0.20408,0.632837,0.116883,...,0.54141,0.094054,0.496167,0.13529,0.386717,0.211711,0.784511,0.210459,0.478591,0.132617


## Load Youtube Audio Features

In [9]:
from pandas import read_csv

csv_filepath = os.path.join(DATASET_PATH, "30s_features_mfcc_20.csv")

# Read the YouTube MFCC feature table and discard the "Unnamed: 0" column
# in one expression, so re-running this cell always starts from the file.
mfcc_df = read_csv(csv_filepath).drop(columns="Unnamed: 0")
mfcc_df.head()

Unnamed: 0,channel_name,video_id,track_name,mfcc_1_mean,mfcc_1_var,mfcc_2_mean,mfcc_2_var,mfcc_3_mean,mfcc_3_var,mfcc_4_mean,...,mfcc_16_mean,mfcc_16_var,mfcc_17_mean,mfcc_17_var,mfcc_18_mean,mfcc_18_var,mfcc_19_mean,mfcc_19_var,mfcc_20_mean,mfcc_20_var
0,maggie_rogers,0dzZXpf7sSQ,track_1,-247.604264,126.564314,94.436743,44.407494,-17.821985,39.812022,5.147592,...,-9.767659,9.709967,-15.15989,10.092836,-5.886746,8.073567,-3.59984,7.643674,-0.015861,7.409028
1,maggie_rogers,0dzZXpf7sSQ,track_2,-283.419875,68.689398,108.620859,45.736677,22.183784,23.642963,36.270842,...,-20.55451,14.498205,-6.835958,12.063171,-8.068021,14.05134,-3.437384,10.086706,8.037604,14.541075
2,maggie_rogers,0dzZXpf7sSQ,track_3,-191.882983,87.372699,118.984447,32.211981,8.047093,18.469054,16.9371,...,-6.896051,12.169556,-4.585148,6.950206,-3.239831,7.794687,-4.770292,8.486704,-1.037769,8.149938
3,maggie_rogers,0dzZXpf7sSQ,track_4,-182.191475,71.259583,74.172125,38.583971,0.74717,18.817206,21.487847,...,-5.200492,13.869197,-5.36841,10.311301,-1.005449,8.997953,-2.508238,9.274194,5.581843,12.752089
4,maggie_rogers,0dzZXpf7sSQ,track_5,-204.492367,68.350796,101.394189,46.044563,21.449712,21.58782,38.283024,...,-7.574725,10.880968,-6.204093,9.224697,-2.311896,8.471938,-2.994891,7.776321,5.986408,10.474859


## Out of Sample Predictions

In [None]:
#mfcc_df["track_label"] = mfcc_df["channel_name"] + "_" + mfcc_df["video_id"] + "_" + mfcc_df["track_name"]
#mfcc_df["track_label"]

In [56]:
# Make the YouTube column names match the GTZAN convention ("mfcc_1_mean" -> "mfcc1_mean").
mfcc_df = mfcc_df.rename(columns=lambda name: name.replace("mfcc_", "mfcc"))

# Keep only the columns the model was fitted on, in the model's own order.
x = mfcc_df.loc[:, features]
#x.index = mfcc_df["track_label"]
x

Unnamed: 0,mfcc1_mean,mfcc1_var,mfcc2_mean,mfcc2_var,mfcc3_mean,mfcc3_var,mfcc4_mean,mfcc4_var,mfcc5_mean,mfcc5_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,-247.604264,126.564314,94.436743,44.407494,-17.821985,39.812022,5.147592,21.637402,-5.117160,19.775668,...,-9.767659,9.709967,-15.159890,10.092836,-5.886746,8.073567,-3.599840,7.643674,-0.015861,7.409028
1,-283.419875,68.689398,108.620859,45.736677,22.183784,23.642963,36.270842,18.142406,4.363370,13.825269,...,-20.554510,14.498205,-6.835958,12.063171,-8.068021,14.051340,-3.437384,10.086706,8.037604,14.541075
2,-191.882983,87.372699,118.984447,32.211981,8.047093,18.469054,16.937100,13.607642,-1.512961,9.572726,...,-6.896051,12.169556,-4.585148,6.950206,-3.239831,7.794687,-4.770292,8.486704,-1.037769,8.149938
3,-182.191475,71.259583,74.172125,38.583971,0.747170,18.817206,21.487847,14.833436,-4.348487,11.652041,...,-5.200492,13.869197,-5.368410,10.311301,-1.005449,8.997953,-2.508238,9.274194,5.581843,12.752089
4,-204.492367,68.350796,101.394189,46.044563,21.449712,21.587820,38.283024,15.049941,10.165238,14.806656,...,-7.574725,10.880968,-6.204093,9.224697,-2.311896,8.471938,-2.994891,7.776321,5.986408,10.474859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,-126.295066,76.964783,105.274319,32.933817,-2.122840,20.203390,37.958635,15.411133,4.583544,14.222600,...,-2.008210,8.618912,-3.950875,9.735785,2.288542,9.793921,-6.366817,7.404871,-1.547669,10.210957
239,-122.054477,69.334728,111.313190,28.755073,-4.131346,20.107382,31.233111,14.249997,1.186262,18.077495,...,-3.013207,6.449025,-8.324395,7.403035,-2.395323,8.365698,-5.696421,8.885964,1.138039,9.225098
240,-125.434740,66.307994,109.874518,22.530859,-0.937652,16.624893,32.527456,9.195076,11.426024,9.682593,...,-0.950024,7.358911,-6.365503,8.418615,-0.499741,7.825861,-3.160970,8.408555,2.832871,8.286946
241,-102.869669,60.974435,105.135508,26.251879,-7.195252,20.858523,32.119343,16.401840,2.399281,15.151803,...,-1.816212,6.711948,-6.550254,8.668815,-0.327015,8.528303,-6.540576,8.001002,1.182942,8.671015


In [57]:
# Predict a genre label for every row of the YouTube feature table.
# NOTE(review): the support vectors displayed earlier in this notebook all lie within [0, 1],
# while the values in `x` are raw MFCC statistics (e.g. mfcc1_mean around -247).
# The model was presumably fitted on scaled features; if so, `x` needs the same
# scaling applied before calling predict — TODO confirm against the training notebook.
y_pred = clf.predict(x)
y_pred.shape

(243,)

In [58]:
# Attach the predictions as a new column, then show them next to the track identifiers.
mfcc_df = mfcc_df.assign(predicted_genre=y_pred)
mfcc_df.loc[:, ["channel_name", "video_id", "track_name", "predicted_genre"]]

Unnamed: 0,channel_name,video_id,track_name,predicted_genre
0,maggie_rogers,0dzZXpf7sSQ,track_1,reggae
1,maggie_rogers,0dzZXpf7sSQ,track_2,reggae
2,maggie_rogers,0dzZXpf7sSQ,track_3,reggae
3,maggie_rogers,0dzZXpf7sSQ,track_4,reggae
4,maggie_rogers,0dzZXpf7sSQ,track_5,classical
...,...,...,...,...
238,chris_stapleton,l6_w3887Rwo,track_5,classical
239,chris_stapleton,l6_w3887Rwo,track_6,jazz
240,chris_stapleton,l6_w3887Rwo,track_7,classical
241,chris_stapleton,l6_w3887Rwo,track_8,jazz


In [66]:
# Count of each predicted genre among the maggie_rogers tracks.
mfcc_df.loc[mfcc_df["channel_name"] == "maggie_rogers", "predicted_genre"].value_counts()

jazz         37
classical    18
reggae       13
Name: predicted_genre, dtype: int64

In [65]:
# Count of each predicted genre among the chris_stapleton tracks.
mfcc_df.loc[mfcc_df["channel_name"] == "chris_stapleton", "predicted_genre"].value_counts()

classical    48
jazz         37
reggae        7
Name: predicted_genre, dtype: int64

In [67]:
# Count of each predicted genre among the john_mayer tracks.
mfcc_df.loc[mfcc_df["channel_name"] == "john_mayer", "predicted_genre"].value_counts()

jazz         51
classical    31
reggae        1
Name: predicted_genre, dtype: int64

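Hmm, only jazz, classical, and reggae are ever predicted. Not the most promising results. One likely cause: the support vectors displayed above all lie between 0 and 1, which suggests the model was trained on scaled features, whereas the YouTube MFCC values passed to it here are unscaled (e.g. around -247 for `mfcc1_mean`). Applying the same scaling used at training time before predicting is worth trying.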