In [2]:
import librosa
import numpy as np
import os
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.utils import shuffle

In [6]:
def featurise_data(folder):
    res = []
    time_slice = 30
    for genre in os.listdir(folder):
        dir_path = os.path.join(folder, genre)
        if os.path.isdir(dir_path):
            for file in os.listdir(dir_path):
                if '.mp3' in file:
                    songname = os.path.join(dir_path, file)
                    print(f'Featurising song: {songname}')
                    duration = librosa.get_duration(filename=songname)
                    samples = int(duration // time_slice)
                    for s in range(2, samples):
                        y, sr = librosa.load(songname, mono=True, offset=s*time_slice, duration=time_slice)
                        mfcc = librosa.feature.mfcc(y=y, sr=sr)
                        res.append(np.hstack([np.array([genre]), np.median(mfcc, axis=1)]))
    return pd.DataFrame(np.array(res))

In [7]:
train = featurise_data('../songs/train/')
test = featurise_data('../songs/test/')

Featurising song: ../songs/train/mohanam/02-enduku_bAga_teliyadu-mOhanam.mp3
Featurising song: ../songs/train/mohanam/01-ninnu_kOriyunnAnurA-VARNAM-mOhanam.mp3
Featurising song: ../songs/train/mohanam/04-rArA_rAjIva_lOcana_rAma-mOhanam.mp3
Featurising song: ../songs/train/mohanam/09-swagatham_krishna-mohanam.mp3
Featurising song: ../songs/train/mohanam/02-En_paLLI_koNDIrayya-mOhanam-aruNAcala_kavi.mp3
Featurising song: ../songs/train/mohanam/04-pAhi_mAm_pArvati_paramEshvari-mOhanam.mp3
Featurising song: ../songs/train/mohanam/01-Maruvaka_daya_mOhananga_nApai-mOhanam.mp3
Featurising song: ../songs/train/mohanam/06-dhim_dhim_kitataka_dhimta-mOhanam.mp3
Featurising song: ../songs/train/mohanam/06-sadA_pAlaya_sArasAkSi-mOhanam.mp3
Featurising song: ../songs/train/mohanam/04-mayil_vAhanA_vaLLi-mOhanam.mp3
Featurising song: ../songs/train/mohanam/07_giridhara_gOpAla_mOhana_pApanAsam_shivan.mp3
Featurising song: ../songs/train/mohanam/13-jaya_mangaLam_nitya-mOhanam.mp3
Featurising song: ../so

In [8]:
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,mohanam,-176.14355,127.84101,-61.583946,13.907997,-12.348064,-22.32513,-10.976179,-27.523857,-13.104563,...,-19.61353,3.3898528,-9.396015,5.5523033,1.8146789,-7.2779694,-2.4999118,6.3418217,-11.846633,-1.9977283
1,mohanam,-151.59512,114.136215,-72.81222,8.535552,-19.30595,-21.013327,-9.82255,-26.85816,-14.194257,...,-22.188576,3.0120127,-11.667684,5.793357,-3.9413216,-6.5100236,-2.1149497,2.050315,-8.128593,6.8564577
2,mohanam,-151.77126,104.05441,-73.2093,5.809149,-21.483883,-20.574593,-7.6724606,-25.219181,-12.896346,...,-21.560486,4.0897655,-11.80772,5.332128,-5.1456957,-5.8093204,1.392189,4.375893,-3.238349,10.547811
3,mohanam,-159.75441,116.82361,-72.42599,6.436346,-15.484459,-20.90335,-9.409181,-22.470825,-14.668507,...,-23.819666,2.983787,-11.152506,2.5721855,-4.819583,-1.1119543,6.5185823,14.185802,6.100915,7.5950446
4,mohanam,-163.90497,113.75903,-72.449005,10.20752,-14.549137,-18.184446,-9.009656,-23.108187,-15.285181,...,-25.078304,-1.3208702,-9.604034,4.201627,-3.7372203,3.9685543,8.37973,8.353036,-0.33120215,4.3409824


In [9]:
X_original, lab_train = train.iloc[:, 1:], train[0]
X_test, lab_test = test.iloc[:, 1:], test[0]

model_y = LabelEncoder().fit(lab_train)
y_original = model_y.transform(lab_train)
y_test = model_y.transform(lab_test)
X_train, y_train = shuffle(X_original, y_original, random_state=42)

In [13]:
model = Pipeline([('scaler', StandardScaler()),
                  ('gb', GradientBoostingClassifier(learning_rate=0.05, n_estimators=200, random_state=42))])
model.fit(X_train, y_train)
np.mean(model.predict(X_test) == y_test)

0.6707317073170732

In [18]:
def predict_raaga(model, songname):
    time_slice = 30           
    duration = librosa.get_duration(filename=songname)
    samples = int(duration // time_slice)
    preds = []
    for s in range(2, samples):
        y, sr = librosa.load(songname, mono=True, offset=s*time_slice, duration=time_slice)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        X_features = np.median(mfcc, axis=1)
        preds.append(model.predict([X_features])[0])
    return np.argmax(np.bincount(preds))

In [19]:
def run_predictions(model, folder):
    for genre in os.listdir(folder):
        dir_path = os.path.join(folder, genre)
        if os.path.isdir(dir_path):
            for file in os.listdir(dir_path):
                if '.mp3' in file:
                    pred = predict_raaga(model, os.path.join(dir_path, file))
                    print(f'Actual = {genre}, Prediction = {pred}')

In [20]:
run_predictions(model, '../songs/test/')

Actual = mohanam, Prediction = 1
Actual = mohanam, Prediction = 0
Actual = mohanam, Prediction = 1
Actual = mohanam, Prediction = 1
Actual = mohanam, Prediction = 1
Actual = mohanam, Prediction = 1
Actual = mohanam, Prediction = 1
Actual = hindolam, Prediction = 1
Actual = hindolam, Prediction = 0
Actual = hindolam, Prediction = 1
Actual = hindolam, Prediction = 0
Actual = hindolam, Prediction = 1
Actual = hindolam, Prediction = 1


In [24]:
import pickle

with open('raaga_model_1.pkl', 'wb') as f:
    pickle.dump(model, f)

In [27]:
with open('raaga_model_1.pkl', 'rb') as f:
    p_mod = pickle.load(f)