In [1]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [2]:
!pip install essentia

Collecting essentia
[?25l  Downloading https://files.pythonhosted.org/packages/71/fd/cbb601736ebdf5bfdaf8d215e7741b76b6519371ddf1f64427cf275af05d/essentia-2.1b6.dev374-cp37-cp37m-manylinux1_x86_64.whl (12.0MB)
[K     |████████████████████████████████| 12.0MB 309kB/s 
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev374


In [3]:
import numpy as np
import pandas as pd
import librosa
import os
from glob import glob
from scipy.stats import norm, kurtosis, skew, entropy
from tqdm import tqdm
from sklearn.model_selection import train_test_split, cross_val_score
import essentia
import essentia.standard as es

In [4]:
class FeatureExtracter():
    df = pd.DataFrame()
    def __init__(self, path, mfcc=True, entropy=False):
        self.path = path
        self.data_files = glob(path + "/*/*.wav")
        self.spectral_features = ['spectral_centroid', 'spectral_bandwidth','spectral_contrast', 'spectral_flatness',
                    'spectral_rolloff', 'zero_crossing_rate','poly_features', 'rms']
        self.mfcc = mfcc
        self.entropy = entropy
        self.spectral_columns = []
        self.lpc_columns = []
        if mfcc:
            self.spectral_features.append('mfcc')
        
        self.genres = {v:k for k,v in enumerate([i[54:] for i in glob(path + "/*")])}
        self.fields = ['mean', 'std', 'kurtosis', 'skewness', 'median', 'min', 'max']
        if entropy:
            self.fields.append('entropy')
        
        classes = list(self.genres)

    
    def getColumnNames(self):
        features = []
        for i in self.spectral_features:
            features.append(i)
            features.append(i + "_delta1")
            features.append(i + "_delta2")

        spectral_columns = [i+"_"+j for i in features for j in self.fields]
        
        lpc_columns = ["LPC_"+i for i in self.fields]

        return spectral_columns, lpc_columns
    
    def calculate_values(self, arr):
        mean = arr.mean()
        std = arr.std()
        kurt = kurtosis(arr.T)[0]
        skewness = skew(arr.T)[0]
        median = np.median(arr)
        min_val = arr.min()
        max_val = arr.max()
        values = list((mean, std, kurt, skewness, median, min_val, max_val))
        if self.entropy:
            ent = entropy(arr)
            values.append(ent)
        return values

    def extract(self):
        spectral_df = []
        for i in tqdm(self.data_files):
            x, sr = librosa.load(i)
            line = []
            for k in self.spectral_features:
                if k == 'spectral_flatness':
                    arr = eval("librosa.feature.{}(x)".format(k))
                else:
                    arr = eval("librosa.feature.{}(x,sr)".format(k))
                arr_delta1 = librosa.feature.delta(arr, order=1)
                arr_delta2 = librosa.feature.delta(arr, order=2)
                line += self.calculate_values(arr)
                line += self.calculate_values(arr_delta1)
                line += self.calculate_values(arr_delta2)
            spectral_df.append(line.copy())

        lpc_df = []
        for i in tqdm(self.data_files):
            line = []
            audio = es.MonoLoader(filename=i)()
            lpc = es.LPC()(audio)[0]
            mean = lpc.mean()
            std = lpc.std()
            kurt = kurtosis(lpc)
            skewness = skew(lpc)
            median = np.median(lpc)
            min_val = lpc.min()
            max_val = lpc.max()
            line += [mean, std, kurt, skewness, median, min_val, max_val]
            lpc_df.append(line)
        
        spectral_columns, lpc_columns = self.getColumnNames()

        spectral_df = pd.DataFrame(data=spectral_df, columns=spectral_columns)
        lpc_df = pd.DataFrame(data=lpc_df, columns=lpc_columns)
        df = pd.concat([spectral_df, lpc_df], axis=1)
        labels = [self.genres[k] for k in [i[54:54+(i[54:].find("/"))] for i in self.data_files]]
        df['labels'] = labels.copy()
        self.df = df
        return self.df

    def save(self, path):
        self.df.to_pickle(path, protocol=4)

In [None]:
extracter = FeatureExtracter("/content/drive/MyDrive/For_Colab/Data/genres_original",
                             mfcc=True, entropy=False)

In [None]:
df = extracter.extract()

100%|██████████| 1000/1000 [16:49<00:00,  1.01s/it]
100%|██████████| 1000/1000 [11:30<00:00,  1.45it/s]


In [None]:
extracter.save("/content/drive/MyDrive/For_Colab/Data/extracter.pkl")

In [5]:
df = pd.read_pickle("/content/drive/MyDrive/For_Colab/Data/extracter.pkl")
df.head()

Unnamed: 0,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_kurtosis,spectral_centroid_skewness,spectral_centroid_median,spectral_centroid_min,spectral_centroid_max,spectral_centroid_delta1_mean,spectral_centroid_delta1_std,spectral_centroid_delta1_kurtosis,spectral_centroid_delta1_skewness,spectral_centroid_delta1_median,spectral_centroid_delta1_min,spectral_centroid_delta1_max,spectral_centroid_delta2_mean,spectral_centroid_delta2_std,spectral_centroid_delta2_kurtosis,spectral_centroid_delta2_skewness,spectral_centroid_delta2_median,spectral_centroid_delta2_min,spectral_centroid_delta2_max,spectral_bandwidth_mean,spectral_bandwidth_std,spectral_bandwidth_kurtosis,spectral_bandwidth_skewness,spectral_bandwidth_median,spectral_bandwidth_min,spectral_bandwidth_max,spectral_bandwidth_delta1_mean,spectral_bandwidth_delta1_std,spectral_bandwidth_delta1_kurtosis,spectral_bandwidth_delta1_skewness,spectral_bandwidth_delta1_median,spectral_bandwidth_delta1_min,spectral_bandwidth_delta1_max,spectral_bandwidth_delta2_mean,spectral_bandwidth_delta2_std,spectral_bandwidth_delta2_kurtosis,spectral_bandwidth_delta2_skewness,spectral_bandwidth_delta2_median,...,rms_delta1_kurtosis,rms_delta1_skewness,rms_delta1_median,rms_delta1_min,rms_delta1_max,rms_delta2_mean,rms_delta2_std,rms_delta2_kurtosis,rms_delta2_skewness,rms_delta2_median,rms_delta2_min,rms_delta2_max,mfcc_mean,mfcc_std,mfcc_kurtosis,mfcc_skewness,mfcc_median,mfcc_min,mfcc_max,mfcc_delta1_mean,mfcc_delta1_std,mfcc_delta1_kurtosis,mfcc_delta1_skewness,mfcc_delta1_median,mfcc_delta1_min,mfcc_delta1_max,mfcc_delta2_mean,mfcc_delta2_std,mfcc_delta2_kurtosis,mfcc_delta2_skewness,mfcc_delta2_median,mfcc_delta2_min,mfcc_delta2_max,LPC_mean,LPC_std,LPC_kurtosis,LPC_skewness,LPC_median,LPC_min,LPC_max
0,1459.366472,661.709451,0.841681,0.966495,1312.423565,415.63357,4499.417241,0.637114,81.736023,4.675139,0.938981,-6.190309,-259.683492,557.643613,0.02433,46.319293,4.39699,-0.544101,4.797188,-321.764147,221.536483,1389.009131,430.143278,-0.790174,0.647521,1245.152886,763.89669,2682.510435,0.209451,47.979855,1.043309,-0.135983,-0.33007,-183.963257,172.859493,0.158786,27.622576,1.208689,-0.422796,2.859325,...,2.665213,0.53622,-0.001357,-0.069095,0.070742,2.4e-05,0.007349,5.086851,-0.517029,0.000367,-0.051982,0.03329,-7.856247,59.767918,-0.573822,-0.19289,-6.295442,-443.449982,201.39595,0.003157,4.244223,0.753149,0.755701,-0.0148,-35.611916,61.940201,0.002818,2.34022,0.416216,-0.177388,0.008556,-29.587139,27.44458,0.000257,3.167496,0.477754,0.252205,0.119992,-5.814752,7.004394
1,1552.811865,395.559911,11.603143,1.927469,1510.972589,417.254802,4669.770713,0.341307,68.457817,8.657651,-0.177911,-2.044817,-482.51584,413.034054,0.018451,39.079206,10.091214,-0.989551,0.481424,-359.25083,182.149399,1747.702312,276.141616,0.071457,0.017533,1741.173116,960.211956,2662.739385,0.431479,49.357886,-0.236268,-0.029415,-0.261212,-151.377924,149.766938,0.092237,30.548318,-0.35642,-0.060871,0.956199,...,-0.447615,-0.071674,0.00055,-0.03272,0.031167,2.8e-05,0.006142,-0.013974,-0.16667,0.000172,-0.024213,0.019053,1.034164,43.077038,0.500346,-0.577687,-3.161015,-343.934814,198.838501,7e-05,3.274759,-0.185535,0.05324,0.011495,-34.185745,39.863064,-0.000213,2.004048,0.270294,0.053689,0.001813,-24.621988,24.823193,0.000194,4.738595,-0.091538,-0.206327,0.134157,-9.6085,8.883484
2,1835.004266,586.003361,1.535905,1.05019,1732.517594,849.658285,4464.156642,-0.214389,68.072488,0.3299,0.430382,-3.85722,-197.758805,235.302721,-0.28249,44.159677,0.291734,-0.134831,1.29248,-151.569984,130.421292,1748.172116,297.397392,-0.284144,0.340901,1737.639906,1081.656537,2770.695509,0.091363,34.881861,0.038399,-0.017917,-0.559164,-128.846344,97.525389,-0.072185,23.628324,0.168883,-0.12874,0.675958,...,1.094863,0.393583,-0.000821,-0.02752,0.036711,-2.7e-05,0.003921,1.608814,-0.446032,0.00019,-0.017889,0.012219,-8.45884,51.298744,-0.526156,-0.311698,-6.86877,-356.302917,188.284668,0.006732,3.458114,-0.469113,0.117897,0.004508,-33.172958,39.574139,-0.002041,1.932273,0.152121,-0.26632,0.00138,-22.671379,21.499109,0.000404,8.84921,-0.549701,-0.153309,-0.137374,-17.529226,13.330379
3,1831.99394,1015.126775,0.890008,1.272868,1471.018339,645.881662,5399.377714,0.426899,107.962351,4.090815,0.105436,-5.166355,-572.08652,570.335862,-0.056108,58.068683,2.139839,-0.543998,4.282486,-290.870516,229.813951,1729.653287,449.344532,-1.072683,0.17882,1695.965514,940.593017,2827.733353,-0.045605,56.352033,0.108411,0.051005,-2.315556,-197.565059,160.34357,-0.020121,32.187028,0.428083,-0.362141,2.632657,...,7.424725,0.784603,-0.000644,-0.055123,0.063957,2e-05,0.004886,6.603911,-0.170253,0.000115,-0.024597,0.026737,-7.099053,55.202843,-0.156458,-0.418035,-5.271656,-451.742767,194.460266,0.005214,3.875076,1.175011,0.867217,-0.023272,-33.51643,66.347992,0.003392,2.127665,0.804901,-0.19576,0.011245,-26.828115,27.473246,-0.000663,29.449717,-1.052057,0.18738,-2.980688,-44.937874,47.398655
4,1451.667066,670.498443,1.379991,1.19725,1295.280201,540.133477,4625.431451,-0.617969,68.123245,2.887162,0.072218,-3.556383,-321.068353,286.932199,-0.090716,41.904498,5.628362,-0.344055,1.562003,-279.895912,217.037894,1577.270941,410.136488,-0.938447,0.076742,1608.505546,794.07319,2744.393863,-0.731761,47.647918,0.466731,0.338505,-3.603228,-128.615904,170.031321,-0.004892,30.344732,0.794148,-0.313922,1.001719,...,2.565038,0.837543,-0.000721,-0.045054,0.055055,-4.8e-05,0.00618,2.014225,-0.171516,0.000237,-0.025862,0.024715,-5.961263,57.036411,-0.224637,-0.023226,-6.441982,-413.145874,211.605225,0.003429,3.53956,0.867173,0.630095,0.00819,-30.188179,53.281082,0.00033,2.034518,0.807855,-0.019555,0.004078,-25.910381,27.182009,0.000201,5.135298,-0.030357,-0.124329,0.416601,-10.593861,9.370218


In [None]:
full_df = df.copy()

In [200]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [227]:
class ModelSelection():
    all_models = []
    def __init__(self, df, k, random_forest=True, logistic_reg=True, adaboost=True, decision_tree=True,
                 knn=True, svm=True, naive_bayes=True, mlp=True):

        self.df = df
        self.k = k
        self.random_forest = random_forest
        self.logistic_reg = logistic_reg
        self.adaboost = adaboost
        self.decision_tree = decision_tree
        self.knn = knn
        self.svm = svm
        self.naive_bayes = naive_bayes
        self.mlp = mlp

    def create_models(self):
        if self.random_forest:
            self.all_models.append(RandomForestClassifier(n_estimators=100, max_depth=10, max_features=0.6, min_samples_split=2))
            self.all_models.append(RandomForestClassifier(n_estimators=100, max_depth=20, max_features=0.6, min_samples_split=2))
            self.all_models.append(RandomForestClassifier(n_estimators=100, max_depth=10, max_features='sqrt', min_samples_split=2))
            self.all_models.append(RandomForestClassifier(n_estimators=100, max_depth=20, max_features='sqrt', min_samples_split=2))
            self.all_models.append(RandomForestClassifier(n_estimators=100, max_depth=50, max_features='auto', min_samples_split=2))
        if self.logistic_reg:
            self.all_models.append(LogisticRegression(C=10, max_iter=500, n_jobs=-1))
            self.all_models.append(LogisticRegression(C=1, max_iter=500, n_jobs=-1))
            self.all_models.append(LogisticRegression(C=0.1, max_iter=500, n_jobs=-1))
            self.all_models.append(LogisticRegression(C=0.01, max_iter=500, n_jobs=-1))
            self.all_models.append(LogisticRegression(C=0.001, max_iter=500, n_jobs=-1))
        if self.adaboost:
            self.all_models.append(AdaBoostClassifier(n_estimators=100, learning_rate=1, algorithm='SAMME.R'))
            self.all_models.append(AdaBoostClassifier(n_estimators=100, learning_rate=0.1, algorithm='SAMME.R'))
            self.all_models.append(AdaBoostClassifier(n_estimators=100, learning_rate=0.01, algorithm='SAMME.R'))
            self.all_models.append(AdaBoostClassifier(n_estimators=100, learning_rate=0.1, algorithm='SAMME'))
            self.all_models.append(AdaBoostClassifier(n_estimators=100, learning_rate=0.01, algorithm='SAMME'))
        if self.decision_tree:
            self.all_models.append(DecisionTreeClassifier(criterion='gini', max_depth=10))
            self.all_models.append(DecisionTreeClassifier(criterion='gini', max_depth=30))
            self.all_models.append(DecisionTreeClassifier(criterion='entropy', max_depth=10))
            self.all_models.append(DecisionTreeClassifier(criterion='entropy', max_depth=30))
            self.all_models.append(DecisionTreeClassifier(criterion='gini', max_depth=50))
        if self.knn:
            self.all_models.append(KNeighborsClassifier(n_neighbors=1, weights='uniform'))
            self.all_models.append(KNeighborsClassifier(n_neighbors=1, weights='distance'))
            self.all_models.append(KNeighborsClassifier(n_neighbors=2, weights='distance'))
            self.all_models.append(KNeighborsClassifier(n_neighbors=3, weights='uniform'))
            self.all_models.append(KNeighborsClassifier(n_neighbors=3, weights='distance'))
        if self.svm:
            self.all_models.append(SVC(kernel='linear'))
            self.all_models.append(SVC(kernel='poly', degree=3, C=1))
            self.all_models.append(SVC(kernel='poly', degree=4, C=1))
            self.all_models.append(SVC(kernel='poly', degree=5, C=1))
            self.all_models.append(SVC(kernel='poly', degree=5, C=0.1))
        if self.mlp:
            self.all_models.append(MLPClassifier(hidden_layer_sizes=(100,100), solver='adam', learning_rate_init=0.001, max_iter=500, early_stopping=True))
            self.all_models.append(MLPClassifier(hidden_layer_sizes=(256,256), solver='adam', learning_rate_init=0.001, max_iter=500, early_stopping=True))
            self.all_models.append(MLPClassifier(hidden_layer_sizes=(100,100), solver='lbfgs', learning_rate_init=0.001, max_iter=500, early_stopping=True))
            self.all_models.append(MLPClassifier(hidden_layer_sizes=(256,256), solver='lbfgs', learning_rate_init=0.001, max_iter=500, early_stopping=True))
            self.all_models.append(MLPClassifier(hidden_layer_sizes=(100,100), solver='lbfgs', learning_rate_init=1e-4, max_iter=500, early_stopping=True))

    def apply_models(self):
        self.all_models.clear()
        self.create_models()
        X = self.df.drop('genre',axis=1).values
        y = self.df['genre'].values
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        scores = []
        for i in tqdm(self.all_models):
            print("\n{} ------> Cross Validation Score : {}".format(i,cross_val_score(i,X,y,scoring='accuracy').mean()))
            scores.append("\n{} ------> Cross Validation Score : {}".format(i,cross_val_score(i,X,y,scoring='accuracy').mean()))
        return scores

In [228]:
from sklearn.model_selection import KFold
selector = ModelSelection(df, 5, naive_bayes=False)

In [229]:
KFold(n_splits=5)

KFold(n_splits=5, random_state=None, shuffle=False)

In [230]:
selector = ModelSelection(df, 5, naive_bayes=False)
scores = selector.apply_models()



  0%|          | 0/35 [00:00<?, ?it/s][A[A


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=10, max_features=0.6,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False) ------> Cross Validation Score : 0.576




  3%|▎         | 1/35 [00:52<29:49, 52.64s/it][A[A


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=20, max_features=0.6,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False) ------> Cross Validation Score : 0.576




  6%|▌         | 2/35 [01:47<29:15, 53.18s/it][A[A


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=10, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False) ------> Cross Validation Score : 0.5569999999999999




  9%|▊         | 3/35 [01:55<21:15, 39.85s/it][A[A


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=20, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False) ------> Cross Validation Score : 0.567




 11%|█▏        | 4/35 [02:05<15:50, 30.68s/it][A[A


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=50, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False) ------> Cross Validation Score : 0.567




 14%|█▍        | 5/35 [02:14<12:06, 24.23s/it][A[A


LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False) ------> Cross Validation Score : 0.649




 17%|█▋        | 6/35 [02:26<10:01, 20.75s/it][A[A


LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False) ------> Cross Validation Score : 0.655




 20%|██        | 7/35 [02:36<08:05, 17.36s/it][A[A


LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False) ------> Cross Validation Score : 0.6519999999999999




 23%|██▎       | 8/35 [02:41<06:13, 13.84s/it][A[A


LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False) ------> Cross Validation Score : 0.597




 26%|██▌       | 9/35 [02:45<04:40, 10.78s/it][A[A


LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False) ------> Cross Validation Score : 0.542




 29%|██▊       | 10/35 [02:48<03:28,  8.35s/it][A[A


AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1,
                   n_estimators=100, random_state=None) ------> Cross Validation Score : 0.23500000000000001




 31%|███▏      | 11/35 [03:12<05:15, 13.15s/it][A[A


AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.1,
                   n_estimators=100, random_state=None) ------> Cross Validation Score : 0.28300000000000003




 34%|███▍      | 12/35 [03:37<06:20, 16.56s/it][A[A


AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.01,
                   n_estimators=100, random_state=None) ------> Cross Validation Score : 0.344




 37%|███▋      | 13/35 [04:01<06:54, 18.86s/it][A[A


AdaBoostClassifier(algorithm='SAMME', base_estimator=None, learning_rate=0.1,
                   n_estimators=100, random_state=None) ------> Cross Validation Score : 0.40800000000000003




 40%|████      | 14/35 [04:24<07:04, 20.20s/it][A[A


AdaBoostClassifier(algorithm='SAMME', base_estimator=None, learning_rate=0.01,
                   n_estimators=100, random_state=None) ------> Cross Validation Score : 0.252




 43%|████▎     | 15/35 [04:47<07:01, 21.10s/it][A[A


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=10, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best') ------> Cross Validation Score : 0.45




 46%|████▌     | 16/35 [04:49<04:49, 15.21s/it][A[A


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=30, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best') ------> Cross Validation Score : 0.454




 49%|████▊     | 17/35 [04:50<03:19, 11.10s/it][A[A


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=10, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best') ------> Cross Validation Score : 0.449




 51%|█████▏    | 18/35 [04:55<02:37,  9.28s/it][A[A


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=30, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best') ------> Cross Validation Score : 0.446




 54%|█████▍    | 19/35 [05:00<02:08,  8.01s/it][A[A


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=50, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best') ------> Cross Validation Score : 0.45500000000000007




 57%|█████▋    | 20/35 [05:02<01:30,  6.06s/it][A[A


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform') ------> Cross Validation Score : 0.517




 60%|██████    | 21/35 [05:03<01:02,  4.49s/it][A[A


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='distance') ------> Cross Validation Score : 0.517




 63%|██████▎   | 22/35 [05:04<00:43,  3.37s/it][A[A


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=2, p=2,
                     weights='distance') ------> Cross Validation Score : 0.517




 66%|██████▌   | 23/35 [05:04<00:31,  2.59s/it][A[A


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform') ------> Cross Validation Score : 0.525




 69%|██████▊   | 24/35 [05:05<00:22,  2.07s/it][A[A


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='distance') ------> Cross Validation Score : 0.5369999999999999




 71%|███████▏  | 25/35 [05:06<00:16,  1.68s/it][A[A


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False) ------> Cross Validation Score : 0.6129999999999999




 74%|███████▍  | 26/35 [05:08<00:14,  1.66s/it][A[A


SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False) ------> Cross Validation Score : 0.426




 77%|███████▋  | 27/35 [05:10<00:16,  2.02s/it][A[A


SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=4, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False) ------> Cross Validation Score : 0.298




 80%|████████  | 28/35 [05:14<00:16,  2.41s/it][A[A


SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=5, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False) ------> Cross Validation Score : 0.28600000000000003




 83%|████████▎ | 29/35 [05:17<00:15,  2.65s/it][A[A


SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=5, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False) ------> Cross Validation Score : 0.205




 86%|████████▌ | 30/35 [05:20<00:14,  2.92s/it][A[A


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False) ------> Cross Validation Score : 0.615




 89%|████████▊ | 31/35 [05:27<00:16,  4.08s/it][A[A


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(256, 256), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False) ------> Cross Validation Score : 0.623




 91%|█████████▏| 32/35 [05:41<00:21,  7.00s/it][A[A


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False) ------> Cross Validation Score : 0.6180000000000001




 94%|█████████▍| 33/35 [05:51<00:15,  7.87s/it][A[A


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(256, 256), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False) ------> Cross Validation Score : 0.65




 97%|█████████▋| 34/35 [06:17<00:13, 13.22s/it][A[A


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.0001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False) ------> Cross Validation Score : 0.639




100%|██████████| 35/35 [06:27<00:00, 11.06s/it]


In [235]:
with open("/content/drive/MyDrive/For_Colab/Data/scores.txt","w") as f:
    for i in scores:
        f.write(i)

In [178]:
X_train, X_test, y_train, y_test = train_test_split(df.values, y, test_size=0.33, random_state=42)

In [179]:
from sklearn.naive_bayes import GaussianNB, CategoricalNB, MultinomialNB

In [180]:
from sklearn.preprocessing import StandardScaler

In [181]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [182]:
gnb.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [183]:
gnb.score(X_test, y_test)

0.5666666666666667