In [4]:
### Importing Libraries ######################################################################

# General:
import pandas as pd
import numpy as np
import pickle
import sys
import yaml
import itertools
from sklearn.ensemble import StackingClassifier
import warnings
warnings.filterwarnings("ignore")

# Functions:
from functions import create_data_matrix, modify_index

# Standardization:
from sklearn.preprocessing import LabelEncoder, StandardScaler, LabelBinarizer

# Feature Selection:
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_classif, mutual_info_classif
from numpy import linalg

# Feature Reduction:
from sklearn.decomposition import PCA, FastICA, NMF

# Classifiers:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from lightgbm import LGBMClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

# Metrics:
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score, confusion_matrix, roc_auc_score, cohen_kappa_score, recall_score, precision_score
from scipy import stats

### Parameters ##############################################################################

with open('parameters_test.yaml') as file:
    params = yaml.load(file, Loader=yaml.FullLoader)

texture_selector = params['texture_selector']['name']
num_selected_textures = params['texture_selector']['num_textures']

with open(r'D:\Users\Isabelle\Desktop\Infnet\data2_gtzan_handcrafted_textures.pkl', 'rb') as f:
    data_textures = pickle.load(f)

list_features_name = list(data_textures[list(data_textures.keys())[0]].keys())[:-1]

features_set = params['features_set']['name']

if features_set == 'baseline':
    statistics = ['mean', 'std']
    features_name = [a for a in list_features_name if 'd1_' not in a and 'd2_' not in a and 'd3_' not in a and 'd4_' not in a]
elif features_set == 'sub_delta':
    statistics = ['mean', 'std']
    features_name = [a for a in list_features_name if 'd2_' not in a and 'd3_' not in a and 'd4_' not in a]
elif features_set == 'delta':
    statistics = ['mean', 'std']
    features_name = [a for a in list_features_name if 'd3_' not in a and 'd4_' not in a]
elif features_set == 'stats':
    statistics = ['mean', 'std', 'skew', 'kurt']
    features_name = [a for a in list_features_name if 'd1_' not in a and 'd2_' not in a and 'd3_' not in a and 'd4_' not in a]
elif features_set == 'sub_delta_stats':
    statistics = ['mean', 'std', 'skew', 'kurt']
    features_name = [a for a in list_features_name if 'd2_' not in a and 'd3_' not in a and 'd4_' not in a]
elif features_set == 'delta_stats':
    statistics = ['mean', 'std', 'skew', 'kurt']
    features_name = [a for a in list_features_name if 'd3_' not in a and 'd4_' not in a]

    
total_features = len(features_name) * len(statistics)

features_set = params['features_set']['name']
feature_selector = params['feature_selector']['name']
feature_reduction = params['feature_reduction']['name']
classifier = params['classifier']['name']

with open(r'D:\Users\Isabelle\Desktop\Infnet\index_folds.pkl', 'rb') as f:
    index_folds = pickle.load(f)

### Save ###################################################################################

filename_save = 'results_GTZAN_test2'

if texture_selector == 'LINSPACE':
    filename_save = f'{filename_save}_linspace_{num_selected_textures}'
elif texture_selector == 'ALL':
    filename_save = f'{filename_save}_all'
    
filename_save = f'{filename_save}_{features_set}'

if feature_selector != '':
    filename_save = f'{filename_save}_{feature_selector}'

if feature_reduction != '':
    filename_save = f'{filename_save}_{feature_reduction}'
    
filename_save = f'{filename_save}_{classifier}'       
filename_save = f'{filename_save}.pkl'    

print(f'Results will be saved in {filename_save}', flush=True)

### Creating data matrix ##################################################################

data = create_data_matrix(data_textures, index_folds, features_name, statistics)

X = data.drop(['label','ind_filename','num_texture'], axis=1)
y = data['label']


if feature_selector == 'manual':
    with open(params['feature_selector']['manual_list_filename'], 'rb') as f:
        manual_list = pickle.load(f)
    features_selected = []
    for feature in manual_list:
        features_selected.append(X.columns.get_loc(feature))
    list_percentile = [100]
    

### Encoding labels #######################################################################

filenames = list(data_textures.keys())
labels = [data_textures[filename]['label'] for filename in filenames]

encoder = LabelEncoder()
encoder.fit(labels)
y = encoder.transform(y) # y is a vector

lb = LabelBinarizer()
Y = lb.fit_transform(y) # Y is a binary matrix

### Calculating number of textures per audio track #######################################

max_textures_per_filename = np.array(data[['ind_filename','num_texture']].groupby(['ind_filename']).max()['num_texture']) + 1

### Test #############################################################################

del data_textures
del data

results_test = {}

for ind_set in range(1,100,10):
    
    accuracy_dict = {}
    
    print(f'Model: {ind_set}/100\n', flush=True)
    
    ind_dev = modify_index(np.concatenate((index_folds['sets'][ind_set]['train'], index_folds['sets'][ind_set]['val'])),                                                max_textures_per_filename, texture_selector, num_selected_textures) 
    
    scaler = StandardScaler()
    X_dev = scaler.fit_transform(X.iloc[ind_dev])
    
    y_dev = y[ind_dev]
    
    Y_dev = Y[ind_dev,:]
    
    ### Feature Selector:
    
    # Parameters:
    
    if feature_selector == '':
        fs_params = {'fs':'N'}
    elif feature_selector == 'anova':
        fs_params = {'fs':['anova']}
    elif feature_selector == 'mutual':
        fs_params = {'fs':['mutual']}
    elif feature_selector == 'somp':
        fs_params = {'fs':['somp'],
                     'somp_n': params['feature_selector']['somp_n'],
                     'somp_K': params['feature_selector']['somp_K']}
    elif feature_selector == 'mrmr':
        fs_params = {'fs':['mrmr'],
                    'mrmr_K': params['feature_selector']['mrmr_K']}
    elif feature_selector == 'manual':
        fs_params = {'fs':['manual']}
        
    keys, values = zip(*fs_params.items())
    perm_fs_params = [dict(zip(keys, v)) for v in itertools.product(*values)]
        
    # Function:
    
    dict_string_params = {}  # Initialize string_params !!!
    
    for fs_param in perm_fs_params:
        
        dict_string_params.update(fs_param)
        
        if feature_selector == '':
            features_selected = list(range(0,total_features))
            list_percentile = [100]
        elif feature_selector == 'anova':
            fs = f_classif(X_dev, y_dev)
            aux = sorted([(x,ind) for (ind,x) in enumerate(fs[0])], reverse=True)
            features_selected = [ind for x, ind in aux]
            list_percentile = params['feature_selector']['percentile']
        elif feature_selector == 'mutual':
            fs = mutual_info_classif(X_dev, y_dev)
            aux = sorted([(x,ind) for (ind,x) in enumerate(fs)], reverse=True)
            features_selected = [ind for x, ind in aux]
            list_percentile = params['feature_selector']['percentile']
        elif feature_selector == 'somp':
            K = fs_param['somp_K'] #total_features
            n_dict = {}
            n = fs_param['somp_n']
            R = Y_dev
            uIdx = list(range(0,total_features))
            sIdx = []
            X_ = X_dev.copy()
            Y_ = Y_dev.copy()
            for k in range(0, K):
                idx = np.argmax([linalg.norm(np.array([x_j]) @ R) / linalg.norm(np.array([x_j]).T) for x_j in X_[:,uIdx].T])
                sIdx.append(uIdx[idx])
                uIdx.remove(uIdx[idx])
                A = np.linalg.inv(X_[:,sIdx].T @ X_[:,sIdx]) @ X_[:,sIdx].T @ Y_
                R = Y_ - n * X_[:,sIdx] @ A
            features_selected = sIdx
            list_percentile = [100] #params['feature_selector']['percentile']
        elif feature_selector == 'mrmr':
            features_selected = mrmr_classif(X_dev, y_dev, K = fs_param['mrmr_K']) #total_features)
            list_percentile = [100] #params['feature_selector']['percentile']
            
            
        for p in list_percentile:
            dict_string_params.update({'percentile':p})
            aux_p = int(total_features * p / 100)           
            X_dev_fs = X_dev[:,features_selected[:aux_p]]
            
        
            ### Feature Reduction:
        
            # Parameters:
    
            if feature_reduction == '':
                fr_params = {'fr':'N'}
            elif feature_reduction == 'pca':
                fr_params = {'fr':['pca'],
                             'n_components': params['feature_reduction']['n_components']}
            elif feature_reduction == 'fastica':
                fr_params = {'fr':['fastica'],
                             'n_components': params['feature_reduction']['n_components']}
            
            keys, values = zip(*fr_params.items())
            perm_fr_params = [dict(zip(keys, v)) for v in itertools.product(*values)]
            
            # Function:
    
            for fr_param in perm_fr_params:
            
                dict_string_params.update(fr_param)
            
                if feature_reduction == '':
                    X_dev_fr = X_dev_fs.copy()
                elif feature_reduction == 'pca':
                    fr = PCA(n_components=fr_param['n_components'])
                    X_dev_fr = fr.fit_transform(X_dev_fs)
                elif feature_reduction == 'fastica':
                    fr = FastICA(n_components=fr_param['n_components'],
                                 max_iter=params['fastica_parameters']['max_iter'])
                    X_dev_fr = fr.fit_transform(X_dev_fs)
                    
            # bootstrap

                X_df_test = pd.DataFrame (X_dev_fr)
                y_df_test = pd.DataFrame (y_dev)
                X_df_test['81'] = y_df_test
                samples = []
                for i in range(10):
                    samples.append(X_df_test.sample(frac=1, replace=True))
                
                
                    
                ### Classifier:
        
                # Parameters:
            
                if classifier == 'logistic':
                    clf_params = params['logistic_parameters']
                elif classifier == 'knn':
                    clf_params = params['knn_parameters']
                elif classifier == 'svm':
                    clf_params = params['svm_parameters']
                elif classifier == 'rf':
                    clf_params = params['rf_parameters']    
                elif classifier == 'lgboost':
                    clf_params = params['lgboost_parameters']
                elif classifier == 'gnb':
                    clf_params = params['gnb_parameters']
                elif classifier == 'lda':
                    clf_params = params['lda_parameters']
                elif classifier == 'qda':
                    clf_params = params['qda_parameters']
                elif classifier == 'mlp':
                    clf_params = params['mlp_parameters']
                elif classifier == 'svm-linear':
                    clf_params = params['svm-linear_parameters']
                    
                    ## stacking fk
                    
                elif classifier == 's3fk1':
                    clf_params = params['s3fk1_parameters']
                elif classifier == 's3fk2':
                    clf_params = params['s3fk2_parameters']
                elif classifier == 's3fk3':
                    clf_params = params['s3fk3_parameters']
                    
                elif classifier == 's5fk1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fk2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fk3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fk4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fk5':
                    clf_params = params['s3fk3_parameters']
                
                elif classifier == 's8fk1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk5':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk6':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk7':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fk8':
                    clf_params = params['s3fk3_parameters']
                
                elif classifier == 's10fk1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk5':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk6':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk7':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk8':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk9':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's10fk10':
                    clf_params = params['s3fk3_parameters']
                
                ## stacking fx 
                
                elif classifier == 's3fx1':
                    clf_params = params['s3fk1_parameters']
                elif classifier == 's3fx2':
                    clf_params = params['s3fk2_parameters']
                elif classifier == 's3fx3':
                    clf_params = params['s3fk3_parameters']
                    
                elif classifier == 's5fx1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fx2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fx3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fx4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5fx5':
                    clf_params = params['s3fk3_parameters']
                
                elif classifier == 's8fx1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx5':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx6':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx7':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8fx8':
                    clf_params = params['s3fk3_parameters']
                 
                ## stacking SOMP
                
                elif classifier == 's3somp1':
                    clf_params = params['s3fk1_parameters']
                elif classifier == 's3somp2':
                    clf_params = params['s3fk2_parameters']
                elif classifier == 's3somp3':
                    clf_params = params['s3fk3_parameters']
                    
                elif classifier == 's5somp1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5somp2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5somp3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5somp4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's5somp5':
                    clf_params = params['s3fk3_parameters']
                
                elif classifier == 's8somp1':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp2':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp3':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp4':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp5':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp6':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp7':
                    clf_params = params['s3fk3_parameters']
                elif classifier == 's8somp8':
                    clf_params = params['s3fk3_parameters']
                
                
                    
                    
                keys, values = zip(*clf_params.items())
                perm_clf_params = [dict(zip(keys, v)) for v in itertools.product(*values)]
                    
                # Function:
    
                for clf_param in perm_clf_params:
            ## stacking fk
            
                    estimators_fk3 = [('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
        ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1)),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01)),
        ]
                
                estimators_fk5 = [('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
        ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01) ),
        ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1) ),
        ('mlp',MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1))]
                
                estimators_fk8 = [('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
        ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01) ),
        ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1) ),
        ('mlp',MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1)),
        ('lda',LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.1)),
        ('svm-linear',LinearSVC(max_iter=1000, C=0.01, dual=False)),
        ('knn',KNeighborsClassifier(n_neighbors=5) )]
                
                estimators_fk10 = [('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
        ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01) ),
        ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1) ),
        ('mlp',MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1)),
        ('lda',LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.1)),
        ('svm-linear',LinearSVC(max_iter=1000, C=0.01, dual=False)),
        ('knn',KNeighborsClassifier(n_neighbors=5) ),
                                   ('rf',RandomForestClassifier(n_estimators=50) ),
                                  ('gnb',GaussianNB(var_smoothing=0.000000001))]
                
                
        ## stacking fx
                
                estimators_fx3 = [('gnb',GaussianNB(var_smoothing=0.000000001)),
        ('knn',KNeighborsClassifier(n_neighbors=5)),
        ('svm_linear', LinearSVC(max_iter=1000, C=0.01, dual=False))]
                
                estimators_fx5 = [('gnb',GaussianNB(var_smoothing=0.000000001)),
        ('knn',KNeighborsClassifier(n_neighbors=5)),
        ('svm_linear', LinearSVC(max_iter=1000, C=0.01, dual=False)),
        ('rf',RandomForestClassifier(n_estimators=50) ),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01))]
                
                estimators_fx8 = [('gnb',GaussianNB(var_smoothing=0.000000001)),
        ('knn',KNeighborsClassifier(n_neighbors=5)),
        ('svm_linear', LinearSVC(max_iter=1000, C=0.01, dual=False)),
        ('rf',RandomForestClassifier(n_estimators=50) ),
        ('logistic',LogisticRegression(max_iter=10000, C=0.01)),
        ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1)),
        ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1)),
        ('mlp',MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1))]
                
                estimators_somp3 = [('rf',RandomForestClassifier(n_estimators=50) ),
                                    ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
                                    ('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000))]
                
                
                
                
                
                
                estimators_somp5 = [('rf',RandomForestClassifier(n_estimators=50) ),
                                    ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
                                    ('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
                                   ('knn',KNeighborsClassifier(n_neighbors=5) ),
                                    ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1))
                                   ]
                
                
                
                estimators_somp8 = [('rf',RandomForestClassifier(n_estimators=50) ),
                                    ('lgboost',LGBMClassifier(n_estimators=200, learning_rate=0.1) ),
                                    ('svm',SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000)),
                                   ('knn',KNeighborsClassifier(n_neighbors=5) ),
                                    ('qda',QuadraticDiscriminantAnalysis(reg_param=0.1)),
                                    ('mlp',MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1)),
                                     ('logistic',LogisticRegression(max_iter=10000, C=0.01) ),
                                     ('svm-linear',LinearSVC(max_iter=1000, C=0.01, dual=False))
                                   ]
                
                
                
                                
            
                dict_string_params.update(clf_param)
            
                if classifier == 'logistic':
                    clf = LogisticRegression(max_iter=clf_param['max_iter'], C=clf_param['C'])
                elif classifier == 'knn':
                    clf = KNeighborsClassifier(n_neighbors=clf_param['n_neighbors'])
                elif classifier == 'svm':
                    clf = SVC(kernel=clf_param['kernel'], gamma=clf_param['gamma'], C=clf_param['C'], cache_size=2000)
                elif classifier == 'rf':
                    clf = RandomForestClassifier(n_estimators=clf_param['n_estimators'])
                elif classifier == 'lgboost':
                    clf = LGBMClassifier(n_estimators=clf_param['n_estimators'], learning_rate=clf_param['learning_rate'])
                elif classifier == 'gnb':
                    clf = GaussianNB(var_smoothing=clf_param['var_smoothing'])
                elif classifier == 'lda':
                    clf = LinearDiscriminantAnalysis(solver=clf_param['solver'], shrinkage=clf_param['shrinkage'])
                elif classifier == 'qda':
                    clf = QuadraticDiscriminantAnalysis(reg_param=clf_param['reg_param'])
                elif classifier == 'mlp':
                    clf = MLPClassifier(hidden_layer_sizes=tuple(clf_param['hidden_layer_sizes']),
                                        random_state=1, max_iter=clf_param['max_iter'], activation=clf_param['activation'],
                                        early_stopping=True, validation_fraction=0.1)
                elif classifier == 'svm-linear':
                    clf = LinearSVC(max_iter=clf_param['max_iter'], C=clf_param['C'], dual=False)
                    
                ## stacking fk 
                
                elif classifier == 's3fk1':
                    clf = StackingClassifier(estimators=estimators_fk3, 
                                             final_estimator= SVC(C=1,kernel= 'rbf',gamma= 'auto'))
                elif classifier == 's3fk2':
                    clf = StackingClassifier(estimators=estimators_fk3, 
                                             final_estimator= LGBMClassifier(n_estimators=200, learning_rate=0.1))
                elif classifier == 's3fk3':
                    clf = StackingClassifier(estimators=estimators_fk3, 
                                             final_estimator=LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's5fk1':
                    clf = StackingClassifier(estimators=estimators_fk5, 
                                             final_estimator=SVC(C=1,kernel= 'rbf',gamma= 'auto') )
                elif classifier == 's5fk2':
                    clf = StackingClassifier(estimators=estimators_fk5, 
                                             final_estimator=LGBMClassifier(n_estimators=200, learning_rate=0.1) )
                elif classifier == 's5fk3':
                    clf = StackingClassifier(estimators=estimators_fk5, 
                                             final_estimator= LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's5fk4':
                    clf = StackingClassifier(estimators=estimators_fk5, 
                                             final_estimator=QuadraticDiscriminantAnalysis(reg_param=0.1) )
                elif classifier == 's5fk5':
                    clf = StackingClassifier(estimators=estimators_fk5, 
                                             final_estimator=MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1) )
                elif classifier == 's8fk1':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator= SVC(C=1,kernel= 'rbf',gamma= 'auto'))
                elif classifier == 's8fk2':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator= LGBMClassifier(n_estimators=200, learning_rate=0.1) )
                elif classifier == 's8fk3':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator= LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's8fk4':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator=QuadraticDiscriminantAnalysis(reg_param=0.1) )
                elif classifier == 's8fk5':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator=MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1) )
                elif classifier == 's8fk6':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator= LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.1))
                elif classifier == 's8fk7':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator=LinearSVC(max_iter=1000, C=0.01, dual=False) )
                elif classifier == 's8fk8':
                    clf = StackingClassifier(estimators=estimators_fk8, 
                                             final_estimator=KNeighborsClassifier(n_neighbors=5))
                    
                
                
                elif classifier == 's10fk1':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator= SVC(C=1,kernel= 'rbf',gamma= 'auto'))
                elif classifier == 's10fk2':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator= LGBMClassifier(n_estimators=200, learning_rate=0.1) )
                elif classifier == 's10fk3':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator= LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's10fk4':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=QuadraticDiscriminantAnalysis(reg_param=0.1) )
                elif classifier == 's10fk5':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1) )
                elif classifier == 's10fk6':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator= LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.1))
                elif classifier == 's10fk7':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=LinearSVC(max_iter=1000, C=0.01, dual=False) )
                elif classifier == 's10fk8':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=KNeighborsClassifier(n_neighbors=5))
                
                
                elif classifier == 's10fk9':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=RandomForestClassifier(n_estimators=50))
                elif classifier == 's10fk10':
                    clf = StackingClassifier(estimators=estimators_fk10, 
                                             final_estimator=GaussianNB(var_smoothing=0.000000001))
                ## stacking fx
                
                elif classifier == 's3fx1':
                    clf = StackingClassifier(estimators=estimators_fx3, 
                                             final_estimator= GaussianNB(var_smoothing=0.000000001))
                elif classifier == 's3fx2':
                    clf = StackingClassifier(estimators=estimators_fx3, 
                                             final_estimator= KNeighborsClassifier(n_neighbors=5))
                elif classifier == 's3fx3':
                    clf = StackingClassifier(estimators=estimators_fx3, 
                                             final_estimator=LinearSVC(max_iter=1000, C=0.01, dual=False))
                elif classifier == 's5fx1':
                    clf = StackingClassifier(estimators=estimators_fx5, 
                                             final_estimator= GaussianNB(var_smoothing=0.000000001))
                elif classifier == 's5fx2':
                    clf = StackingClassifier(estimators=estimators_fx5, 
                                             final_estimator=  KNeighborsClassifier(n_neighbors=5))
                elif classifier == 's5fx3':
                    clf = StackingClassifier(estimators=estimators_fx5, 
                                             final_estimator= LinearSVC(max_iter=1000, C=0.01, dual=False))
                elif classifier == 's5fx4':
                    clf = StackingClassifier(estimators=estimators_fx5, 
                                             final_estimator=RandomForestClassifier(n_estimators=50)  )
                elif classifier == 's5fx5':
                    clf = StackingClassifier(estimators=estimators_fx5, 
                                             final_estimator= LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's8fx1':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator= GaussianNB(var_smoothing=0.000000001) )
                elif classifier == 's8fx2':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator= KNeighborsClassifier(n_neighbors=5))
                elif classifier == 's8fx3':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=LinearSVC(max_iter=1000, C=0.01, dual=False))
                elif classifier == 's8fx4':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=RandomForestClassifier(n_estimators=50))
                elif classifier == 's8fx5':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=LogisticRegression(max_iter=10000, C=0.01))
                elif classifier == 's8fx6':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=QuadraticDiscriminantAnalysis(reg_param=0.1) )
                elif classifier == 's8fx7':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1))
                elif classifier == 's8fx8':
                    clf = StackingClassifier(estimators=estimators_fx8, 
                                             final_estimator=LGBMClassifier(n_estimators=200, learning_rate=0.1))
                
                ### SOMP
                
                
                
                elif classifier == 's3somp1':
                    clf = StackingClassifier(estimators=estimators_somp3, 
                                             final_estimator= RandomForestClassifier(n_estimators=50))
                elif classifier == 's3somp2':
                    clf = StackingClassifier(estimators=estimators_somp3, 
                                             final_estimator= LGBMClassifier(n_estimators=200, learning_rate=0.1))
                elif classifier == 's3somp3':
                    clf = StackingClassifier(estimators=estimators_somp3, 
                                             final_estimator=SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000))
                elif classifier == 's5somp1':
                    clf = StackingClassifier(estimators=estimators_somp5, 
                                             final_estimator=RandomForestClassifier(n_estimators=50) )
                elif classifier == 's5somp2':
                    clf = StackingClassifier(estimators=estimators_somp5, 
                                             final_estimator=LGBMClassifier(n_estimators=200, learning_rate=0.1))
                elif classifier == 's5somp3':
                    clf = StackingClassifier(estimators=estimators_somp5, 
                                             final_estimator=SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000))
                elif classifier == 's5somp4':
                    clf = StackingClassifier(estimators=estimators_somp5, 
                                             final_estimator=KNeighborsClassifier(n_neighbors=5))
                elif classifier == 's5somp5':
                    clf = StackingClassifier(estimators=estimators_somp5, 
                                             final_estimator= QuadraticDiscriminantAnalysis(reg_param=0.1))
                elif classifier == 's8somp1':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator= RandomForestClassifier(n_estimators=50))
                elif classifier == 's8somp2':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator= LGBMClassifier(n_estimators=200, learning_rate=0.1))
                elif classifier == 's8somp3':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=SVC(kernel= 'rbf', gamma='auto', C=1, cache_size=2000))
                elif classifier == 's8somp4':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=KNeighborsClassifier(n_neighbors=5))
                elif classifier == 's8somp5':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=QuadraticDiscriminantAnalysis(reg_param=0.1))
                elif classifier == 's8somp6':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=MLPClassifier(hidden_layer_sizes=[25],
                                        random_state=1, max_iter=1000, activation='tanh',
                                        early_stopping=True, validation_fraction=0.1))
                elif classifier == 's8somp7':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=LogisticRegression(max_iter=10000, C=0.01) )
                elif classifier == 's8somp8':
                    clf = StackingClassifier(estimators=estimators_somp8, 
                                             final_estimator=LinearSVC(max_iter=1000, C=0.01, dual=False))
                
                
                
                
                
                
                



                clf.fit(X_dev_fr, y_dev)
                xtrain = samples[9].loc[:, samples[9].columns != '81']
                ytrain = samples[9]['81']

                y_pred_total = [] 
                y_real_total = []
                y_all = []

                for ind_song in index_folds['sets'][ind_set]['test']:
                    ind_test_textures_per_song = modify_index([ind_song], max_textures_per_filename, 'ALL', num_selected_textures)

                    X_test_textures_per_song = X.iloc[ind_test_textures_per_song]
                    y_test_textures_per_song = y[ind_test_textures_per_song]

                    X_test_textures_per_song = scaler.transform(X_test_textures_per_song)
                    X_test_textures_per_song_fs = X_test_textures_per_song[:,features_selected[:aux_p]]

                    if feature_reduction == '':
                        X_test_textures_per_song_fr = X_test_textures_per_song_fs.copy()
                    else:
                        X_test_textures_per_song_fr = fr.transform(X_test_textures_per_song_fs)   

                    y_pred_textures_per_song = clf.predict(X_test_textures_per_song_fr)

                    y_pred_total += [stats.mode(y_pred_textures_per_song)[0]]
                    y_real_total += [stats.mode(y_test_textures_per_song)[0]]
                teste = clf.predict(X_dev_fr)


                for i in y_pred_total:
                    i = float(i)


                accuracy_dict[str(dict_string_params)] = {'accuracy': balanced_accuracy_score(y_real_total, y_pred_total),
                                                          'fk': 2*balanced_accuracy_score(y_real_total, y_pred_total) * (cohen_kappa_score(y_real_total, y_pred_total, weights = None))/ (balanced_accuracy_score(y_real_total, y_pred_total) +cohen_kappa_score(y_real_total, y_pred_total, weights = None)),
                                                          'fx': 2*balanced_accuracy_score(y_real_total, y_pred_total) * (1 -cohen_kappa_score(y_real_total, y_pred_total, weights = None))/ (balanced_accuracy_score(y_real_total, y_pred_total) +(1-cohen_kappa_score(y_real_total, y_pred_total, weights = None))),
                                                          'k': cohen_kappa_score(y_real_total, y_pred_total, weights = None)

                                      }

                print(dict_string_params, flush=True)
                a = Y_dev
 
    results_test[ind_set] = accuracy_dict

    
with open(filename_save, 'wb') as f:
    pickle.dump(results_test, f)

Results will be saved in results_GTZAN_test2_linspace_20_delta_stats_somp_s8fx7.pkl
Model: 1/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 11/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 21/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 31/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 41/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 51/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 61/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 71/100

{'fs': 'somp', 'somp_n': 1, 'somp_K': 81, 'percentile': 100, 'fr': 'N', 'stack_method': 'auto'}
Model: 81/100

{'fs': 'somp',

In [4]:
gnb = pd.DataFrame(teste, columns = ['GNB'])

gnb

Unnamed: 0,GNB
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [5]:
lda = pd.DataFrame(teste, columns = ['LDA'])

lda

Unnamed: 0,LDA
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [6]:
qda = pd.DataFrame(teste, columns = ['QDA'])

qda

Unnamed: 0,QDA
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [7]:
logistic = pd.DataFrame(teste, columns = ['LOGISTIC'])

logistic

Unnamed: 0,LOGISTIC
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [8]:
knn = pd.DataFrame(teste, columns = ['KNN'])

knn

Unnamed: 0,KNN
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [9]:
svm_linear = pd.DataFrame(teste, columns = ['SVM_LIN'])

svm_linear

Unnamed: 0,SVM_LIN
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [10]:
svm = pd.DataFrame(teste, columns = ['SVM'])

svm

Unnamed: 0,SVM
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [11]:
rf = pd.DataFrame(teste, columns = ['RF'])

rf

Unnamed: 0,RF
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [12]:
lgboost = pd.DataFrame(teste, columns = ['LGBOOST'])

lgboost

Unnamed: 0,LGBOOST
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [13]:
mlp = pd.DataFrame(teste, columns = ['MLP'])

mlp

Unnamed: 0,MLP
0,0
1,0
2,0
3,0
4,0
...,...
17995,9
17996,9
17997,9
17998,9


In [14]:
gnb['LDA'] = lda
gnb['QDA'] = qda
gnb['KNN'] = knn
gnb['MLR'] = logistic
gnb['SVM-LINEAR'] = svm_linear
gnb['SVM'] = svm
gnb['RF'] = rf
gnb['LGBOOST'] = lgboost
gnb['MLP'] = mlp

gnb.head()

Unnamed: 0,GNB,LDA,QDA,KNN,MLR,SVM-LINEAR,SVM,RF,LGBOOST,MLP
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0


In [15]:
np.shape(gnb)

(18000, 10)

In [16]:
X_train = gnb

X_train.head()

Unnamed: 0,GNB,LDA,QDA,KNN,MLR,SVM-LINEAR,SVM,RF,LGBOOST,MLP
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0


In [17]:
X_train = X_train.to_numpy()

In [18]:
Y_train = a

Y_train

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [20]:
K = 10
n_dict = {}
n = 10
R = Y_train
uIdx = list(range(0,10))
sIdx = []
X_ = X_train.copy()
Y_ = Y_train.copy()
for k in range(0, K):
    idx = np.argmax([linalg.norm(np.array([x_j]) @ R) / linalg.norm(np.array([x_j]).T) for x_j in X_[:,uIdx].T])
    sIdx.append(uIdx[idx])
    uIdx.remove(uIdx[idx])
    A = np.linalg.inv(X_[:,sIdx].T @ X_[:,sIdx]) @ X_[:,sIdx].T @ Y_
    R = Y_ - n * X_[:,sIdx] @ A
features_selected = sIdx

LinAlgError: Singular matrix

In [111]:
features_selected

[7, 8, 6, 3, 2, 9, 4, 5, 1, 0]

In [21]:
K = 10
n_dict = {}
n = 1
R = Y_train
uIdx = list(range(0,10))
sIdx = []
X_ = X_train.copy()
Y_ = Y_train.copy()
for k in range(0, K):
    idx = np.argmax([linalg.norm(np.array([x_j]) @ R) / linalg.norm(np.array([x_j]).T) for x_j in X_[:,uIdx].T])
    sIdx.append(uIdx[idx])
    uIdx.remove(uIdx[idx])
    A = np.linalg.inv(X_[:,sIdx].T @ X_[:,sIdx]) @ X_[:,sIdx].T @ Y_
    R = Y_ - n * X_[:,sIdx] @ A
features_selected = sIdx

LinAlgError: Singular matrix