# Binary Classification (Composer Based)

In [1]:
import csv # create csv
import pandas as pd # Open and manage CSV files
import pickle # Open python objects
import os # Directory Control
import seaborn # easy plots
from itertools import product
import warnings # find and don't show warnings

### Let's load our data

In [32]:
def loadDicts(directory):
    '''Collect the pickled graphs stored in each subdirectory of a directory.

    Every directory below ``directory`` (at any depth, as reported by
    ``os.walk``) is scanned for files whose name ends in ".p". Each such file
    is unpickled, the result is iterated, and the first element of every item
    is kept.

    Args:
        directory: Root directory. Files placed directly in it are not read;
            only its subdirectories are scanned.

    Returns:
        A list with one inner list per subdirectory, in ``os.walk`` order.
        Each inner list holds the first elements gathered from that
        subdirectory's ".p" files (it is empty when there are none).
    '''
    # os.walk yields the root itself first; it is skipped below via [1:].
    subDir = [x[0] for x in os.walk(directory)]
    graphList = []
    for subfolder in subDir[1:]:
        graphs = []
        for file in os.listdir(subfolder):
            if file.endswith(".p"):
                complete_name = os.path.join(subfolder, file)
                print('Rendering --> ', file)
                # NOTE(review): pickle.load can execute arbitrary code, so this
                # must only be pointed at trusted files.
                # The context manager closes the handle even if unpickling fails.
                with open(complete_name, "rb") as handle:
                    picklegraphs = pickle.load(handle)
                graphs.extend(pair[0] for pair in picklegraphs)
        graphList.append(graphs)
    return graphList


In [16]:
def write2csv(directory, file_name,  data):
    '''Write rows to ``<directory>/<file_name>.csv``, replacing any existing file.

    Args:
        directory: Existing directory that will contain the file.
        file_name: File name without the ".csv" extension.
        data: Iterable of rows; each row is an iterable of cell values.
    '''
    csv_path = directory + '/' + file_name + '.csv'
    # newline='' hands line-ending control to the csv module; without it an
    # extra blank line appears after every row on Windows.
    # The with-block closes the file, so no explicit close() is needed.
    with open(csv_path, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(data)

In [35]:
def createCSVfiles(graphList, directory='Comparison_Results/binary_classification'):
    '''Write one CSV per unordered pair of graph lists.

    For every pair of positions ``i < j`` in ``graphList`` the two lists are
    concatenated and one row per graph is written, preceded by a header row.
    The file is named by concatenating the two indices (``str(i) + str(j)``).

    Args:
        graphList: Sequence of lists of graph objects. Each graph must expose
            ``composer``, ``kalz_coef``, ``glob_clust_coef``,
            ``square_clustering_coef``, ``harmonic_coef``, ``closeness_coef``
            and ``trajectory.Tonnetz``.
        directory: Directory the CSV files are written to. Defaults to the
            location that was previously hard-coded.
    '''
    header = ['Composer', 'Kalz Centrality', 'Global Clustering', 'Square Clustering', 'Harmonic Centrality', 'Closeness Centrality', 'Tonnetz']
    # No special case for the last index is needed: its tail slice is empty.
    # (The previous `i != 7` guard dropped real pairs when more than 8 lists
    # were supplied.)
    for i, graphs1 in enumerate(graphList):
        for j, graphs2 in enumerate(graphList[i+1:], start=i+1):
            values = [list(header)]
            for graph in graphs1 + graphs2:
                values.append([graph.composer, graph.kalz_coef, graph.glob_clust_coef, graph.square_clustering_coef, graph.harmonic_coef, graph.closeness_coef, graph.trajectory.Tonnetz])
            write2csv(directory, str(i)+str(j), values)

In [18]:
from sklearn.preprocessing import LabelEncoder

def encodingData(data) :
    '''Label-encode ``data`` with a new ``LabelEncoder``.

    Returns:
        A ``(encoder, encoded)`` tuple: the fitted encoder (kept so codes can
        be mapped back with ``inverse_transform``) and the result of
        ``fit_transform(data)``.
    '''
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(data)
    return encoder, encoded

In [19]:
def dropingData(data, label2Drop):
    '''Return ``data`` without the column(s) named by ``label2Drop``.

    The drop is not done in place; the frame passed in keeps its columns.
    '''
    return data.drop(columns=label2Drop)

In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
    
def separateDataset(data, label):
    '''Split a frame into its feature columns and its target column.

    Returns:
        ``(X, y)`` where ``y`` is ``data[label]`` and ``X`` is ``data`` with
        the ``label`` column dropped.
    '''
    target = data[label]
    features = data.drop(columns=label)
    return features, target

def splitDataset(data, label, test_size=0.3, random_state=None):
    '''Make a stratified, shuffled train/test split and standardize the features.

    Args:
        data: Frame holding the feature columns and the ``label`` column.
        label: Name of the target column.
        test_size: Forwarded to ``train_test_split``; defaults to the 0.3
            that was previously hard-coded.
        random_state: Forwarded to ``train_test_split``. Pass an int to get
            the same split on every run; ``None`` keeps the previous unseeded
            behavior.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. Note the order differs from
        the one ``train_test_split`` returns.
    '''
    X, y = separateDataset(data, label)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=True, stratify=y, random_state=random_state)
    # The scaler is fitted on the training features only and then applied to
    # the test features, so no test-set statistics leak into training.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, y_train, X_test, y_test

In [21]:
def savePrints(string2output):
    '''Write ``string2output`` to the results text file, replacing its contents.

    The destination is fixed at
    ``Comparison_Results/binary_classification/results.txt`` (relative to the
    current working directory); the directory must already exist.
    '''
    complete_directory = 'Comparison_Results/binary_classification/results.txt'
    # The with-block closes the file even if write() raises.
    with open(complete_directory, "w") as file:
        file.write(string2output)

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

def applyRandomForest(data, label):
    '''Train a random forest on a split of ``data`` and score it on the test part.

    The data is split with ``splitDataset``; a 1000-tree forest using the
    entropy criterion is fitted on the training part and used to predict the
    test part.

    Returns:
        The weighted F1 score of the test predictions, formatted as a string
        with two decimals (e.g. ``"0.97"``).
    '''
    train_X, train_y, test_X, test_y = splitDataset(data, label)
    forest = RandomForestClassifier(n_estimators=1000, criterion="entropy")
    forest.fit(train_X, train_y)
    predicted = forest.predict(test_X)
    weighted_f1 = f1_score(test_y, predicted, average='weighted')
    return "%.2f" % round(weighted_f1, 2)

In [23]:
def BinaryPredictions(directory):
    '''Print a score table for every ".csv" file found in ``directory``.

    Each CSV is read with pandas; its 'Tonnetz' and 'Composer' columns are
    replaced by the encoded values returned by ``encodingData``. The row label
    is built from the composer names that the encoder maps to codes 0 and 1.
    ``applyRandomForest`` is then run once on all columns and once more for
    each feature column left out, and one table row is printed per run.
    Warnings raised while this runs are suppressed.
    '''
    # (column left out, text for the Notes column); None means keep every column.
    # The order here is the order the rows are computed and printed in.
    runs = (
        (None, 'All Atributes'),
        ('Harmonic Centrality', 'Wihout Harmonic Centrality'),
        ('Closeness Centrality', 'Wihout Closeness Centrality'),
        ('Kalz Centrality', 'Wihout Kalz Centrality'),
        ('Global Clustering', 'Wihout Global Clustering'),
        ('Square Clustering', 'Wihout Square Clustering'),
        ('Tonnetz', 'Wihout Tonnetz'),
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # ANSI escapes: bold on for the title, reset before the table header.
        print('\033[1m' + 'BINARY CLASSIFICATION \n\n')
        print('\033[0m' + '| Composers | Score | Notes |')
        print('| -----------------------------------| --------------- |----------------------------------------|')
        for entry in os.listdir(directory):
            if not entry.endswith('.csv'):
                continue
            table = pd.read_csv(directory + '/' + entry, sep = ',')
            table['Tonnetz'] = encodingData(table['Tonnetz'])[1]
            encoder, table['Composer'] = encodingData(table['Composer'])
            first_name = encoder.inverse_transform([0])[0]
            second_name = encoder.inverse_transform([1])[0]
            composers = first_name + ' vs ' + second_name
            for left_out, notes in runs:
                if left_out is None:
                    features = table
                else:
                    features = dropingData(table, left_out)
                score = applyRandomForest(features, 'Composer')
                print('| ', composers , ' | ', score, ' | ', notes , ' | ')


In [36]:
# Load the pickled graphs from each subdirectory of the results directory and
# write one CSV per pair of graph lists.
createCSVfiles(loadDicts('Comparison_Results/binary_classification'))

# Score every .csv found in that same directory and print the results table.
BinaryPredictions('Comparison_Results/binary_classification')

[1mBINARY CLASSIFICATION 


[0m| Composers | Score | Notes |
| -----------------------------------| --------------- |----------------------------------------|
|  bach vs beethoven  |  1.00  |  All Atributes  | 
|  bach vs beethoven  |  0.98  |  Wihout Harmonic Centrality  | 
|  bach vs beethoven  |  1.00  |  Wihout Closeness Centrality  | 
|  bach vs beethoven  |  1.00  |  Wihout Kalz Centrality  | 
|  bach vs beethoven  |  1.00  |  Wihout Global Clustering  | 
|  bach vs beethoven  |  1.00  |  Wihout Square Clustering  | 
|  bach vs beethoven  |  0.93  |  Wihout Tonnetz  | 
|  bach vs chopin  |  0.83  |  All Atributes  | 
|  bach vs chopin  |  0.92  |  Wihout Harmonic Centrality  | 
|  bach vs chopin  |  0.89  |  Wihout Closeness Centrality  | 
|  bach vs chopin  |  0.86  |  Wihout Kalz Centrality  | 
|  bach vs chopin  |  0.92  |  Wihout Global Clustering  | 
|  bach vs chopin  |  0.97  |  Wihout Square Clustering  | 
|  bach vs chopin  |  0.83  |  Wihout Tonnetz  | 
|  Unknown vs 

|  Unknown vs monteverdi  |  0.97  |  Wihout Square Clustering  | 
|  Unknown vs monteverdi  |  0.97  |  Wihout Tonnetz  | 
|  Unknown vs mozart  |  0.76  |  All Atributes  | 
|  Unknown vs mozart  |  0.94  |  Wihout Harmonic Centrality  | 
|  Unknown vs mozart  |  0.67  |  Wihout Closeness Centrality  | 
|  Unknown vs mozart  |  0.85  |  Wihout Kalz Centrality  | 
|  Unknown vs mozart  |  0.79  |  Wihout Global Clustering  | 
|  Unknown vs mozart  |  0.76  |  Wihout Square Clustering  | 
|  Unknown vs mozart  |  0.88  |  Wihout Tonnetz  | 
|  Unknown vs palestrina  |  1.00  |  All Atributes  | 
|  Unknown vs palestrina  |  0.94  |  Wihout Harmonic Centrality  | 
|  Unknown vs palestrina  |  0.97  |  Wihout Closeness Centrality  | 
|  Unknown vs palestrina  |  1.00  |  Wihout Kalz Centrality  | 
|  Unknown vs palestrina  |  0.97  |  Wihout Global Clustering  | 
|  Unknown vs palestrina  |  0.94  |  Wihout Square Clustering  | 
|  Unknown vs palestrina  |  0.94  |  Wihout Tonnetz  | 
| 