# Binary Classification (Composer Based)

In [1]:
import csv # create csv
import pandas as pd # Open and manage CSV files
import pickle # Open python objects
import os # Directory Control
import seaborn # easy plots
from itertools import product
import warnings # find and don't show warnings

from structural_functions import mergeDicts

### Let's load our data

In [2]:
def loadDicts(directory):
    """Load the pickled dictionaries found below ``directory``, one merged dict per folder.

    Every directory that ``os.walk`` reports underneath ``directory`` (the root
    itself is skipped) contributes one dictionary. All files in that folder whose
    name ends with ``.p`` are unpickled and merged with ``dict.update``, so a key
    that appears in several files keeps the value from the file read last.

    Parameters
    ----------
    directory : str
        Root folder to scan.

    Returns
    -------
    list of dict
        One merged dictionary per sub-folder, in the order ``os.walk`` yields them.
        A sub-folder without ``.p`` files contributes an empty dict.
    """
    # os.walk yields the root first; only the folders below it are of interest.
    subfolders = [entry[0] for entry in os.walk(directory)][1:]
    graphList = []
    for subfolder in subfolders:
        graph = dict()
        for file in os.listdir(subfolder):
            if not file.endswith(".p"):
                continue
            complete_name = os.path.join(subfolder, file)
            print('Rendering --> ', file)
            # SECURITY: pickle.load executes arbitrary code embedded in the file;
            # only point this function at directories whose content is trusted.
            # The context manager closes the handle even if unpickling fails.
            with open(complete_name, "rb") as pickled:
                graph.update(pickle.load(pickled))
        graphList.append(graph)
    return graphList


In [3]:
def write2csv(directory, file_name,  data):
    """Write ``data`` as ``<directory>/<file_name>.csv``.

    Parameters
    ----------
    directory : str
        Folder that receives the file; it must already exist.
    file_name : str
        File name without the ``.csv`` extension.
    data : iterable of iterables
        Rows to write; each inner iterable becomes one CSV row.

    An existing file with the same name is overwritten.
    """
    csv_path = directory + '/' + file_name + '.csv'
    # newline='' is what the csv module requires for files handed to csv.writer;
    # without it an extra blank line appears after every row on Windows.
    # The with-block closes the file, so no explicit close() is needed.
    with open(csv_path, 'w', newline='') as csvFile:
        csv.writer(csvFile).writerows(data)

In [4]:
def createCSVfiles(graphList):
    """Write one CSV of graph features for every unordered pair of entries in ``graphList``.

    For each pair ``(i, j)`` with ``i < j`` the two entries are combined with
    ``mergeDicts`` and one row per resulting graph is written to
    ``Comparison_Results/binary_classification/<i><j>.csv`` through ``write2csv``.

    Parameters
    ----------
    graphList : list
        Collections of graphs, one per composer (as produced by ``loadDicts``).

    Notes
    -----
    * Iterating the result of ``mergeDicts`` is assumed to yield graph objects that
      expose ``composer``, the centrality/clustering coefficients and
      ``trajectory.Tonnetz`` -- TODO confirm against ``structural_functions``.
    * The previous hard-coded ``i != 7`` guard was dropped: for a list of eight
      entries the slice ``graphList[8:]`` is already empty, and for longer lists the
      guard silently skipped every pair that starts at index 7.
    * NOTE(review): the file name concatenates both indices without a separator, so
      names become ambiguous once there are more than ten entries (e.g. ``"110"``).
    """
    header = ['Composer', 'Kalz Centrality', 'Global Clustering', 'Square Clustering', 'Harmonic Centrality', 'Closeness Centrality', 'Tonnetz']
    for i, graphs1 in enumerate(graphList):
        # start=i+1 makes j the absolute index of the second entry of the pair.
        for j, graphs2 in enumerate(graphList[i + 1:], start=i + 1):
            graphs = mergeDicts(graphs1, graphs2)
            values = [header]
            for graph in graphs:
                values.append([graph.composer, graph.kalz_coef, graph.glob_clust_coef, graph.square_clustering_coef, graph.harmonic_coef, graph.closeness_coef, graph.trajectory.Tonnetz])
            write2csv('Comparison_Results/binary_classification', str(i) + str(j), values)

In [5]:
import warnings

# Hide DeprecationWarning messages for every cell executed after this one.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)





In [6]:
from sklearn.preprocessing import LabelEncoder

def encodingData(data) :
    """Encode the labels in ``data`` as integers with a fresh ``LabelEncoder``.

    Parameters
    ----------
    data : array-like
        Labels to encode.

    Returns
    -------
    tuple
        ``(encoder, encoded)``: the fitted ``LabelEncoder`` (usable to map the
        integers back with ``inverse_transform``) and the encoded values.
    """
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(data)
    return encoder, encoded

In [7]:
def dropingData(data, label2Drop):
    """Return a copy of ``data`` without the column(s) named by ``label2Drop``.

    The frame passed in is left untouched.
    """
    return data.drop(columns=label2Drop)

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
    
def separateDataset(data, label):
    """Split ``data`` into the feature frame and the target column.

    Returns
    -------
    tuple
        ``(X, y)`` where ``y`` is ``data[label]`` and ``X`` is ``data`` without
        that column.
    """
    target = data[label]
    features = data.drop(columns=label)
    return features, target

def splitDataset(data, label, test_size=0.3, random_state=None):
    """Create a stratified, standardised train/test split of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and the target column.
    label : str
        Name of the target column.
    test_size : float, optional
        Fraction of the rows held out for testing (default ``0.3``).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        split random on every call; pass an integer to make it reproducible.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` -- note the order differs from the
        one returned by ``train_test_split``.
    """
    X, y = separateDataset(data, label)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        shuffle=True,
        stratify=y,
        random_state=random_state,
    )
    # The scaler is fitted on the training rows only and then applied to the test
    # rows, so no statistics of the held-out data leak into training.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, y_train, X_test, y_test

In [9]:
def savePrints(string2output):
    """Write ``string2output`` to ``Comparison_Results/binary_classification/results.txt``.

    The file is opened in ``"w"`` mode, so any previous content is replaced.
    The target directory must already exist.
    """
    complete_directory = 'Comparison_Results/binary_classification/results.txt'
    # The context manager closes the handle even when write() raises.
    with open(complete_directory, "w") as file:
        file.write(string2output)

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

def applyRandomForest(data, label, n_estimators=1000, random_state=None):
    """Train a random forest on a fresh split of ``data`` and return its weighted F1 score.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and the target column.
    label : str
        Name of the target column.
    n_estimators : int, optional
        Number of trees in the forest (default ``1000``).
    random_state : int or None, optional
        Seed for the ``RandomForestClassifier``. It does not influence the
        train/test split done by ``splitDataset``. The default ``None`` leaves the
        forest unseeded.

    Returns
    -------
    float
        ``f1_score(..., average='weighted')`` of the predictions on the test part.
    """
    X_train, y_train, X_test, y_test = splitDataset(data, label)
    rfc = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion="entropy",
        random_state=random_state,
    )
    rfc.fit(X_train, y_train)
    pred_rfc = rfc.predict(X_test)

    return f1_score(y_test, pred_rfc, average='weighted')

In [11]:
def BinaryPredictions(directory):
    """Print a table of random-forest F1 scores for every ``.csv`` file in ``directory``.

    Each file is read with pandas; its ``Tonnetz`` and ``Composer`` columns are
    label-encoded, and a classifier predicting ``Composer`` is scored once with all
    columns and once per feature with that single feature removed.

    Parameters
    ----------
    directory : str
        Folder containing the comparison CSV files.

    Notes
    -----
    * Files are processed in sorted name order so the printed table has the same
      row order on every run and platform (``os.listdir`` order is arbitrary).
    * All warnings raised while scoring are suppressed.
    * The composer caption uses encoded classes 0 and 1, so a file that contains a
      single composer makes ``inverse_transform([1])`` raise.
    """
    # Features removed one at a time, in the order their rows are printed.
    ablated_features = (
        'Harmonic Centrality',
        'Closeness Centrality',
        'Kalz Centrality',
        'Global Clustering',
        'Square Clustering',
        'Tonnetz',
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # ANSI escapes: \033[1m switches bold on, \033[0m resets it.
        print('\033[1m' + 'BINARY CLASSIFICATION \n\n')
        print('\033[0m' + '| Composers | Score | Notes |')
        print('| -----------------------------------| --------------- |----------------------------------------|')
        for file in sorted(os.listdir(directory)):
            if not file.endswith('.csv'):
                continue
            complete_name = os.path.join(directory, file)
            data = pd.read_csv(complete_name, sep = ',')
            data['Tonnetz'] = encodingData(data['Tonnetz'])[1]
            label_quality, data['Composer'] = encodingData(data['Composer'])
            composers = label_quality.inverse_transform([0])[0] + ' vs ' + label_quality.inverse_transform([1])[0]

            # Baseline: every column is available to the classifier.
            score = applyRandomForest(data, 'Composer')
            notes = 'All Atributes'
            print('| ', composers , ' | ', score, ' | ', notes , ' | ')

            # Ablations: drop exactly one feature per run.
            for feature in ablated_features:
                score = applyRandomForest(dropingData(data, feature), 'Composer')
                notes = 'Wihout ' + feature
                print('| ', composers , ' | ', score, ' | ', notes , ' | ')


In [12]:
# Score every CSV file in the binary-classification results folder and print the table.
BinaryPredictions('Comparison_Results/binary_classification')

[1mBINARY CLASSIFICATION
[0m| Composers | Score | Notes |
| -----------------------------------| --------------- |----------------------------------------|
|  bach vs beethoven  |  0.9525988885256621  |  All Atributes  | 
|  bach vs beethoven  |  0.9762589337057422  |  Wihout Harmonic Centrality  | 
|  bach vs beethoven  |  0.9525988885256621  |  Wihout Closeness Centrality  | 
|  bach vs beethoven  |  1.0  |  Wihout Kalz Centrality  | 
|  bach vs beethoven  |  1.0  |  Wihout Global Clustering  | 
|  bach vs beethoven  |  1.0  |  Wihout Square Clustering  | 
|  bach vs beethoven  |  1.0  |  Wihout Tonnetz  | 
|  bach vs beethoven  |  0.8333014354066985  |  All Atributes  | 
|  bach vs beethoven  |  0.7597766438817752  |  Wihout Harmonic Centrality  | 
|  bach vs beethoven  |  0.8277055256064689  |  Wihout Closeness Centrality  | 
|  bach vs beethoven  |  0.8295907928388746  |  Wihout Kalz Centrality  | 
|  bach vs beethoven  |  0.7149622926093515  |  Wihout Global Clustering  | 
|  b

|  chopin vs mozart  |  0.7355233002291826  |  Wihout Kalz Centrality  | 
|  chopin vs mozart  |  0.6470588235294118  |  Wihout Global Clustering  | 
|  chopin vs mozart  |  0.5853457172342622  |  Wihout Square Clustering  | 
|  chopin vs mozart  |  0.7058823529411765  |  Wihout Tonnetz  | 
|  chopin vs palestrina  |  0.8328173374613004  |  All Atributes  | 
|  chopin vs palestrina  |  0.9442724458204333  |  Wihout Harmonic Centrality  | 
|  chopin vs palestrina  |  0.9166023166023165  |  Wihout Closeness Centrality  | 
|  chopin vs palestrina  |  0.8054054054054054  |  Wihout Kalz Centrality  | 
|  chopin vs palestrina  |  0.804195804195804  |  Wihout Global Clustering  | 
|  chopin vs palestrina  |  0.8885448916408669  |  Wihout Square Clustering  | 
|  chopin vs palestrina  |  0.8583792289535799  |  Wihout Tonnetz  | 
|  chopin vs schumann  |  0.5897435897435898  |  All Atributes  | 
|  chopin vs schumann  |  0.3288770053475936  |  Wihout Harmonic Centrality  | 
|  chopin vs schuman