# Binary Classification (Composer Based)

In [12]:
import csv # create csv
import pandas as pd # Open and manage CSV files
import pickle # Open python objects
import os # Directory Control
import seaborn # easy plots
from itertools import product
import warnings # find and don't show warnings

### Let's load our data

In [13]:
def loadDicts(directory):
    '''Load every ``.p`` pickle found in the sub-directories of ``directory``.

    The directory tree is walked with ``os.walk``; the root itself is skipped,
    so pickles placed directly in ``directory`` are ignored. Each sub-directory
    (at any depth) contributes one inner list holding the items of all the
    pickles it contains, concatenated in file-name order.

    Parameters
    ----------
    directory : str
        Root folder whose sub-directories are searched.

    Returns
    -------
    list of list
        One list per sub-directory, in sorted traversal order. A sub-directory
        without ``.p`` files yields an empty list.
    '''
    subDir = []
    for dirpath, dirnames, _ in os.walk(directory):
        # Sorting in place makes os.walk visit sub-directories in a stable
        # order, so the position of each group in the result is reproducible.
        dirnames.sort()
        subDir.append(dirpath)

    graphList = []
    # subDir[0] is the root directory itself; only its descendants are scanned.
    for subfolder in subDir[1:]:
        graphs = []
        for file in sorted(os.listdir(subfolder)):
            if file.endswith(".p"):
                complete_name = os.path.join(subfolder, file)
                print('Rendering --> ', file)
                # NOTE: pickle.load can execute arbitrary code; only point this
                # function at pickle files from a trusted source.
                with open(complete_name, "rb") as handle:
                    picklegraphs = pickle.load(handle)
                graphs.extend(picklegraphs)
        graphList.append(graphs)
    return graphList


In [14]:
def write2csv(directory, file_name,  data):
    '''Write rows to ``<directory>/<file_name>.csv``, replacing any existing file.

    Parameters
    ----------
    directory : str
        Folder that will contain the file (it must already exist).
    file_name : str
        File name without the ``.csv`` extension.
    data : iterable of iterables
        Rows handed to ``csv.writer.writerows``.
    '''
    csv_path = os.path.join(directory, file_name + '.csv')
    # newline='' lets the csv module control line endings itself; without it
    # extra blank rows appear on platforms that translate '\n' on write.
    with open(csv_path, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(data)

In [15]:
def createCSVfiles(graphList, directory='Comparison_Results/binary_classification'):
    '''Write one CSV per unordered pair of groups in ``graphList``.

    For every pair of positions ``i < j`` the two groups are concatenated and
    each item (unpacked as a pair ``graph1, graph2``) becomes one row holding
    ``graph1.composer`` followed by the coefficient attributes of both graphs.
    The file is written through ``write2csv`` and is named ``str(i) + str(j)``.

    Parameters
    ----------
    graphList : list of list
        Groups of ``(graph1, graph2)`` pairs.
    directory : str, optional
        Folder receiving the CSV files; defaults to the location used by the
        rest of this notebook.

    Notes
    -----
    Every pair of groups is processed regardless of how many groups there are
    (no special-casing of a fixed last index).
    NOTE(review): the ``str(i) + str(j)`` file name becomes ambiguous once an
    index reaches 10 (e.g. ``'110'``); kept as is so existing file names do
    not change.
    '''
    header = ['Composer', 'Kalz Centrality 1', 'Kalz Centrality 2', 'Global Clustering 1', 'Global Clustering 2', 'Square Clustering 1', 'Square Clustering 2', 'Harmonic Centrality 1', 'Harmonic Centrality 2', 'Closeness Centrality 1', 'Closeness Centrality 2', 'Tonnetz 1', 'Tonnetz 2']
    for i, graphs1 in enumerate(graphList):
        # The slice is empty for the last group, so no explicit guard is needed.
        for j, graphs2 in enumerate(graphList[i+1:], start=i+1):
            values = [header]
            for graph1, graph2 in graphs1 + graphs2:
                values.append([
                    graph1.composer,
                    graph1.kalz_coef, graph2.kalz_coef,
                    graph1.glob_clust_coef, graph2.glob_clust_coef,
                    graph1.square_clustering_coef, graph2.square_clustering_coef,
                    graph1.harmonic_coef, graph2.harmonic_coef,
                    graph1.closeness_coef, graph2.closeness_coef,
                    graph1.trajectory.Tonnetz, graph2.trajectory.Tonnetz,
                ])
            write2csv(directory, str(i)+str(j), values)

In [16]:
from sklearn.preprocessing import LabelEncoder

def encodingData(data):
    '''Encode ``data`` with a freshly fitted ``LabelEncoder``.

    Returns the fitted encoder together with the encoded values, in that
    order, so the caller can map codes back with ``inverse_transform``.
    '''
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(data)
    return encoder, encoded

In [17]:
def dropingData(data, label2Drop):
    '''Return ``data`` without the column(s) named by ``label2Drop``.

    The input frame is not modified; ``DataFrame.drop`` produces a new object.
    '''
    return data.drop(columns=label2Drop)

In [18]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
    
def separateDataset(data, label):
    '''Split a frame into features and target.

    Returns ``(X, y)`` where ``y`` is ``data[label]`` and ``X`` is ``data``
    with that column removed.
    '''
    target = data[label]
    features = data.drop(columns=label)
    return features, target

def splitDataset(data, label, test_size=0.3, random_state=None):
    '''Create a stratified, standardised train/test split.

    The frame is separated into features and target with ``separateDataset``,
    split with ``train_test_split`` (shuffled, stratified on the target) and
    the features are scaled with a ``StandardScaler`` fitted on the training
    part only, so no test-set statistics leak into training.

    Parameters
    ----------
    data : pandas.DataFrame
        Full dataset including the target column.
    label : str
        Name of the target column.
    test_size : float, optional
        Fraction of rows held out for testing (default ``0.3``).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        split different on every call; pass an integer for reproducible splits.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` - note the order differs from
        the one returned by ``train_test_split``.
    '''
    X, y = separateDataset(data, label)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        shuffle=True,
        stratify=y,
        random_state=random_state,
    )
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    # Re-use the training statistics for the held-out rows.
    X_test = sc.transform(X_test)
    return X_train, y_train, X_test, y_test

In [19]:
def savePrints(string2output):
    '''Write ``string2output`` to the results text file, replacing its content.

    The target is ``Comparison_Results/binary_classification/results.txt``
    relative to the current working directory. The file is opened in ``"w"``
    mode, so every call overwrites what an earlier call wrote.
    '''
    complete_directory = 'Comparison_Results/binary_classification/results.txt'
    # The context manager closes the file even when write() raises.
    with open(complete_directory, "w") as file:
        file.write(string2output)

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score

def applyRandomForest(data, label, n_estimators=1000, random_state=None):
    '''Train a random forest on a fresh split and report its weighted F1 score.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset including the target column.
    label : str
        Name of the target column.
    n_estimators : int, optional
        Number of trees in the forest (default ``1000``).
    random_state : int or None, optional
        Seed for the forest itself. It does not control the train/test split,
        which is produced by ``splitDataset``.

    Returns
    -------
    str
        Weighted F1 score on the test part, formatted with two decimals.
    '''
    X_train, y_train, X_test, y_test = splitDataset(data, label)
    rfc = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion="entropy",
        random_state=random_state,
    )
    rfc.fit(X_train, y_train)
    pred_rfc = rfc.predict(X_test)

    return "%.2f" % round(f1_score(y_test, pred_rfc, average='weighted'), 2)

def applykNN(data, label):
    '''Score a 10-nearest-neighbours classifier on a fresh split of ``data``.

    The split comes from ``splitDataset``; the return value is the weighted
    F1 score on the test part as a string with two decimals.
    '''
    train_X, train_y, test_X, test_y = splitDataset(data, label)
    classifier = KNeighborsClassifier(n_neighbors=10)
    classifier.fit(train_X, train_y)
    weighted_f1 = f1_score(test_y, classifier.predict(test_X), average='weighted')
    return "%.2f" % round(weighted_f1, 2)

In [25]:
def BinaryPredictions(directory):
    '''Print a table of classification scores for every CSV in ``directory``.

    Each ``.csv`` file is read with pandas, its ``'Tonnetz 1'``, ``'Tonnetz 2'``
    and ``'Composer'`` columns are label-encoded, and the frame is scored with
    ``applyRandomForest`` and then ``applykNN`` using ``'Composer'`` as target.
    One table row is printed per classifier. Files are processed in sorted
    name order so the table is reproducible across filesystems.

    Parameters
    ----------
    directory : str
        Folder containing the CSV files to evaluate.

    Notes
    -----
    All warnings raised while the table is produced are suppressed.
    The row label is built from encoded classes 0 and 1 only.
    '''
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        print('\033[1m' + 'BINARY CLASSIFICATION \n\n')
        print('\033[0m' + '| Composers | Score | Notes |')
        print('| -----------------------------------| --------------- |----------------------------------------|')
        # os.listdir gives no ordering guarantee; sort for a stable table.
        for file in sorted(os.listdir(directory)):
            if not file.endswith('.csv'):
                continue
            complete_name = os.path.join(directory, file)
            data = pd.read_csv(complete_name, sep = ',')
            data['Tonnetz 1'] = encodingData(data['Tonnetz 1'])[1]
            data['Tonnetz 2'] = encodingData(data['Tonnetz 2'])[1]
            label_quality, data['Composer'] = encodingData(data['Composer'])
            composers = label_quality.inverse_transform([0])[0] + ' vs ' + label_quality.inverse_transform([1])[0]
            # Same reporting for both classifiers, random forest first.
            for notes, classifier in (('RF', applyRandomForest), ('kNN', applykNN)):
                score = classifier(data, 'Composer')
                print('| ', composers , ' | ', score, ' | ', notes , ' | ')
            # NOTE(review): ablation runs (re-scoring with applyRandomForest
            # after dropingData removes one feature) were previously sketched
            # here as commented-out code. They passed base names such as
            # 'Harmonic Centrality', whereas the header written by
            # createCSVfiles uses '<name> 1' / '<name> 2' columns, so both
            # columns would have to be dropped for such a run to work.


In [26]:
# Step 1: load the pickled graph pairs, grouped by sub-directory.
graph_groups = loadDicts('Comparison_Results/binary_classification')
# Step 2: write one CSV for every pair of groups.
createCSVfiles(graph_groups)

# Step 3: score every generated CSV and print the results table.
BinaryPredictions('Comparison_Results/binary_classification')

Rendering -->  bach.p
Rendering -->  beethoven.p
Rendering -->  beethovenCorpus.p
Rendering -->  chopin.p
Rendering -->  chopinCorpus.p
Rendering -->  haydn_quartets.p
Rendering -->  jazz.p
Rendering -->  monteverdi.p
Rendering -->  mozart.p
Rendering -->  mozartCorpus.p
Rendering -->  mozart_quartets.p
Rendering -->  palestrina.p
Rendering -->  schumann.p
Rendering -->  schumannCorpus.p
[1mBINARY CLASSIFICATION 


[0m| Composers | Score | Notes |
| -----------------------------------| --------------- |----------------------------------------|
|  bach vs beethoven  |  1.00  |  RF  | 
|  bach vs beethoven  |  0.97  |  kNN  | 
|  bach vs chopin  |  0.97  |  RF  | 
|  bach vs chopin  |  0.94  |  kNN  | 
|  bach vs haydn  |  0.89  |  RF  | 
|  bach vs haydn  |  0.93  |  kNN  | 
|  Unknown vs bach  |  1.00  |  RF  | 
|  Unknown vs bach  |  0.98  |  kNN  | 
|  bach vs monteverdi  |  0.83  |  RF  | 
|  bach vs monteverdi  |  0.69  |  kNN  | 
|  bach vs mozart  |  1.00  |  RF  | 
|  bach vs 