# Contains Neural Network and IID Cross-Validation

In [1]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

In [2]:
#reading dataframe
df = pd.read_csv("dataset/phase_3_TRAIN_7d499bff69ca69b6_6372c3e_MLPC2021_generic.csv")

In [3]:
# drop target value, student annotations and string ID from input features:
X = df.drop(columns=['quadrant','mean_A','mean_V','id','score_mode','score_key_strength'])

# we want to predict the quadrant:
y = df['quadrant'].values

In [4]:
#splitting by pianist and piecw!

#create tags_dataframe:
X_tags=pd.DataFrame()
X_tags['id']=df['id']


#extract piece_id and pianist to later allow by piece/pianist/both cross validation
def extractPianist(x):
    return x[0:2]
def extract_piece_id(x):
    return x[3:5]
def extract_snippet_number(x):
    return x[6:9]

X_tags['Pianist']=X_tags['id'].apply(extractPianist)
X_tags['Piece_id']=X_tags['id'].apply(extract_piece_id)
X_tags['Snippet_number']=X_tags['id'].apply(extract_snippet_number)

X_tags.head()

Unnamed: 0,id,Pianist,Piece_id,Snippet_number
0,GG-01-000,GG,1,0
1,GG-01-001,GG,1,1
2,GG-01-002,GG,1,2
3,GG-01-003,GG,1,3
4,GG-01-004,GG,1,4


In [5]:
#get list of pianists and pieces!
pianist_list=list(set(X_tags['Pianist']))
piece_list=list(set(X_tags['Piece_id']))
pianist_list[0]

'SR'

In [6]:
#by pianist cross validation
score=0
for pianist in pianist_list:
    mask1=X_tags['Pianist']!=pianist
    mask2=X_tags['Pianist']==pianist
    X_train, y_train=X[mask1],y[mask1]
    X_test, y_test=X[mask2],y[mask2]
    #print(X_train)
    clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
    score+=clf.score(X_test, y_test)
    #print(clf.score(X_test, y_test))

In [7]:
pianist_cross_validation_score=score/len(pianist_list)
print(pianist_cross_validation_score)

0.5113969192460408


In [8]:
#by piece cross validation
#split dataset into train and test data
score=0
for piece in piece_list:
    mask1=X_tags['Piece_id']!=piece
    mask2=X_tags['Piece_id']==piece
    #print(len(mask1))
    X_train, y_train=X[mask1],y[mask1]
    X_test, y_test=X[mask2],y[mask2]
    clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
    score+=clf.score(X_test, y_test)
    #print(clf.score(X_test, y_test))

In [9]:
piece_cross_validation_score=score/len(piece_list)
print(piece_cross_validation_score)

0.40084637497575004


In [10]:
#by piece and pianist cross validation
#split dataset into train and test data
score=0
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
c=0
for i, piece in enumerate(piece_list):
    for j, pianist in enumerate(pianist_list):
        c+=1
        mask2 = ((X_tags['Piece_id'] ==piece) & (X_tags['Pianist']==pianist))
        mask1 = ((X_tags['Piece_id'] !=piece) & (X_tags['Pianist']!=pianist)
                         | (X_tags['Piece_id'] !=piece) & (X_tags['Pianist']==pianist)
                         | (X_tags['Piece_id'] ==piece) & (X_tags['Pianist']!=pianist))
        X_train, y_train=X[mask1],y[mask1]
        X_test, y_test=X[mask2],y[mask2]
        #print(X_train)
        clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
        score+=clf.score(X_test, y_test)
        if c%10==0:
            print(f'percent done: {(c)/(len(pianist_list)*len(piece_list))*100}')
    


2
percent done: 0.9259259259259258
4
percent done: 1.8518518518518516
6
percent done: 2.7777777777777777
8
percent done: 3.7037037037037033
10
percent done: 4.62962962962963
12
percent done: 5.555555555555555
14
percent done: 6.481481481481481
16
percent done: 7.4074074074074066
18
percent done: 8.333333333333332
20
percent done: 9.25925925925926
22
percent done: 10.185185185185185
24
percent done: 11.11111111111111
26
percent done: 12.037037037037036
28
percent done: 12.962962962962962
30
percent done: 13.88888888888889
32
percent done: 14.814814814814813
34
percent done: 15.74074074074074
36
percent done: 16.666666666666664
38
percent done: 17.59259259259259
40
percent done: 18.51851851851852
42
percent done: 19.444444444444446
44
percent done: 20.37037037037037
46
percent done: 21.296296296296298
48
percent done: 22.22222222222222
50
percent done: 23.14814814814815
52
percent done: 24.074074074074073
54
percent done: 25.0
56
percent done: 25.925925925925924
58
percent done: 26.85185

In [11]:
both_cross_validation_score=score/(len(pianist_list)*len(piece_list))
print(both_cross_validation_score)

0.40544534531714654
