# static

In [1]:
# All competition CSVs sit in one dataset directory; build each path from it.
DATASET_DIR = '/content/drive/MyDrive/student_cup_2021/dataset'
TRAIN_PATH = DATASET_DIR + '/train.csv'
TEST_PATH = DATASET_DIR + '/test.csv'
GENRE_PATH = DATASET_DIR + '/genre_labels.csv'

# File that the ModelCheckpoint callback writes and load_model reads back.
MODEL_PATH = 'model.h5'

# function

## creating model

In [2]:
import pandas as pd

# Read the labelled training rows and the genre lookup table in one pass.
df_train, df_genre = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, GENRE_PATH))

# Class balance: number of training rows per genre id.
df_train['genre'].value_counts()

10    1337
8     1305
3      362
7      334
1      205
2      191
5      126
9       59
6       50
4       45
0       32
Name: genre, dtype: int64

## genre labels

In [3]:
# Display the genre lookup table read from GENRE_PATH.
df_genre

Unnamed: 0,genre,labels
0,country,0
1,electronic,1
2,folk,2
3,hip-hop,3
4,jazz,4
5,latin,5
6,classic,6
7,other-light-music,7
8,pop,8
9,religious,9


# Loading data

In [4]:
import pandas as pd

# Re-read both splits so this section starts from the raw CSV contents.
df_train = pd.read_csv(TRAIN_PATH)
df_test = pd.read_csv(TEST_PATH)

# Stack the train features (label column removed) on top of the test rows.
train_features = df_train.drop(columns='genre')
df = pd.concat([train_features, df_test], axis=0)

shape_report = 'df_train shape: {0}, df_test shape: {1}, df shape: {2}'
print(shape_report.format(df_train.shape, df_test.shape, df.shape))

df_train shape: (4046, 14), df_test shape: (4046, 13), df shape: (8092, 13)


# preprocessing

## missing data

In [5]:
# Audio-feature columns that get a missing-value indicator.
col_list = ['acousticness', 'positiveness', 'danceability', 'energy', 'liveness', 'speechiness', 'instrumentalness']
# Record where each feature was missing (1) or present (0) *before* imputing,
# because the information is lost once the gaps are filled.
for col in col_list:
  df_train[col+'_nan'] = df_train[col].isna().astype(int)

# Fill remaining gaps with the column mean. numeric_only=True is required
# because the frame still holds the string columns 'tempo' and 'region':
# pandas >= 2.0 raises TypeError when asked to average them, while older
# versions silently skipped them (same result as this explicit form).
df_train = df_train.fillna(df_train.mean(numeric_only=True))

print('df_train shape: {0}'.format(df_train.shape))
df_train.head()

df_train shape: (4046, 21)


Unnamed: 0,index,genre,popularity,duration_ms,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,tempo,region,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan
0,0,10,11,201094,0.112811,0.157247,0.187841,-1.884852,0.893918,0.363568,0.390108,0.888884,121-152,region_H,0,0,0,0,0,0,0
1,1,8,69,308493,0.101333,0.346563,0.554444,-5.546495,0.874409,0.193892,0.161497,0.12391,153-176,region_I,0,0,0,0,0,0,0
2,2,3,43,197225,0.49642,0.265391,0.457642,-9.25567,0.439933,0.217146,0.369057,0.16647,64-76,region_E,0,0,0,0,0,0,0
3,3,10,45,301092,0.165667,0.245533,0.356578,-5.088788,0.868704,0.377025,0.226677,0.175399,177-192,region_C,0,0,0,0,0,0,0
4,4,3,57,277348,0.19072,0.777578,0.830479,-3.933896,0.650149,0.169323,0.222488,0.22603,97-120,unknown,0,0,0,0,0,0,0


## duration_ms

In [6]:
# Replace the millisecond duration with whole minutes (fraction truncated by
# the int cast); the new column is appended and the original one removed.
whole_minutes = (df_train['duration_ms'] / 60000).astype(int)
df_train = df_train.assign(duration_m=whole_minutes).drop(columns=['duration_ms'])

In [7]:
# Preview the frame now that duration_ms has been replaced by duration_m.
df_train.head()

Unnamed: 0,index,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,tempo,region,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m
0,0,10,11,0.112811,0.157247,0.187841,-1.884852,0.893918,0.363568,0.390108,0.888884,121-152,region_H,0,0,0,0,0,0,0,3
1,1,8,69,0.101333,0.346563,0.554444,-5.546495,0.874409,0.193892,0.161497,0.12391,153-176,region_I,0,0,0,0,0,0,0,5
2,2,3,43,0.49642,0.265391,0.457642,-9.25567,0.439933,0.217146,0.369057,0.16647,64-76,region_E,0,0,0,0,0,0,0,3
3,3,10,45,0.165667,0.245533,0.356578,-5.088788,0.868704,0.377025,0.226677,0.175399,177-192,region_C,0,0,0,0,0,0,0,5
4,4,3,57,0.19072,0.777578,0.830479,-3.933896,0.650149,0.169323,0.222488,0.22603,97-120,unknown,0,0,0,0,0,0,0,4


## sensation

In [8]:
col_list = ['acousticness', 'positiveness', 'danceability', 'energy', 'liveness', 'speechiness', 'instrumentalness']
for col in col_list:
  # Quantise to integer percent steps (truncating), then min-max rescale the
  # quantised values back onto [0, 1].
  percent = (df_train[col] * 100).astype(int)
  lowest, highest = percent.min(), percent.max()
  df_train[col] = (percent - lowest) / (highest - lowest)

In [9]:
# df_train.head()

## tempo

In [10]:
# 'tempo' holds range strings such as '121-152'. Keep the upper bound as a
# numeric feature and also one-hot encode the range label itself.
df_train['tempo_max'] = df_train['tempo'].str.split('-').str[1].astype(int)
tempo_onehot = pd.get_dummies(df_train['tempo'])
df_train = pd.concat([df_train, tempo_onehot], axis=1).drop(columns=['tempo'])

In [11]:
# Preview the frame with tempo_max and the one-hot tempo-range columns added.
df_train.head()

Unnamed: 0,index,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,region,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120
0,0,10,11,0.11,0.153061,0.171717,-1.884852,0.89,0.36,0.443182,0.88,region_H,0,0,0,0,0,0,0,3,152,0,1,0,0,0,0,0,0,0,0,0,0
1,1,8,69,0.1,0.346939,0.545455,-5.546495,0.87,0.19,0.181818,0.12,region_I,0,0,0,0,0,0,0,5,176,0,0,1,0,0,0,0,0,0,0,0,0
2,2,3,43,0.49,0.265306,0.444444,-9.25567,0.43,0.21,0.409091,0.16,region_E,0,0,0,0,0,0,0,3,76,0,0,0,0,0,0,0,0,0,1,0,0
3,3,10,45,0.16,0.244898,0.343434,-5.088788,0.86,0.37,0.25,0.17,region_C,0,0,0,0,0,0,0,5,192,0,0,0,1,0,0,0,0,0,0,0,0
4,4,3,57,0.19,0.785714,0.828283,-3.933896,0.65,0.16,0.25,0.22,unknown,0,0,0,0,0,0,0,4,120,0,0,0,0,0,0,0,0,0,0,0,1


## index

In [12]:
# Remove the 'index' column so it is not fed to the model as a feature.
df_train = df_train.drop(columns=['index'])

In [13]:
# Preview the frame after the 'index' column has been dropped.
df_train.head()

Unnamed: 0,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,region,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120
0,10,11,0.11,0.153061,0.171717,-1.884852,0.89,0.36,0.443182,0.88,region_H,0,0,0,0,0,0,0,3,152,0,1,0,0,0,0,0,0,0,0,0,0
1,8,69,0.1,0.346939,0.545455,-5.546495,0.87,0.19,0.181818,0.12,region_I,0,0,0,0,0,0,0,5,176,0,0,1,0,0,0,0,0,0,0,0,0
2,3,43,0.49,0.265306,0.444444,-9.25567,0.43,0.21,0.409091,0.16,region_E,0,0,0,0,0,0,0,3,76,0,0,0,0,0,0,0,0,0,1,0,0
3,10,45,0.16,0.244898,0.343434,-5.088788,0.86,0.37,0.25,0.17,region_C,0,0,0,0,0,0,0,5,192,0,0,0,1,0,0,0,0,0,0,0,0
4,3,57,0.19,0.785714,0.828283,-3.933896,0.65,0.16,0.25,0.22,unknown,0,0,0,0,0,0,0,4,120,0,0,0,0,0,0,0,0,0,0,0,1


## region

In [14]:
# One-hot encode 'region' and append the indicator columns in place of the
# original string column.
region_onehot = pd.get_dummies(df_train['region'])
df_train = pd.concat([df_train.drop(columns=['region']), region_onehot], axis=1)

In [15]:
# Preview the frame with the one-hot region columns appended.
df_train.head()

Unnamed: 0,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120,region_A,region_B,region_C,region_D,region_E,region_F,region_G,region_H,region_I,region_J,region_K,region_L,region_M,region_N,region_O,region_P,region_Q,region_R,region_S,region_T,unknown
0,10,11,0.11,0.153061,0.171717,-1.884852,0.89,0.36,0.443182,0.88,0,0,0,0,0,0,0,3,152,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,8,69,0.1,0.346939,0.545455,-5.546495,0.87,0.19,0.181818,0.12,0,0,0,0,0,0,0,5,176,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,3,43,0.49,0.265306,0.444444,-9.25567,0.43,0.21,0.409091,0.16,0,0,0,0,0,0,0,3,76,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10,45,0.16,0.244898,0.343434,-5.088788,0.86,0.37,0.25,0.17,0,0,0,0,0,0,0,5,192,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,57,0.19,0.785714,0.828283,-3.933896,0.65,0.16,0.25,0.22,0,0,0,0,0,0,0,4,120,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


## loudness

In [16]:
import numpy as np

# Exponentiate loudness with base 10, mapping [-inf ~ 0] onto [0 ~ 1].
# NOTE(review): the exponent is the raw value with no /10 or /20 divisor,
# so most results land extremely close to 0 — confirm this is intended.
# (Earlier experiments with an int-cast / base-100 variant were dropped.)
df_train['loudness'] = np.power(10.0, df_train['loudness'])
df_train.head()

Unnamed: 0,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120,region_A,region_B,region_C,region_D,region_E,region_F,region_G,region_H,region_I,region_J,region_K,region_L,region_M,region_N,region_O,region_P,region_Q,region_R,region_S,region_T,unknown
0,10,11,0.11,0.153061,0.171717,0.01303611,0.89,0.36,0.443182,0.88,0,0,0,0,0,0,0,3,152,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,8,69,0.1,0.346939,0.545455,2.841221e-06,0.87,0.19,0.181818,0.12,0,0,0,0,0,0,0,5,176,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,3,43,0.49,0.265306,0.444444,5.550475e-10,0.43,0.21,0.409091,0.16,0,0,0,0,0,0,0,3,76,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10,45,0.16,0.244898,0.343434,8.151028e-06,0.86,0.37,0.25,0.17,0,0,0,0,0,0,0,5,192,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,57,0.19,0.785714,0.828283,0.0001164406,0.65,0.16,0.25,0.22,0,0,0,0,0,0,0,4,120,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


## standardization (min-max scaling)

In [17]:
col_list = ['popularity', 'tempo_max', 'duration_m']

# Min-max rescale each of these columns onto [0, 1].
for col in col_list:
  lowest = df_train[col].min()
  span = df_train[col].max() - lowest
  df_train[col] = (df_train[col] - lowest) / span

In [18]:
# Preview the frame after popularity, tempo_max and duration_m were rescaled.
df_train.head()

Unnamed: 0,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120,region_A,region_B,region_C,region_D,region_E,region_F,region_G,region_H,region_I,region_J,region_K,region_L,region_M,region_N,region_O,region_P,region_Q,region_R,region_S,region_T,unknown
0,10,0.134146,0.11,0.153061,0.171717,0.01303611,0.89,0.36,0.443182,0.88,0,0,0,0,0,0,0,0.085714,0.622222,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,8,0.841463,0.1,0.346939,0.545455,2.841221e-06,0.87,0.19,0.181818,0.12,0,0,0,0,0,0,0,0.142857,0.755556,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,3,0.52439,0.49,0.265306,0.444444,5.550475e-10,0.43,0.21,0.409091,0.16,0,0,0,0,0,0,0,0.085714,0.2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10,0.54878,0.16,0.244898,0.343434,8.151028e-06,0.86,0.37,0.25,0.17,0,0,0,0,0,0,0,0.142857,0.844444,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,0.695122,0.19,0.785714,0.828283,0.0001164406,0.65,0.16,0.25,0.22,0,0,0,0,0,0,0,0.114286,0.444444,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [19]:
# Summary statistics for every column of the preprocessed training frame.
df_train.describe()

Unnamed: 0,genre,popularity,acousticness,positiveness,danceability,loudness,energy,liveness,speechiness,instrumentalness,acousticness_nan,positiveness_nan,danceability_nan,energy_nan,liveness_nan,speechiness_nan,instrumentalness_nan,duration_m,tempo_max,0-40,121-152,153-176,177-192,193-208,209-220,41-50,51-56,57-63,64-76,77-96,97-120,region_A,region_B,region_C,region_D,region_E,region_F,region_G,region_H,region_I,region_J,region_K,region_L,region_M,region_N,region_O,region_P,region_Q,region_R,region_S,region_T,unknown
count,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0,4046.0
mean,7.28176,0.500684,0.341461,0.468492,0.494313,0.01066911,0.598683,0.260989,0.220102,0.209365,0.0,0.002472,0.001977,0.0,0.000741,0.001977,0.000247,0.101031,0.525179,0.002224,0.349728,0.103312,0.031389,0.012852,0.000989,0.000247,0.001236,0.003213,0.023233,0.185615,0.285961,0.003213,0.080326,0.020761,0.049184,0.175482,0.033613,0.014829,0.044736,0.185863,0.006179,0.056846,0.027929,0.000741,0.010875,0.034108,0.086258,0.008898,0.012358,0.014335,0.042017,0.091448
std,2.887542,0.197143,0.241043,0.229443,0.159826,0.08799292,0.201048,0.155743,0.094902,0.154267,0.0,0.04966,0.044428,0.0,0.027223,0.044428,0.015721,0.041467,0.169069,0.047117,0.476943,0.304403,0.174388,0.112651,0.031431,0.015721,0.035136,0.0566,0.150661,0.388844,0.451927,0.0566,0.271831,0.142602,0.21628,0.380426,0.180254,0.120885,0.206748,0.389044,0.078373,0.231577,0.164789,0.027223,0.103727,0.181528,0.280779,0.093919,0.110491,0.118883,0.200652,0.288281
min,0.0,0.0,0.0,0.0,0.0,1.5119689999999998e-38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,7.0,0.378049,0.14,0.27551,0.383838,1.677515e-10,0.46,0.16,0.159091,0.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.444444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,8.0,0.512195,0.25,0.459184,0.505051,6.464577e-08,0.63,0.21,0.204545,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.444444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,10.0,0.634146,0.52,0.653061,0.606061,1.328778e-05,0.76,0.31,0.25,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.114286,0.622222,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [20]:
# (rows, columns) of the preprocessed training frame, label column included.
df_train.shape

(4046, 52)

# nn

In [21]:
from keras.utils import to_categorical

# Feature matrix: every column except the label. The explicit float cast
# keeps the array numeric when pd.get_dummies yields bool columns
# (pandas >= 2.0); mixing bool and float columns would otherwise make
# `.values` an object array. On older pandas the result is unchanged.
X_train = df_train.drop(['genre'], axis=1).astype(float).values
# Integer genre ids, and their one-hot encoding used as the network target.
Y_train = df_train['genre'].values
cY_train = to_categorical(df_train['genre'])

In [22]:
!pip install tensorflow_addons
import tensorflow_addons as tfa
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, LeakyReLU
from keras.optimizers import Adam

def create_model():
  """Build and compile a fresh dense classifier.

  The input width is read from the global ``X_train`` and the number of
  output units from the global ``cY_train``, so both must exist before
  this is called.

  Returns:
    A compiled ``Sequential`` model with layers 100 (relu) -> 50 (relu) ->
    n_classes (softmax), Adam optimizer, sigmoid focal cross-entropy loss
    and an accuracy metric.
  """

  model = Sequential()

  model.add(Dense(100, activation='relu', input_shape=(X_train.shape[1],)))
  model.add(Dense(50, activation='relu'))
  model.add(Dense(cY_train.shape[1], activation='softmax'))

  # `learning_rate` is the supported keyword; `lr` is a deprecated alias
  # that newer Keras releases reject.
  # NOTE(review): a sigmoid focal loss is paired with a softmax output
  # layer here — confirm this combination is intended.
  model.compile(optimizer=Adam(learning_rate=1e-5),
                loss = tfa.losses.SigmoidFocalCrossEntropy(),
                metrics=['accuracy'],)

  return model



In [23]:
# from sklearn.utils import class_weight

# model = create_model()
# history = model.fit(X_train, cY_train, 
#                     epochs=500, 
#                     batch_size=4,
#                     verbose=1,
#                     class_weight=d_class_weights,
#                     validation_split=0.2)

In [24]:
# import matplotlib.pyplot as plt

# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']
# loss = history.history['loss']
# val_loss = history.history['val_loss']

# epochs = range(1, len(acc) + 1)

# plt.plot(epochs, acc, 'b', label='Training acc')
# plt.plot(epochs, val_acc, 'b', color='orange', label='Validation acc')
# plt.title('Training and validation accuracy')
# plt.legend()

# plt.show()

# plt.plot(epochs, loss, 'b', label='Training loss')
# plt.plot(epochs, val_loss, 'b', color='orange', label='Validation loss')
# plt.title('Training and validation loss')
# plt.legend()

# plt.show()

In [25]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score

def cross_val_score_for_keras(X_train, cY_train, epochs, batch_size, n_splits=10):
  """Run stratified k-fold cross-validation for the model from create_model().

  For every fold a new model is trained, the weights with the lowest
  validation loss are checkpointed to MODEL_PATH, and that checkpoint is
  reloaded and scored on the fold's validation rows. The checkpoint file is
  shared, so each fold overwrites the previous one.

  Args:
    X_train: feature array, indexed by row with the fold indices.
    cY_train: one-hot label array aligned with X_train.
    epochs: number of training epochs per fold.
    batch_size: mini-batch size passed to ``model.fit``.
    n_splits: number of folds.

  Returns:
    (acc, f1_macro): two lists holding, per fold, the validation accuracy
    and the macro-averaged F1 score.
  """

  skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2021)

  acc = []
  f1_macro = []

  # Stratification needs class ids, so collapse the one-hot rows with argmax.
  for train_idx, val_idx in skf.split(X_train, cY_train.argmax(axis=1)):
    # creating dataset #
    train_data = X_train[train_idx]
    train_labels = cY_train[train_idx]
    # No trailing comma here: it would wrap the array in a 1-tuple.
    val_data = X_train[val_idx]
    val_labels = cY_train[val_idx]
    # creating model, save & load the best model #
    model = create_model()
    callbacks_list = [ModelCheckpoint(filepath=MODEL_PATH, monitor='val_loss', save_best_only=True, mode='min'),]
    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=callbacks_list,
              validation_data=(val_data, val_labels))
    model = load_model(MODEL_PATH)
    # calculate the metrics #
    # Predict once and reuse the class ids for both metrics.
    true_classes = np.argmax(val_labels, axis=1)
    pred_classes = np.argmax(model.predict(val_data), axis=1)
    acc_score = accuracy_score(true_classes, pred_classes)
    acc.append(acc_score)
    score = f1_score(true_classes, pred_classes, average='macro')
    f1_macro.append(score)

    print('accuracy: {0:.3f}, f1 macro: {1:.3f}'.format(acc_score, score))

  return acc, f1_macro

# cv

|missing data|duration_ms|sensation|tempo|loudness|standardization|nn|epochs|ModelCheckpoint|accuracy|f1 score|
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
|mean|creating minutes col|-|only max|[0-1]|min-max, [popularity, duration_m,tempo_max]|3|500|val_loss|0.617|0.508|
|||||||5|||0.637|0.498|
|||||||3||val_accuracy|0.635|0.511|
|||[0-1] -> [0-100] -> [0-1]|||||||0.632|0.513|
||||||||1000||0.644|0.544|
|one-hot + mean|||one-hot encoding|||52-100-50-11|500||0.596|0.472|

In [26]:
from time import time

EPOCHS = 500
BATCH_SIZE = 4

# Time the whole 10-fold run by wall clock.
start_time = time()
acc, f1_macro = cross_val_score_for_keras(
    X_train, cY_train, epochs=EPOCHS, batch_size=BATCH_SIZE, n_splits=10)
elapsed_time = time() - start_time

# Report the elapsed minutes and the fold-averaged metrics.
mean_acc, mean_f1 = np.mean(acc), np.mean(f1_macro)
print('Elapsed time: {0:.3f} m'.format(elapsed_time / 60))
print('accuracy: {0:.3f}, f1 macro: {1:.3f} with (CV=10)'.format(mean_acc, mean_f1))

accuracy: 0.605, f1 macro: 0.517
accuracy: 0.585, f1 macro: 0.425
accuracy: 0.583, f1 macro: 0.471
accuracy: 0.598, f1 macro: 0.498
accuracy: 0.590, f1 macro: 0.436
accuracy: 0.598, f1 macro: 0.486
accuracy: 0.579, f1 macro: 0.425
accuracy: 0.599, f1 macro: 0.445
accuracy: 0.614, f1 macro: 0.505
accuracy: 0.611, f1 macro: 0.512
Elapsed time: 96.693 m
accuracy: 0.596, f1 macro: 0.472 with (CV=10)
