# Task 4: The Challenge

## Prepare Dataset

In [1]:
import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from tqdm import tqdm

from sklearn.model_selection import GroupShuffleSplit, GroupKFold, cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, auc, confusion_matrix
from sklearn.multioutput import MultiOutputRegressor

  from .autonotebook import tqdm as notebook_tqdm
  from pandas import MultiIndex, Int64Index


In [2]:
# Read the training and test tables from their CSV files.
train_csv = "task_3_training_e8da4715deef7d56_f8b7378_generic.csv"
test_csv = "task_4_test_dd4bd32b08b776e6_daf99ad_generic.csv"
df_train = pd.read_csv(train_csv, header=0)
df_test = pd.read_csv(test_csv, header=0)

# Everything from column position 175 onwards holds the high-level (GEMS)
# features. Keep them aside as regression targets, then remove them from the
# training frame because the test set does not provide them.
hl_columns = df_train.columns[175:]
hl_train = df_train[hl_columns]
df_train = df_train.drop(columns=hl_columns)

# Valence and arousal are dropped as required by the task description; the
# pianist / segment / snippet identifier columns are dropped from both frames.
id_columns = ["pianist_id", "segment_id", "snippet_id"]
df_train = df_train.drop(columns=["arousal", "valence"] + id_columns)
df_test = df_test.drop(columns=id_columns)

In [3]:
# Split the training frame into features and labels: the last column is the
# label, every column before it is a feature.
*feature_cols, target_col = df_train.columns

X_train = df_train[feature_cols]
y_train = df_train[target_col]


## Predicting high-level features

In [5]:
# Standardise the features: the scaler is fitted on the training data only,
# and the very same transform is then applied to the test data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(df_test)

# Non-tuned XGBoost regressor used to predict the high-level features.
# MultiOutputRegressor trains one such regressor per high-level feature.
base_regressor = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)
xgb_multi = MultiOutputRegressor(base_regressor, n_jobs=-1)
xgb_multi.fit(X_train, hl_train)

hl_pred = xgb_multi.predict(X_test)



In [6]:
# display the high-level feature predictions made for the test set
hl_pred

array([[3.5116117 , 2.4904354 , 1.868669  , ..., 1.0575113 , 0.8540447 ,
        0.46647558],
       [3.946867  , 2.7058012 , 1.9617884 , ..., 1.2638652 , 0.8770598 ,
        0.3638877 ],
       [3.880506  , 2.170359  , 1.9165782 , ..., 0.76343167, 0.89911634,
        0.633608  ],
       ...,
       [4.1904516 , 2.3419273 , 1.9908311 , ..., 1.0640273 , 0.50059384,
        0.68948734],
       [4.1904488 , 2.5996788 , 1.9494083 , ..., 0.69959044, 0.76545435,
        0.7986641 ],
       [3.0667982 , 2.430628  , 2.3061464 , ..., 0.67562795, 0.7791855 ,
        0.6755119 ]], dtype=float32)

In [7]:
# merge high level features together with test set
# Names for the predicted columns, in the order they are assigned to the
# columns of hl_pred.
columns = [
    "gems_wonder", "gems_transcendence", "gems_tenderness", "gems_nostalgia",
    "gems_peacefulness", "gems_power", "gems_joyful_activation", "gems_tension",
    "gems_sadness", "gemmes_movement", "gemmes_force", "gemmes_interior",
    "gemmes_wandering", "gemmes_flow",
    "gems_wonder_binary", "gems_transcendence_binary", "gems_tenderness_binary",
    "gems_nostalgia_binary", "gems_peacefulness_binary", "gems_power_binary",
    "gems_joyful_activation_binary", "gems_tension_binary", "gems_sadness_binary",
    "gemmes_movement_binary", "gemmes_force_binary", "gemmes_interior_binary",
    "gemmes_wandering_binary", "gemmes_flow_binary",
]

# Build the prediction frame on df_test's own index so the column-wise concat
# pairs row i of the predictions with row i of the test set, instead of
# relying on both frames happening to carry the same default index.
hl_pred_df = pd.DataFrame(hl_pred, columns=columns, index=df_test.index)

# Drop high-level columns left over from an earlier run of this cell before
# concatenating; without this, re-running the cell appends a second, duplicate
# set of columns to df_test. On the first run nothing is dropped.
df_test = pd.concat(
    [df_test.drop(columns=columns, errors="ignore"), hl_pred_df],
    axis=1,
)
df_test

Unnamed: 0,essentia_dissonance_mean,essentia_dissonance_stdev,essentia_dynamic_complexity,essentia_loudness,essentia_onset_rate,essentia_pitch_salience_mean,essentia_pitch_salience_stdev,essentia_spectral_centroid_mean,essentia_spectral_centroid_stdev,essentia_spectral_complexity_mean,...,gems_peacefulness_binary,gems_power_binary,gems_joyful_activation_binary,gems_tension_binary,gems_sadness_binary,gemmes_movement_binary,gemmes_force_binary,gemmes_interior_binary,gemmes_wandering_binary,gemmes_flow_binary
0,0.229886,0.062064,4.818434,93184008.0,3.600000,0.614987,0.090779,1365.162964,331.913849,28.465279,...,0.394958,0.274799,1.251832,0.347030,0.251038,0.555602,1.261292,1.057511,0.854045,0.466476
1,0.178258,0.062866,3.003938,109259888.0,4.000000,0.564438,0.094290,1215.545288,271.362946,27.719908,...,0.488988,0.295122,0.818109,0.301137,0.285532,0.725655,1.204014,1.263865,0.877060,0.363888
2,0.209623,0.063750,1.983438,98162960.0,4.600000,0.553583,0.111736,1339.914185,255.108398,28.067129,...,0.563384,0.358347,1.005447,0.224394,0.309679,0.972748,1.267268,0.763432,0.899116,0.633608
3,0.215004,0.061839,1.470698,72971816.0,3.915029,0.642106,0.092053,1143.090576,483.887421,28.758064,...,0.551134,0.346309,0.956883,0.129635,0.164789,0.957400,1.216697,0.680928,0.726769,0.368418
4,0.187017,0.072593,2.613981,80688832.0,2.600000,0.564319,0.104814,1148.319458,238.749054,28.587963,...,0.309295,0.244256,1.284931,0.371046,0.728385,0.922198,1.252617,1.151870,1.007555,0.478020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2080,0.239235,0.048011,1.178853,50233408.0,2.516549,0.640804,0.073967,1298.177979,268.525360,27.625000,...,0.575435,0.534717,0.668637,0.478304,0.995089,0.731338,0.755807,1.167662,0.524068,0.884944
2081,0.293661,0.060317,2.598517,178390704.0,2.400000,0.676194,0.080788,1602.075806,396.212982,29.527779,...,0.493605,0.625307,0.536833,0.333401,0.324574,0.697986,1.254674,0.677316,1.129276,0.372929
2082,0.272440,0.058219,1.851364,178757696.0,4.600000,0.681900,0.081200,1246.282837,252.229889,30.159721,...,0.623784,0.462297,0.510216,0.232884,0.436829,0.828144,1.271864,1.064027,0.500594,0.689487
2083,0.273417,0.060010,1.930694,285332608.0,2.000000,0.708230,0.066477,1536.803711,552.418701,29.668982,...,0.399971,0.557099,0.562442,0.284007,0.288621,0.581023,1.207719,0.699590,0.765454,0.798664
