## Скачивание файлов, деление на фолды

In [46]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, math
from tqdm.notebook import tqdm

URL = 'https://storage.googleapis.com/oleg-zyablov/misc/VoiceMOS'
!wget -q {URL}/data.csv

data = pd.read_csv('data.csv')
data = data[data.subset.isin(['train', 'val']) & data.file_exists]
#data = data[data.subset.isin(['train']) & data.file_exists] #TODO заменить

n_folds = 5
fold_size = math.ceil(len(data) / n_folds)
cumsum = data.groupby('system').file.count().cumsum()
system_to_fold = {}
for i in range(n_folds):
  systems = cumsum[(cumsum > fold_size*i) & (cumsum <= fold_size*(i+1))].index.tolist()
  for s in systems:
    system_to_fold[s] = i

data['fold'] = None
for i in data.index:
  data.loc[i, 'fold'] = system_to_fold[data.loc[i, 'system']]

folds = data.fold.to_numpy()
is_val = data.subset == 'val'
y = data.score_mean.to_numpy()

def get_split(X, y, fold_idx):
  if fold_idx == 'default':
    train_idx = ~is_val
    val_idx = is_val
  else:
    train_idx = folds != fold_idx
    val_idx = folds == fold_idx
  return (X[train_idx], y[train_idx]), (X[val_idx], y[val_idx])

data.sample(5)

Unnamed: 0,subset,system,utterance,file,file_exists,score_mean,score_std,n_votes,votes1,votes2,votes3,votes4,votes5,fold
5274,train,sysea497,uttd1882a3,sysea497-uttd1882a3.wav,True,1.625,0.744,8,4,3,1,0,0,4
2299,train,sys433f2,utt07fdcca,sys433f2-utt07fdcca.wav,True,3.75,0.7071,8,0,0,3,4,1,1
923,train,sys02a43,uttf9398a4,sys02a43-uttf9398a4.wav,True,2.5,1.069,8,1,4,1,2,0,0
4815,train,syscfed8,uttd6992f9,syscfed8-uttd6992f9.wav,True,1.625,0.9161,8,5,1,2,0,0,3
2325,train,sys433f2,uttce7d860,sys433f2-uttce7d860.wav,True,3.5,1.069,8,0,2,1,4,1,1


In [2]:
data_sources = [
    'wave2vec2.feature_extractor.conv_layers.3',
    'wave2vec2.feature_extractor.conv_layers.5',
    'wave2vec2.feature_extractor.conv_layers.6',
    'wave2vec2.transformer.layers.0',
    'wave2vec2.transformer.layers.1',
    'wave2vec2.transformer.layers.2',
    'wave2vec2.transformer.layers.4',
    'wave2vec2.transformer.layers.6',
    'wave2vec2.transformer.layers.8',
    'wave2vec2.transformer.layers.10',
    'wave2vec2.mix1',
    'wave2vec2.mix2',
    'wave2vec2.mix3',
    'wave2vec2.mix4',
    'emot_wave2vec2.transformer.0',
    'emot_wave2vec2.transformer.1',
    'emot_wave2vec2.transformer.2',
    'emot_wave2vec2.transformer.4',
    'emot_wave2vec2.transformer.7',
    'emot_wave2vec2.transformer.10',
    'emot_wave2vec2.transformer.15',
    'emot_wave2vec2.transformer.19',
    'emot_wave2vec2.transformer.23',
]

data_processing_and_reduction = {
    'none': {'mean', 'mean_std', 'max'},
    'random_projection': {'mean', 'mean_std', 'max'},
    'random_rnn': {'last', 'mean', 'mean_std', 'max'},
    'random_esn': {'last', 'mean', 'mean_std', 'max'},
    'random_biGRU_512': {'mean'},
    'random_biLSTM_512': {'mean'},
    'random_biGRU_try2_512': {'mean'},
    'random_biLSTM_try2_512': {'mean'},
    'random_biLSTM_2048': {'mean'},
}

def get_filename(src_name, proc_name, reduction_name):
  return f'{src_name}__{proc_name}__{reduction_name}.npz'

filenames = []
for source in data_sources:
  for proc, reductions in data_processing_and_reduction.items():
    for reduction in reductions:
      filenames.append(get_filename(source, proc, reduction))

for filename in tqdm(filenames):
  !wget -q {URL}/datasets/{filename}

def load_dataset(source, proc, reduction):
  try:
    filename = get_filename(source, proc, reduction)
    return np.load(filename)['arr_0']
  except:
    return None

all_procs_and_reductions = []
for proc, reductions in data_processing_and_reduction.items():
    for reduction in reductions:
      all_procs_and_reductions.append(f'{proc}__{reduction}')

  0%|          | 0/437 [00:00<?, ?it/s]

In [3]:
import sklearn.linear_model, sklearn.ensemble
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import StandardScaler

sum_mse = 0
for i in range(5):
  (_, y_train), (_, y_val) = get_split(y, y, i)
  prediction = y_val.mean()
  sum_mse += mse(y_val, [prediction] * len(y_val))
print(sum_mse / 5)

0.890975555542332


In [4]:
from sklearn.svm import SVR
import sklearn.linear_model

results = pd.DataFrame(columns=all_procs_and_reductions, index=data_sources)

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

MIN = 1000

for source in data_sources:
  for proc, reductions in data_processing_and_reduction.items():
    for reduction in reductions:
      X = load_dataset(source, proc, reduction)
      if X is None:
        continue
      X = StandardScaler().fit_transform(X)
      best_mse = 1000
      for alpha in np.logspace(-2, 3, num=10):
        val_mse_sum = 0
        for i in range(5):
          (X_train, y_train), (X_val, y_val) = get_split(X, y, i)
          model = sklearn.linear_model.Ridge(alpha=alpha)
          model.fit(X_train, y_train)
          train_mse = mse(y_train, model.predict(X_train))
          val_mse = mse(y_val, model.predict(X_val))
          val_mse_sum += val_mse
        val_mse_mean = val_mse_sum / 5
        if val_mse_mean < best_mse:
          best_mse = val_mse_mean
      print(source, f'{proc}__{reduction}', best_mse)
      results.loc[source, f'{proc}__{reduction}'] = best_mse
      if MIN > best_mse:
        print('MIN')
        MIN = best_mse

wave2vec2.feature_extractor.conv_layers.3 none__mean_std 0.3686195260160805
MIN
wave2vec2.feature_extractor.conv_layers.3 none__mean 0.3917947712446126
wave2vec2.feature_extractor.conv_layers.3 none__max 0.3980277803001714
wave2vec2.feature_extractor.conv_layers.3 random_projection__mean_std 0.3756558351769928
wave2vec2.feature_extractor.conv_layers.3 random_projection__mean 0.40289978091055323
wave2vec2.feature_extractor.conv_layers.3 random_projection__max 0.37434134858248047
wave2vec2.feature_extractor.conv_layers.3 random_rnn__mean_std 0.37813242297276606
wave2vec2.feature_extractor.conv_layers.3 random_rnn__last 0.7630938138716177
wave2vec2.feature_extractor.conv_layers.3 random_rnn__mean 0.41296456322971664
wave2vec2.feature_extractor.conv_layers.3 random_rnn__max 0.42841476030577397
wave2vec2.feature_extractor.conv_layers.3 random_esn__mean_std 0.3680935510802021
MIN
wave2vec2.feature_extractor.conv_layers.3 random_esn__last 0.7963697944175689
wave2vec2.feature_extractor.conv_la

In [55]:
def run(X):
  X = StandardScaler().fit_transform(X)
  best_mse = 1000
  best_alpha = 0
  for alpha in np.logspace(-3, 4, num=30):
    val_mse_sum = 0
    for i in range(5):
      (X_train, y_train), (X_val, y_val) = get_split(X, y, i)
      model = sklearn.linear_model.Ridge(alpha=alpha)
      model.fit(X_train, y_train)
      train_mse = mse(y_train, model.predict(X_train))
      val_mse = mse(y_val, model.predict(X_val))
      val_mse_sum += val_mse
    val_mse_mean = val_mse_sum / 5
    if val_mse_mean < best_mse:
      best_mse = val_mse_mean
      best_alpha = alpha
  print(best_mse, 'alpha =', best_alpha)

In [56]:
X = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
run(X)

0.21993893158603878 alpha = 1082.636733874054


In [69]:
X1 = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
X2 = load_dataset('emot_wave2vec2.transformer.4', 'none', 'mean')
X = np.concatenate([X1, X2], axis=1)
run(X)

0.21216565444602303 alpha = 1887.3918221350996


In [74]:
from tensorflow import keras

X1 = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
X2 = load_dataset('emot_wave2vec2.transformer.4', 'none', 'mean')
X = np.concatenate([X1, X2], axis=1)

val_mse_sum = 0
for i in range(5):
  (X_train, y_train), (X_val, y_val) = get_split(X, y, i)
  model = keras.Sequential([
      keras.layers.Dense(1000, 'relu'),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(1000, 'relu'),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(1)
  ])
  model.compile(loss='mse', optimizer=keras.optimizers.Adam(1e-4))
  model.fit(X_train, y_train, validation_data=(X_val, y_val), verbose=1, epochs=999, callbacks=[
    keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=10, factor=0.2)
  ])
  train_mse = mse(y_train, model.predict(X_train)[:, 0])
  val_mse = mse(y_val, model.predict(X_val)[:, 0])
  val_mse_sum += val_mse
val_mse_mean = val_mse_sum / 5
print(val_mse_mean)

Epoch 1/999
Epoch 2/999
Epoch 3/999
Epoch 4/999
Epoch 5/999
Epoch 6/999
Epoch 7/999
Epoch 8/999
Epoch 9/999
Epoch 10/999
Epoch 11/999
Epoch 12/999
Epoch 13/999
Epoch 14/999
Epoch 15/999
Epoch 16/999
Epoch 17/999
Epoch 18/999
Epoch 19/999
Epoch 20/999
Epoch 21/999
Epoch 22/999
Epoch 23/999
Epoch 24/999
Epoch 25/999
Epoch 26/999
Epoch 27/999
Epoch 28/999
Epoch 29/999
Epoch 30/999
Epoch 31/999
Epoch 32/999
Epoch 33/999
Epoch 34/999
Epoch 35/999
Epoch 36/999
Epoch 37/999
Epoch 38/999
Epoch 39/999
Epoch 1/999
Epoch 2/999
Epoch 3/999
Epoch 4/999
Epoch 5/999
Epoch 6/999
Epoch 7/999
Epoch 8/999
Epoch 9/999
Epoch 10/999
Epoch 11/999
Epoch 12/999
Epoch 13/999
Epoch 14/999
Epoch 15/999
Epoch 16/999
Epoch 17/999
Epoch 18/999
Epoch 19/999
Epoch 20/999
Epoch 21/999
Epoch 22/999
Epoch 23/999
Epoch 24/999
Epoch 25/999
Epoch 26/999
Epoch 27/999
Epoch 28/999
Epoch 29/999
Epoch 30/999
Epoch 31/999
Epoch 32/999
Epoch 33/999
Epoch 34/999
Epoch 35/999
Epoch 36/999
Epoch 37/999
Epoch 1/999
Epoch 2/999
Epoch 

In [68]:
X1 = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
X2 = load_dataset('emot_wave2vec2.transformer.4', 'none', 'mean')
X3 = load_dataset('wave2vec2.transformer.layers.1', 'random_biGRU_512', 'mean')
X4 = load_dataset('emot_wave2vec2.transformer.4', 'random_biGRU_512', 'mean')
X = np.concatenate([X1, X2, X3, X4], axis=1)
run(X)

0.21385659528655365 alpha = 3290.344562312671


In [59]:
X1 = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
X2 = load_dataset('wave2vec2.transformer.layers.1', 'random_biLSTM_try2_512', 'mean')
X = np.concatenate([X1, X2], axis=1)
run(X)

0.22187901602094465 alpha = 1887.3918221350996


In [62]:
X2 = load_dataset('wave2vec2.transformer.layers.2', 'none', 'mean')
X1 = load_dataset('wave2vec2.transformer.layers.1', 'none', 'mean')
X0 = load_dataset('wave2vec2.transformer.layers.0', 'none', 'mean')
X = np.concatenate([X2 - X1, X1 - X0], axis=1)
run(X)

0.2262944617740413 alpha = 1887.3918221350996


In [64]:
Xa = load_dataset('wave2vec2.transformer.layers.0', 'none', 'mean')
Xb = load_dataset('emot_wave2vec2.transformer.0', 'none', 'mean')
X = np.concatenate([Xa, Xb], axis=1)
run(X)

0.24800489771650028 alpha = 621.0169418915616


In [None]:
from IPython.display import HTML, display

pd.set_option('display.float_format', '{:.3f}'.format)

def format_vertical_headers(df):
    """Display a dataframe with vertical column headers"""
    styles = [dict(selector="th", props=[('width', '40px')]),
              dict(selector="th.col_heading",
                   props=[("writing-mode", "vertical-rl"),
                          ('transform', 'rotateZ(180deg)'), 
                          ('height', '290px'),
                          ('vertical-align', 'top')])]
    return (df.round(3).style.set_table_styles(styles))

format_vertical_headers(results)

Unnamed: 0,none__mean_std,none__max,none__mean,random_projection__mean_std,random_projection__max,random_projection__mean,random_rnn__mean_std,random_rnn__last,random_rnn__max,random_rnn__mean,random_esn__mean_std,random_esn__last,random_esn__max,random_esn__mean,random_biGRU_512__mean,random_biLSTM_512__mean,random_biGRU_try2_512__mean,random_biLSTM_try2_512__mean
wave2vec2.feature_extractor.conv_layers.3,0.372682,0.418258,0.39206,0.427311,0.410893,0.424811,0.382758,0.789094,0.452899,0.41275,0.36991,0.852557,0.491786,0.421207,0.399733,,,
wave2vec2.feature_extractor.conv_layers.5,0.335365,0.422317,0.34908,0.348375,0.362237,0.36485,0.332817,0.753044,0.384331,0.352826,0.337834,0.803694,0.463314,0.394679,,,,
wave2vec2.feature_extractor.conv_layers.6,0.316358,0.414847,0.370762,0.352313,0.329919,0.394742,0.324412,0.740715,0.415992,0.382969,0.343803,0.814144,0.486577,0.376861,,,,
wave2vec2.transformer.layers.0,0.283368,0.355321,0.250647,0.308239,0.285937,0.268514,0.267469,0.541493,0.346459,0.269494,0.27901,0.615287,0.436034,0.265138,0.252984,0.253195,0.25582,0.256999
wave2vec2.transformer.layers.1,0.260379,0.319049,0.228526,0.279327,0.272578,0.257324,0.260067,0.423876,0.33646,0.239265,0.25629,0.517695,0.379067,0.237188,0.23459,0.232838,0.231774,0.234518
wave2vec2.transformer.layers.2,0.290564,0.34061,0.252189,0.310167,0.295284,0.286377,0.261802,0.357895,0.335835,0.249198,0.260031,0.461288,0.365335,0.25333,0.251244,0.252329,0.256078,0.251075
wave2vec2.transformer.layers.4,0.276949,0.382982,0.259631,0.310971,0.309951,0.29909,0.284691,0.396283,0.366651,0.262969,0.289271,0.483063,0.410059,0.258321,0.256986,0.259485,0.262628,0.264758
wave2vec2.transformer.layers.6,0.327596,0.434856,0.303547,0.339847,0.34075,0.346588,0.326283,0.453688,0.424071,0.309083,0.328329,0.554344,0.462166,0.321778,,,,
wave2vec2.transformer.layers.8,0.37045,0.452041,0.338634,0.395469,0.38759,0.378177,0.356378,0.493026,0.433184,0.351219,0.362828,0.550018,0.482102,0.35254,,,,
wave2vec2.transformer.layers.10,0.39409,0.539402,0.389053,0.424378,0.509308,0.392563,0.424059,0.459578,0.488919,0.434785,0.429447,0.552357,0.511875,0.443218,,,,


In [6]:
results.to_csv('1.csv')