In [3]:
import os
import sys
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
module_path = os.path.abspath(os.path.join('../../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [5]:
pwd

'/root/emotional-recognition/notebooks/low_level/supervised_learning/audio'

In [6]:
from global_config import ROOT_DIR, emotion_id_to_emotion_abr, conf_cmap, AUDIO_LLD_COLS
from src.analysis.supervised_learning.evaluation.confusion_matrix import ConfusionMatrixCreator


from src.analysis.data_exploration import plot_time_series_means_subplots

from src.preprocessing.dataset_creation.scaling.low_level_scaling import LowLevelScaler
from src.preprocessing.dataset_creation.helpers import slice_by, get_cols, get_fixed_col, get_padded_time_series_with_numpy
from src.preprocessing.dataset_creation.aggregation import get_aggregate_measures
from src.preprocessing.dataset_creation.interpolation import Interpolator

In [7]:
from s3fs.core import S3FileSystem
from sagemaker import get_execution_role

s3 = S3FileSystem()

role = get_execution_role()
bucket='files-and-examples-01'
file = 'datasets/su_dataset/audio/opensmile_lld_query.csv'

path = 's3://{}/{}'.format(bucket, file)

df = pd.read_csv(path)

In [8]:
df

Unnamed: 0,filename,video_id,intensity_level,emotion_1_id,Loudness_sma3,alphaRatio_sma3,hammarbergIndex_sma3,slope0-500_sma3,slope500-1500_sma3,spectralFlux_sma3,...,logRelF0-H1-A3_sma3nz,F1frequency_sma3nz,F1bandwidth_sma3nz,F1amplitudeLogRelF0_sma3nz,F2frequency_sma3nz,F2bandwidth_sma3nz,F2amplitudeLogRelF0_sma3nz,F3frequency_sma3nz,F3bandwidth_sma3nz,F3amplitudeLogRelF0_sma3nz
0,A223_ang_p_4,A223,4,12,0.023736,-3.451112,9.509703,0.033136,0.014846,0.000383,...,0.0,531.84520,1461.5830,-201.0,1213.4647,946.50100,-201.0,2291.4734,403.39297,-201.0
1,A223_ang_p_4,A223,4,12,0.024516,-1.056521,6.348400,0.057687,0.013466,0.000896,...,0.0,707.58480,1343.1768,-201.0,1511.3168,740.39545,-201.0,2665.4316,520.56330,-201.0
2,A223_ang_p_4,A223,4,12,0.025063,0.777676,3.963587,0.066820,0.013788,0.001307,...,0.0,906.96124,1082.0823,-201.0,1855.5275,583.71020,-201.0,2972.0889,611.52850,-201.0
3,A223_ang_p_4,A223,4,12,0.026145,1.330180,2.396539,0.087751,0.010414,0.001354,...,0.0,842.48580,950.2601,-201.0,1932.9799,517.32745,-201.0,2976.6843,714.99920,-201.0
4,A223_ang_p_4,A223,4,12,0.026809,-0.390972,5.190536,0.059138,0.011255,0.001537,...,0.0,703.61620,924.0280,-201.0,1714.7184,772.69090,-201.0,2589.3264,687.07060,-201.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4282675,A91_sad_p_3,A91,3,6,0.021170,-11.220714,25.441332,-0.048260,0.003177,0.002276,...,0.0,709.29110,1275.2358,-201.0,1424.7256,1390.13370,-201.0,2993.2100,1490.61900,-201.0
4282676,A91_sad_p_3,A91,3,6,0.021272,-13.286717,26.561953,-0.060351,0.000188,0.001969,...,0.0,763.52954,1129.4498,-201.0,1491.4346,1440.57800,-201.0,3113.5222,2543.79470,-201.0
4282677,A91_sad_p_3,A91,3,6,0.020825,-14.045362,28.375557,-0.049394,0.002035,0.001485,...,0.0,473.55710,1257.7916,-201.0,1501.9879,757.52010,-201.0,3114.7117,3698.08470,-201.0
4282678,A91_sad_p_3,A91,3,6,0.019518,-15.512489,28.432364,-0.043027,0.005086,0.001330,...,0.0,1051.26550,1198.6473,-201.0,1815.0340,1243.81460,-201.0,3105.9680,510.81277,-201.0


### Slice

In [9]:
slices = slice_by(df, "filename")

In [10]:
x = get_cols(slices, AUDIO_LLD_COLS)
y = get_fixed_col(slices, "emotion_1_id")
video_ids = get_fixed_col(slices, "video_id")

### Scale the data by video id

In [11]:
scaler = LowLevelScaler(x, "standard")
scaler.scale_by_video_id(video_ids)
x = scaler.slices

### Pad

In [12]:
x = get_padded_time_series_with_numpy(x)

In [13]:
x.shape

(10006, 3094, 25)

In [14]:
y.shape

(10006,)

### Train test split

In [15]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

### Define model

In [16]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10)

In [17]:
seq_len = x.shape[1]
n_cols = x.shape[2]
output_dim = len(np.unique(y))

model = keras.Sequential(
    [
        layers.Masking(mask_value = -1000, input_shape=(seq_len, n_cols)),
        layers.LSTM(units=512,
                   return_sequences=True,
                   input_shape=(seq_len, n_cols)),
        layers.LSTM(units=256,
                   return_sequences=True,),        
        layers.Flatten(),
        layers.Dense(256, activation="sigmoid"),
        layers.Dense(128, activation="sigmoid"),
        layers.Dense(output_dim, activation="sigmoid")
    ]
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), 
              optimizer=optimizer, 
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            (None, 3094, 25)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 3094, 512)         1101824   
_________________________________________________________________
lstm_1 (LSTM)                (None, 3094, 256)         787456    
_________________________________________________________________
flatten (Flatten)            (None, 792064)            0         
_________________________________________________________________
dense (Dense)                (None, 256)               202768640 
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_2 (Dense)              (None, 44)                5

### Train

In [22]:
model.fit(X_train, y_train, batch_size=500, epochs=200, validation_data=(X_test, y_test))

InternalError: stream did not block host until done; was already in an error state