In [18]:
import numpy as np
from numpy import load
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from matrix import make_confusion_matrix, subplot_confusion_matrix

from prl_utils import (
    Mode,
    read_hdf5,
    get_features,
    get_labels,
    normalize_train_labels,
    normalize_val_labels,
)

In [19]:
# Dataset-size constants. They are interpolated into the data file names, and
# NUM_TRIAL is also the repeat count used when tiling the meta labels.
N_TRAIN_AGENT, N_VAL_AGENT = 30_000, 3_000
NUM_TRIAL = 2_000

In [21]:
# ---- Training split ----
# Every array is read from the 'arr_0' entry of its .npz archive.
prefix = f'data/vara_{N_TRAIN_AGENT}agent_{NUM_TRIAL}t_2ParamRL_intractable'
train_features = np.load(f'{prefix}_features.npz')['arr_0']
train_labels = np.load(f'{prefix}_labels.npz')['arr_0']
# NOTE(review): allow_pickle=True unpickles objects from disk; only use it on
# archives from a trusted source.
meta_labels = np.load(f'{prefix}_pest_labels.npz', allow_pickle=True)['arr_0'].tolist()
# Two-class labels expanded to a categorical (one-hot) encoding.
normalized_train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=2)

# ---- Validation split ----
prefix = f'data/vara_{N_VAL_AGENT}agent_{NUM_TRIAL}t_2ParamRL_intractable'
val_features = np.load(f'{prefix}_features_val.npz')['arr_0']
val_labels = np.load(f'{prefix}_labels_val.npz')['arr_0']
val_meta_labels = np.load(f'{prefix}_pest_labels_val.npz', allow_pickle=True)['arr_0'].tolist()
normalized_val_labels = tf.keras.utils.to_categorical(val_labels, num_classes=2)

In [22]:
# Fit the scalers on the training meta labels, then reuse them for validation.
meta_labels, name_to_scaler = normalize_train_labels(meta_labels)
val_meta_labels = normalize_val_labels(val_meta_labels, name_to_scaler)

# Repeat each agent's label vector NUM_TRIAL times:
# (n_agents, n_params) -> (n_agents, n_params, NUM_TRIAL) -> (n_agents, NUM_TRIAL, n_params).
normalized_meta_labels = np.tile(meta_labels[:, :, np.newaxis], NUM_TRIAL).swapaxes(1, 2)
normalized_val_meta_labels = np.tile(val_meta_labels[:, :, np.newaxis], NUM_TRIAL).swapaxes(1, 2)

print(normalized_meta_labels.shape, normalized_val_meta_labels.shape)

(30000, 2000, 3) (3000, 2000, 3)


In [28]:
# Exploratory check: run an untrained bidirectional GRU over the validation
# features and inspect the shape of the per-step outputs. The last axis is
# 2 * 23 = 46 because the forward and backward outputs are concatenated.
#
# Bidirectional and GRU are imported here because the notebook's main Keras
# import cell appears further down; without this local import a fresh
# "Restart & Run All" fails with a NameError on this cell.
from tensorflow.keras.layers import Bidirectional, GRU

seq_outputs, _, _ = Bidirectional(GRU(23, return_state=True, return_sequences=True))(val_features)
seq_outputs.shape

TensorShape([3000, 2000, 46])

In [29]:
# Display the bidirectional GRU outputs computed in the previous cell.
# NOTE(review): this renders the repr of the whole tensor; a small slice would
# keep the notebook output short.
seq_outputs

<tf.Tensor: shape=(3000, 2000, 46), dtype=float32, numpy=
array([[[ 0.01905048,  0.09448703, -0.07114747, ..., -0.18389554,
          0.04767518,  0.2348642 ],
        [-0.16420776,  0.10160793, -0.16848373, ..., -0.19837473,
          0.1066085 ,  0.22861978],
        [-0.04389452,  0.15458778, -0.14774536, ..., -0.22222021,
          0.06771502,  0.26738757],
        ...,
        [-0.02933448,  0.08584926, -0.11511254, ..., -0.10244296,
          0.0132771 ,  0.1307762 ],
        [-0.10104173, -0.04918401, -0.07103021, ...,  0.01587854,
         -0.05734932,  0.05737255],
        [-0.12650415, -0.09756619, -0.04824181, ...,  0.02272945,
         -0.04428459,  0.05094029]],

       [[-0.08851406, -0.06745483, -0.02241098, ..., -0.06085907,
         -0.02401285,  0.10508896],
        [-0.21293856,  0.04585677, -0.14737621, ..., -0.1518887 ,
          0.07345036,  0.15780786],
        [-0.16807364, -0.02699324, -0.10685254, ..., -0.06884582,
         -0.04750039,  0.18017414],
        .

In [None]:
from sklearn.utils import shuffle

# Train on the (smaller) validation split. All three arrays must come from the
# same split: previously only the features and labels were swapped, leaving
# `meta_labels` with the training split's row count, which makes `shuffle`
# raise on inconsistent lengths and breaks the per-agent alignment downstream.
train_features, normalized_train_labels, meta_labels = (
    val_features,
    normalized_val_labels,
    val_meta_labels,
)

# Shuffle the three arrays in unison; the fixed seed keeps the order reproducible.
train_features, normalized_train_labels, meta_labels = shuffle(
    train_features, normalized_train_labels, meta_labels, random_state=22
)

In [23]:
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.layers import (
    Dense,
    Dropout,
    LSTM,
    Bidirectional,
    GRU,
)
from tensorflow import keras
from tensorflow.keras.optimizers import Adam, SGD

# Make runs repeatable: seed the random generators and request deterministic ops.
tf.keras.utils.set_random_seed(seed=33)
tf.config.experimental.enable_op_determinism()

# Size of the last axis of the categorical training labels; read as a global
# by create_gru_model to size its output layer.
OUPUT_DIM = np.shape(normalized_train_labels)[-1]

def create_gru_model(
    seq_input_dim: int,
    param_feature_input_dim: int,
    units: int = 70,
    dropout: float = 0.2,
    dropout1: float = 0.2,
    dropout2: float = 0.1,
    learning_rate: float = 3e-4,
):
    """Build and compile a two-input, per-timestep GRU classifier.

    The first input (a sequence with ``seq_input_dim`` features per step) goes
    through a GRU that returns its full output sequence. The second input
    (``param_feature_input_dim`` features per step) goes through a ReLU dense
    layer. Both are concatenated on the feature axis and fed to two ReLU dense
    layers and a softmax layer named ``"trnn"`` of width ``OUPUT_DIM`` (a
    module-level global).

    Args:
        seq_input_dim: Feature width of the sequence input.
        param_feature_input_dim: Feature width of the parameter input.
        units: Width of the GRU and of the parameter dense layer; the two
            hidden dense layers use ``int(units / 2)`` and ``int(units / 4)``.
        dropout, dropout1, dropout2: Accepted for interface compatibility but
            currently unused; no dropout layers are added to the model.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A ``keras.Model`` compiled with categorical cross-entropy loss.
    """
    seq_in = layers.Input(shape=(None, seq_input_dim))
    param_in = layers.Input(shape=(None, param_feature_input_dim))

    # The GRU also returns its final state, which is not used further.
    gru = GRU(units, return_sequences=True, return_state=True)
    seq_hidden, _final_state = gru(seq_in)
    param_hidden = Dense(units, activation="relu")(param_in)
    merged = layers.Concatenate(axis=2)([seq_hidden, param_hidden])

    # Two progressively narrower ReLU layers, then the softmax head.
    head = merged
    for width in (int(units / 2), int(units / 4)):
        head = Dense(width, activation="relu")(head)
    probabilities = Dense(OUPUT_DIM, activation="softmax", name="trnn")(head)

    model = keras.Model(inputs=[seq_in, param_in], outputs=probabilities)
    adam = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=adam)
    return model

In [None]:
# Hyper-parameters for this run. The dropout values are passed through to
# create_gru_model, which currently does not add any dropout layers.
batch_size = 128
units = 64
dropout = 0
dropout1 = 0
dropout2 = 0
learning_rate = 3e-3

# Repeat each agent's meta-label vector once per time step so it can be fed
# alongside the sequence features:
# (n_agents, n_params) -> (n_agents, n_timesteps, n_params).
# Sizes come from the arrays themselves rather than hard-coded 3000 / 3, so the
# cell keeps working when the dataset size or number of parameters changes.
n_timesteps = train_features.shape[1]
normalized_meta_labels = np.tile(meta_labels[:, :, np.newaxis], n_timesteps).swapaxes(1, 2)
assert normalized_meta_labels.shape[:2] == train_features.shape[:2], (
    f'meta labels {normalized_meta_labels.shape} do not line up with '
    f'features {train_features.shape}'
)

identifier = f'vara_B{batch_size}_U{units}_D{dropout}_D{dropout1}_D{dropout2}_{learning_rate}'
print(normalized_meta_labels.shape, identifier)

best_model = create_gru_model(
    seq_input_dim=train_features.shape[2],
    param_feature_input_dim=normalized_meta_labels.shape[2],
    units=units,
    dropout=dropout,
    dropout1=dropout1,
    dropout2=dropout2,
    learning_rate=learning_rate)

In [None]:
# Stop when the validation loss has not improved for 10 epochs and roll the
# model back to its best weights.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
]

# The model takes two inputs: the sequence features and the tiled meta labels.
# validation_split holds out 20% of the supplied samples for validation.
history = best_model.fit(
    x=[train_features, normalized_meta_labels],
    y=normalized_train_labels,
    validation_split=0.2,
    callbacks=callbacks,
    batch_size=batch_size,
    epochs=200,
    verbose=2,
)

In [None]:
# Plot every metric recorded during training (one line per column of the
# history) against the epoch index.
result = pd.DataFrame(history.history)
# Pass the frame by keyword so the call does not depend on seaborn's
# positional-argument order.
ax = sns.lineplot(data=result)
ax.set_xlabel('epochs')
ax.set_ylabel('categorical cross entropy loss')  # fixed typo: was "categorial"

#plt.savefig(f'meta_{identifier}.png')

In [None]:
# Load the held-out test split; every array is read from the 'arr_0' entry of
# its .npz archive.
prefix = f'data/vara_{N_VAL_AGENT}agent_{NUM_TRIAL}t_2ParamRL_intractable'
features = load(f'{prefix}_features_test.npz')
test_features = features['arr_0']

test_labels = load(f'{prefix}_labels_test.npz')['arr_0']
# NOTE(review): allow_pickle=True unpickles objects from disk; only use it on
# archives from a trusted source.
test_meta_labels = load(f'{prefix}_pest_labels_test.npz', allow_pickle=True)['arr_0'].tolist()

# Scale with the scalers fitted on the training meta labels, then repeat each
# agent's vector once per time step:
# (n_agents, n_params) -> (n_agents, n_timesteps, n_params).
# Sizes are taken from the arrays instead of the previous hard-coded 3000 / 3.
normalized_test_meta_labels = normalize_val_labels(test_meta_labels, name_to_scaler)
normalized_test_meta_labels = np.tile(
    normalized_test_meta_labels[:, :, np.newaxis], test_features.shape[1]
).swapaxes(1, 2)

# Sequence features and meta labels joined on the feature axis.
all_test_features = tf.keras.layers.Concatenate(axis=2)([test_features, normalized_test_meta_labels])
# Mean of the labels along axis 1, one value per agent.
avg_attentive_states = np.mean(test_labels, axis=1)
print(all_test_features.shape)

In [None]:
# The model is built with two separate inputs (sequence features and tiled
# meta labels), exactly as it is fed during fit. Passing the single
# concatenated tensor does not match that two-input signature.
output_tokens = best_model.predict([test_features, normalized_test_meta_labels])
# Most likely class at every time step.
prediction = np.argmax(output_tokens, axis=-1)
prediction[3]

In [None]:
from sklearn.metrics import accuracy_score

# One record per agent: its average label value and the accuracy of its
# predicted sequence against the true one.
records = []
for agent_idx, y_true in enumerate(test_labels):
    records.append({
        'avg_attentive_states': avg_attentive_states[agent_idx],
        'accuracy': accuracy_score(y_true, prediction[agent_idx]),
    })

t_score = pd.DataFrame(records, columns=['avg_attentive_states', 'accuracy'])
# Overall mean accuracy, repeated on every row.
t_score['mean'] = np.mean(t_score['accuracy'])
#t_score.to_csv(f'{RESULT_DIR}/{identifier}_accuracy_to_t.csv')

t_score['mean'].mean()

In [None]:
from sklearn.metrics import confusion_matrix

# Accumulate one 2x2 confusion matrix per bin of the agent's average label
# value: bin 0 covers [0.2, 0.3), bin 1 covers [0.3, 0.4), ... bin 7 is the top bin.
n_bins = 8
cf_matrixes = np.zeros((n_bins, 2, 2), dtype=int)
for ag in range(len(avg_attentive_states)):
    # Clamp into the valid bin range. The previous modulo-based index could go
    # negative (an average of exactly 1.0, or one below about 0.1) and silently
    # wrap around to a bin near the top. Out-of-range averages now fall into
    # the nearest bin instead.
    idx = min(max(int(avg_attentive_states[ag] * 10) - 2, 0), n_bins - 1)
    # labels=[0, 1] forces a 2x2 result even when an agent's true and predicted
    # sequences contain a single class. Otherwise a 1x1 matrix is returned and
    # `+=` broadcasts it into all four cells.
    cf_matrix = confusion_matrix(test_labels[ag], prediction[ag], labels=[0, 1])
    cf_matrixes[idx] += cf_matrix

In [None]:
# 2 x 4 grid: one confusion-matrix panel per average-attentive-state bin.
nrows, ncols = 2, 4
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 5, nrows * 5),
    sharex=True,
    sharey=True,
)

avg_attentive_states_range = [
    '0.2-0.3', '0.3-0.4', '0.4-0.5', '0.5-0.6',
    '0.6-0.7', '0.7-0.8', '0.8-0.9', '>0.9',
]
# Walk the panels, bin labels and accumulated matrices in lockstep.
for ax, range_label, bin_matrix in zip(axes.flat, avg_attentive_states_range, cf_matrixes):
    ax.set_title(f'avg attentive state percent range {range_label}', fontsize=10)
    subplot_confusion_matrix(
        bin_matrix,
        categories=["inattentive", "attentive"],
        percent="by_row",
        ax=ax,
        vmin=0,
        vmax=1,
    )