In [36]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Flatten, concatenate, Input, Dropout, LSTM, Bidirectional,BatchNormalization,PReLU,ReLU,Reshape
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.utils import to_categorical





In [96]:

# Read the comma-separated time-domain export into a DataFrame.
init_df = pd.read_csv(
    './csv/out_gameemo_time_domain_simple.csv',
    sep=',',
)

# Quick sanity check on the load: (rows, columns).
print('Shape of data: ', init_df.shape)

Shape of data:  (4284224, 15)


In [97]:

# Work on a copy so init_df keeps the original numeric label codes.
df = init_df.copy()
print(df.head())

# Numeric label code -> arousal/valence quadrant name.
label_map = {
    1: "HA_PV",  # high arousal, positive valence
    2: "HA_NV",  # high arousal, negative valence
    3: "LA_NV",  # low arousal, negative valence
    4: "LA_PV",  # low arousal, positive valence
}

# Swap the codes for their names; any code missing from label_map becomes NaN.
df["Label"] = df["Label"].map(label_map)

       AF3      AF4       F3       F4       F7       F8      FC5      FC6  \
0 -11.5692 -7.71280  13.0154 -7.71280  76.1643   9.1590  38.0819 -0.48193   
1 -12.5625 -6.52730  12.9572 -6.04490  76.1735  11.7427  35.5558 -0.69421   
2 -14.7008 -6.13520  12.6621 -4.71790  75.4600  13.9307  31.4947 -1.13450   
3  -7.3113 -2.63410  12.6253 -1.78380  83.2244  21.5305  29.8460  1.10270   
4  -2.2931 -0.78859  11.6270 -0.47166  89.5589  26.7464  26.8497  1.75970   

        O1      O2       P7       P8       T7       T8  Label  
0 -1.44630 -5.3026  0.48193 -10.1229  8.67710  5.30260    2.0  
1 -1.11830 -5.7076  0.69421  -9.7567  7.91530  3.77900    2.0  
2 -0.56926 -7.2934  0.65256 -11.0995  5.51220  0.17823    2.0  
3 -2.22230 -7.0967 -1.07380  -8.2644  3.49420  1.85480    2.0  
4 -5.70420 -7.8760 -4.62500  -7.0454  0.63323  1.98420    2.0  


In [None]:
# Restructure the X feature set so that rows are grouped by sample.
# Every sample spans exactly 38252 rows, so the rows can be split into fixed-size chunks.





In [135]:



# Number of rows (time steps) that make up one sample; the reshape below
# relies on the frame being an exact multiple of this.
ROWS_PER_SAMPLE = 38252
# Fraction of samples assigned to the training split.
TRAIN_FRACTION = 0.7

print(df.head())

# Every column except the last is a feature channel; the last column is the label.
features = df.iloc[:, :-1]
label = df.iloc[:, -1:]

print('Shape of data: ', df.shape)
print('features.shape: ', features.shape)
print('label.shape: ', label.shape)
print(df.columns)

y = label
X = features

# Fail early with a clear message instead of a cryptic reshape error later on.
if len(X) % ROWS_PER_SAMPLE != 0:
    raise ValueError(
        "Row count {} is not a multiple of {} rows per sample".format(len(X), ROWS_PER_SAMPLE)
    )

total_samples_count = len(X) // ROWS_PER_SAMPLE

print("total_samples_count:", total_samples_count)

train_sample_count = int(total_samples_count * TRAIN_FRACTION)
test_sample_count = total_samples_count - train_sample_count

train_size = train_sample_count * ROWS_PER_SAMPLE
test_size = test_sample_count * ROWS_PER_SAMPLE

print("train size:", train_size)
print("test size:", test_size)

# Positional split on a sample boundary, so no sample is shared between the
# two sets. NOTE(review): the class balance of each split therefore depends on
# the row order of the CSV -- confirm this is the intended split.
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

# Group rows into (samples, time steps, channels).
n_channels = X.shape[1]
X_train = np.array(X_train).reshape((train_sample_count, ROWS_PER_SAMPLE, n_channels))
X_test = np.array(X_test).reshape((test_sample_count, ROWS_PER_SAMPLE, n_channels))

print("X_train.shape after reshape:",X_train.shape)
print("X_test.shape after reshape:",X_test.shape)

# Collapse the per-row labels to one label per sample by taking the first row
# of every ROWS_PER_SAMPLE-row chunk (a strided slice instead of a Python loop
# over millions of rows).
y_train_collapsed = y_train.iloc[::ROWS_PER_SAMPLE, 0].to_numpy()
print("y_train_collapsed shape:",y_train_collapsed.shape)

y_test_collapsed = y_test.iloc[::ROWS_PER_SAMPLE, 0].to_numpy()
print("y_test_collapsed shape:",y_test_collapsed.shape)

# One-hot encode both splits against the SAME ordered class list. Encoding each
# split independently would give the two frames different columns whenever a
# class is missing from one of them.
class_names = sorted(
    pd.Series(np.concatenate([y_train_collapsed, y_test_collapsed])).dropna().unique()
)
y_train = pd.get_dummies(pd.Series(pd.Categorical(y_train_collapsed, categories=class_names)))
y_test = pd.get_dummies(pd.Series(pd.Categorical(y_test_collapsed, categories=class_names)))

# Invariants the training cell depends on.
assert len(y_train) == X_train.shape[0] and len(y_test) == X_test.shape[0]
assert list(y_train.columns) == list(y_test.columns)

print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)

 


       AF3      AF4       F3       F4       F7       F8      FC5      FC6  \
0 -11.5692 -7.71280  13.0154 -7.71280  76.1643   9.1590  38.0819 -0.48193   
1 -12.5625 -6.52730  12.9572 -6.04490  76.1735  11.7427  35.5558 -0.69421   
2 -14.7008 -6.13520  12.6621 -4.71790  75.4600  13.9307  31.4947 -1.13450   
3  -7.3113 -2.63410  12.6253 -1.78380  83.2244  21.5305  29.8460  1.10270   
4  -2.2931 -0.78859  11.6270 -0.47166  89.5589  26.7464  26.8497  1.75970   

        O1      O2       P7       P8       T7       T8  Label  
0 -1.44630 -5.3026  0.48193 -10.1229  8.67710  5.30260  HA_NV  
1 -1.11830 -5.7076  0.69421  -9.7567  7.91530  3.77900  HA_NV  
2 -0.56926 -7.2934  0.65256 -11.0995  5.51220  0.17823  HA_NV  
3 -2.22230 -7.0967 -1.07380  -8.2644  3.49420  1.85480  HA_NV  
4 -5.70420 -7.8760 -4.62500  -7.0454  0.63323  1.98420  HA_NV  
Shape of data:  (4284224, 15)
features.shape:  (4284224, 14)
label.shape:  (4284224, 1)
Index(['AF3', 'AF4', 'F3', 'F4', 'F7', 'F8', 'FC5', 'FC6', 'O1', 

In [141]:
def train_model(x_train, y_train, x_test, y_test, save_to, epoch=2):
    """Build, compile and fit a single-layer LSTM softmax classifier.

    The network is Input -> LSTM(256, return_sequences=True) -> Flatten ->
    Dense(4, softmax), created under a ``tf.distribute.MirroredStrategy``.

    Args:
        x_train: Training inputs. When it exposes a 3-D ``shape`` the model's
            input shape is taken from ``shape[1:]``; otherwise the historical
            default ``(38252, 14)`` is used.
        y_train: Training targets for ``categorical_crossentropy``
            (the output layer has 4 units).
        x_test: Inputs used as validation data during fitting.
        y_test: Targets used as validation data during fitting.
        save_to: Prefix prepended to ``'_best_model_lstm_time_domain.h5'`` to
            form the checkpoint path.
        epoch: Maximum number of training epochs.

    Returns:
        ``(model, history)``: the fitted model and the object returned by
        ``model.fit``. ``restore_best_weights`` is not set on the early-stopping
        callback, so the returned model is not reloaded from the best
        checkpoint; the best-``val_accuracy`` weights are the ones saved at
        the checkpoint path.
    """
    # Single source of truth for the optimizer's rate and the decay schedule.
    initial_lr = 0.001

    # Derive the per-sample input shape from the data instead of hard-coding it.
    data_shape = getattr(x_train, 'shape', None)
    if data_shape is not None and len(data_shape) == 3:
        input_shape = tuple(data_shape[1:])
    else:
        input_shape = (38252, 14)

    strategy = tf.distribute.MirroredStrategy(devices=None)
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    with strategy.scope():
        inputs = tf.keras.Input(shape=input_shape)

        # NOTE(review): returning the full sequence and flattening it makes the
        # Dense layer's size grow with the number of time steps -- confirm this
        # is intended rather than using only the last LSTM state.
        ml_model = tf.keras.layers.LSTM(256, return_sequences=True)(inputs)

        flat = Flatten()(ml_model)
        outputs = Dense(4, activation='softmax')(flat)
        model = tf.keras.Model(inputs, outputs)

        model.summary()
        tf.keras.utils.plot_model(model)

        opt_adam = keras.optimizers.Adam(learning_rate=initial_lr)

        model.compile(optimizer=opt_adam,
                      loss=['categorical_crossentropy'],
                      metrics=['accuracy'])

    # Stop after 10 epochs without val_loss improvement; independently keep the
    # best-val_accuracy model on disk.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    mc = ModelCheckpoint(save_to + '_best_model_lstm_time_domain.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

    # Exponential decay of the learning rate; the argument is named
    # epoch_index so it does not shadow the `epoch` parameter.
    lr_schedule = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch_index: initial_lr * np.exp(-epoch_index / 10.))

    history = model.fit(x_train, y_train,
                        batch_size=32,
                        epochs=epoch,
                        validation_data=(x_test, y_test),
                        callbacks=[es, mc, lr_schedule], shuffle=False)

    return model,history



In [142]:

# Train for at most 40 epochs; the checkpoint path is prefixed with './'.
model, history = train_model(
    X_train,
    y_train,
    X_test,
    y_test,
    save_to='./',
    epoch=40,
)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Number of devices: 2
Model: "model_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_24 (InputLayer)        [(None, 38252, 14)]       0         
_________________________________________________________________
lstm_23 (LSTM)               (None, 38252, 256)        277504    
_________________________________________________________________
flatten_23 (Flatten)         (None, 9792512)           0         
_________________________________________________________________
dense_23 (Dense)             (None, 4)                 39170052  
Total params: 39,447,556
Trainable params: 39,447,556
Non-trainable params: 0
_________________________________________________________________
RRRRRR right before: x_train.shape:
[[[-1.156920e+01 -7.712800e+00  1.3015

2022-07-04 20:44:24.516914: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:656] In AUTO-mode, and switching to DATA-based sharding, instead of FILE-based sharding as we cannot find appropriate reader dataset op(s) to shard. Error: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_22270"
    }
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. You can do this by creating a new `tf.data.Options()` object then setting `options.experimental_distribute.au

Epoch 1/40
INFO:tensorflow:batch_all_reduce: 5 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 5 all-reduces with algorithm = nccl, num_packs = 1

2022-07-04 20:44:33.247770: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:656] In AUTO-mode, and switching to DATA-based sharding, instead of FILE-based sharding as we cannot find appropriate reader dataset op(s) to shard. Error: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_27641"
    }
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. You can do this by creating a new `tf.data.Options()` object then setting `options.experimental_distribute.au


Epoch 00001: val_accuracy improved from -inf to 0.55882, saving model to ./_best_model_lstm_time_domain.h5
Epoch 2/40

Epoch 00002: val_accuracy did not improve from 0.55882
Epoch 3/40

Epoch 00003: val_accuracy did not improve from 0.55882
Epoch 4/40

Epoch 00004: val_accuracy did not improve from 0.55882
Epoch 5/40

Epoch 00005: val_accuracy did not improve from 0.55882
Epoch 6/40

Epoch 00006: val_accuracy did not improve from 0.55882
Epoch 7/40

Epoch 00007: val_accuracy did not improve from 0.55882
Epoch 8/40

Epoch 00008: val_accuracy did not improve from 0.55882
Epoch 9/40

Epoch 00009: val_accuracy did not improve from 0.55882
Epoch 10/40

Epoch 00010: val_accuracy did not improve from 0.55882
Epoch 11/40

Epoch 00011: val_accuracy did not improve from 0.55882
Epoch 00011: early stopping
