In [1]:
!pip install numpy==1.20
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option("display.precision", 2)
from pathlib import Path
from tqdm import tqdm
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
import math
print(np.__version__)

1.20.0


In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("ok")

Num GPUs Available:  1
ok


session types:  
        a -> 0, b -> 1, s -> 18

In [3]:
np.set_printoptions(precision=2)
plt.rcParams["figure.figsize"] = (10,10)


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    import itertools
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    #print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

def plot_cm_matrix(model, X_test, y_test):
    y_pred = model.predict_classes(X_test)
    y_test_single_digit = np.argmax(y_test, axis=-1)
    c_matrix = confusion_matrix(y_test_single_digit, y_pred)
    plt.figure()
    plot_confusion_matrix(c_matrix, classes=list("0123456789"))

def plot_acc_and_loss(history):
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['loss'])
    plt.title('model accuracy/loss')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['accuracy', 'loss'], loc='upper left')
    plt.show()

In [4]:
df = pd.read_pickle("/data/workspace_files/all_session.pkl") 
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,combined_eye_closure,left_image_eye_closure,right_image_eye_closure,combined_eye_state,left_image_eye_state,right_image_eye_state,subject_id,session_id,session_type,perclos_closed_combined,perclos_combined_60s_interval,max_blink_duration_60s_interval,min_blink_duration_60s_interval,mean_blink_duration_60s_interval,blink_counts_60s_interval,karolinska_response_nearest_interpolation,karolinska_response_linear_interpolation
filename,frame,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
001_1_a,0,0.26,0.25,0.28,0,0,0,1,1,0,False,,,,,,6,6
001_1_a,1,0.27,0.22,0.31,0,0,0,1,1,0,False,,,,,,6,6
001_1_a,2,0.27,0.23,0.32,0,0,0,1,1,0,False,,,,,,6,6
001_1_a,3,0.27,0.22,0.32,0,0,0,1,1,0,False,,,,,,6,6
001_1_a,4,0.26,0.22,0.31,0,0,0,1,1,0,False,,,,,,6,6


In [5]:


columns = ['combined_eye_closure', 'left_image_eye_closure',
       'right_image_eye_closure', 'combined_eye_state', 'left_image_eye_state',
       'right_image_eye_state', 'subject_id', 'session_id', 'session_type',
       'perclos_closed_combined', 'perclos_combined_60s_interval',
       'max_blink_duration_60s_interval', 'min_blink_duration_60s_interval',
       'mean_blink_duration_60s_interval', 'blink_counts_60s_interval',
       'karolinska_response_nearest_interpolation',
       'karolinska_response_linear_interpolation']

def df_to_train_and_test_data(full_df: pd.DataFrame) -> (np.ndarray, np.ndarray):
    full_df = full_df.sample(frac=1).copy()
    full_df = full_df.dropna()
    full_df = full_df[full_df["session_type"] != 0]
    features = full_df[['combined_eye_closure']].to_numpy()
    targets = full_df["karolinska_response_linear_interpolation"].to_numpy()
    return  features, targets

features, targets = df_to_train_and_test_data(df)
del df

In [6]:
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=.33, random_state=42)

X_train = X_train.astype(float)
X_test = X_test.astype(float)

y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

In [7]:
BATCH_SIZE = 2_048
EPOCHS = 20

In [8]:
class EyeClosureGenerator(keras.utils.Sequence):
    def __init__(self, x_set, y_set, batch_size, interval_size = 60*30):
        if len(x_set) < interval_size:
            raise ValueError("interval size cannot be larger than input array shape")
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.interval_size = interval_size

    def __len__(self):
        return math.ceil((len(self.x) - self.interval_size + 1)/ self.batch_size)

    def __getitem__(self, idx):
        if idx >= len(self):
            raise IndexError(f"Index out of range for dataset of lenght {len(self)}.")
            
        #batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        start_idx = idx * self.batch_size
        end_idx = (idx + 1) * self.batch_size
        batch_x = np.lib.stride_tricks.sliding_window_view(self.x[start_idx: end_idx + self.interval_size - 1], window_shape=self.interval_size, axis=0)
        batch_y = self.y[start_idx + self.interval_size - 1:end_idx + self.interval_size - 1]

        batch_x = np.expand_dims(batch_x, -1)

        return batch_x, batch_y

In [38]:
ds = EyeClosureGenerator(x_set=np.squeeze(X_train), y_set=y_train, batch_size=BATCH_SIZE, interval_size=60 * 30)

In [36]:
for x,y in iter(ds):
    assert x.shape[0] == y.shape[0]
    if x.shape != (BATCH_SIZE, 1800, 1):
        print(x.shape, y.shape)
        break
    assert y.shape == (BATCH_SIZE, 10)
    

(662, 1800, 1) (662, 10)


In [41]:
model = Sequential()
model.add(keras.layers.Input(shape=(60 * 30, 1)))
model.add(keras.layers.Conv1D(filters=128, kernel_size=10, activation="relu"))

model.add(keras.layers.Conv1D(filters=128, kernel_size=50, activation="relu"))
model.add(keras.layers.MaxPool1D(pool_size=2))


model.add(keras.layers.MaxPool1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
with tf.device("gpu:0"):
    history = model.fit(
        x=ds,
        epochs=20,
        batch_size=BATCH_SIZE)
    results = model.evaluate(X_test, y_test, batch_size=2)
    print("test loss, test acc:", results)


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_10 (Conv1D)           (None, 1791, 128)         1408      
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 1742, 128)         819328    
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 871, 128)          0         
_________________________________________________________________
max_pooling1d_11 (MaxPooling (None, 435, 128)          0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 55680)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               28508672  
_________________________________________________________________
dense_6 (Dense)              (None, 10)               

KeyboardInterrupt: KeyboardInterrupt: 

In [0]:
1

In [22]:
tmodel = Sequential()
model.add(Dense(256, input_dim=X_train.shape[-1], activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
with tf.device("gpu:0"):
    history = model.fit(
        X_train,
        y_train,
        epochs=20,
        batch_size=2048)
    results = model.evaluate(X_test, y_test, batch_size=2048)
    print("test loss, test acc:", results)
plot_cm_matrix(model, X_test, y_test)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, None, 10)          18010     
_________________________________________________________________
dense_5 (Dense)              (None, None, 256)         2816      
_________________________________________________________________
dense_6 (Dense)              (None, None, 10)          2570      
Total params: 23,396
Trainable params: 23,396
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20


ValueError: ValueError: in user code:

    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:531 train_step  **
        y_pred = self(x, training=True)
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:886 __call__
        self.name)
    /opt/anaconda3/envs/datalore-user/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py:216 assert_input_compatibility
        ' but received input with shape ' + str(shape))

    ValueError: Input 0 of layer sequential_4 is incompatible with the layer: expected axis -1 of input shape to have value 1800 but received input with shape [None, 1]


In [0]:
plot_acc_and_loss(history)

In [0]:
model = Sequential()
model.add(Dense(1024, input_dim=X_train.shape[-1], activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(
    X_train,

    y_train,
    epochs=20,
    batch_size=2048)
results = model.evaluate(X_test, y_test, batch_size=2048)
print("test loss, test acc:", results)
plot_cm_matrix(model, X_test, y_test)

In [0]:
model = Sequential()
model.add(Dense(1024, input_dim=X_train.shape[-1], activation='relu'))
model.add(Dense(1024, activation="relu"))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(
    X_train,

    y_train,
    epochs=20,
    batch_size=2048)
results = model.evaluate(X_test, y_test, batch_size=2048)
print("test loss, test acc:", results)
plot_cm_matrix(model, X_test, y_test)

In [0]:
plot_acc_and_loss(history)



In [0]:
model = Sequential()
model.add(Dense(1024, input_dim=X_train.shape[-1], activation='relu'))
model.add(tf.keras.layers.Dropout(rate=.2))
model.add(Dense(1024, activation="relu"))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(
    X_train,

    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE)
results = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print("test loss, test acc:", results)
plot_acc_and_loss(history)

plot_cm_matrix(model, X_test, y_test)

In [0]:
model = Sequential()
model.add(Dense(1024, input_dim=X_train.shape[-1], activation='relu'))
model.add(Dense(256, activation="relu"))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(
    X_train,

    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE)
results = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print("test loss, test acc:", results)
plot_acc_and_loss(history)

plot_cm_matrix(model, X_test, y_test)

In [0]:
model = Sequential()
model.add(Dense(1024, input_dim=X_train.shape[-1], activation='relu'))
model.add(tf.keras.layers.Dropout(rate=.2))
model.add(Dense(1024, activation="relu"))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(
    X_train,

    y_train,
    epochs=EPOCHS* 5,
    batch_size=BATCH_SIZE)
results = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print("test loss, test acc:", results)
plot_acc_and_loss(history)

plot_cm_matrix(model, X_test, y_test)

In [0]:
plot_acc_and_loss(history)

plot_cm_matrix(model, X_test, y_test)