### Imports

In [7]:
import numpy as np
import os
from datetime import datetime
import sys

import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, MaxPooling2D, TimeDistributed
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
import sklearn as sk
from keras import regularizers
import platform


# Report the platform and library versions this notebook was executed with.
print("\n".join([
    f"Python Platform: {platform.platform()}",
    f"Tensor Flow Version: {tf.__version__}",
    f"Keras Version: {tensorflow.keras.__version__}",
    "",  # blank separator line
    f"Python {sys.version}",
    f"Scikit-Learn {sk.__version__}",
]))

# `gpu` is True when TensorFlow lists at least one physical GPU device.
gpu = bool(tf.config.list_physical_devices('GPU'))
if gpu:
    print("GPU is", "available")
else:
    print("GPU is", "NOT AVAILABLE")

Python Platform: macOS-13.1-arm64-arm-64bit
Tensor Flow Version: 2.9.0
Keras Version: 2.9.0

Python 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
Scikit-Learn 1.2.0
GPU is available


##  Load frames

### Old dataset

In [6]:
# Class names of the old dataset; each one is also a sub-directory name and its
# position in the array is the integer label.
drowsines_levels = np.array(["0", "5", "10"])
DATA_PATH = './Dataset'
label_map = dict(zip(drowsines_levels, range(len(drowsines_levels))))

files = []
labels = []

# Layout on disk: DATA_PATH/<video_index>/<level>/<frame>.npy
for video_index in range(1, 11):
    for level in drowsines_levels:
        window = []
        # Only every second frame is read (1, 3, ..., 3999).
        for frame in range(1, 4001, 2):
            file_path = os.path.join(DATA_PATH, str(video_index), level, f"{frame}.npy")
            if not os.path.exists(file_path):
                print(f"File at {file_path} doesn't exists")
                continue
            window.append(np.load(file_path))
        # One window (list of per-frame arrays) and one label per (video, level).
        files.append(window)
        labels.append(label_map[level])
    print(f"File number: {video_index} processed")

File number: 1 processed
File number: 2 processed
File number: 3 processed
File number: 4 processed
File number: 5 processed
File number: 6 processed
File number: 7 processed
File number: 8 processed
File number: 9 processed
File number: 10 processed


In [7]:
# Check loaded files shape: stacking the per-video windows into one array gives
# (number of windows, frames per window, values per frame).
np.array(files).shape

(30, 2000, 1434)

### New dataset


#### Load new dataset pooling

In [None]:
from multiprocessing import Pool

def load_data(DATA_PATH, drowsines_levels):
    """Load the per-frame ``.npy`` files of the new dataset with a worker pool.

    Files are looked up at ``DATA_PATH/<level>/<video_index>/<frame>.npy`` for
    video indices 1..89 and frames 1..1000.

    Args:
        DATA_PATH: Root directory of the dataset.
        drowsines_levels: Sequence of class names; the position of a name is
            used as its integer label.

    Returns:
        A ``(files, labels)`` tuple. ``files`` holds one window (list of loaded
        frames, missing frames skipped) per video and ``labels`` holds the
        matching integer label for each window.
    """
    label_map = {label: num for num, label in enumerate(drowsines_levels)}
    files, labels = [], []
    # The context manager shuts the worker processes down when loading finishes
    # or fails; previously the pool was never closed and its workers leaked.
    # NOTE(review): with the "spawn" start method (default on macOS/Windows) the
    # workers may be unable to import `process_file` when it is defined in a
    # notebook cell -- confirm this cell runs, or move the helper into a module.
    with Pool(processes=4) as pool:
        for level in drowsines_levels:
            for video_index in range(1, 90):
                file_paths = [
                    os.path.join(DATA_PATH, level, str(video_index), f"{frame}.npy")
                    for frame in range(1, 1001)
                ]
                results = pool.map(process_file, file_paths)
                # process_file yields None for missing files; drop those frames.
                window = [result for result in results if result is not None]
                files.append(window)
                labels.append(label_map[level])
                print(f"File number: {video_index} processed for level {level}")
    return files, labels

def process_file(file_path):
    """Return the array stored at ``file_path``, or ``None`` when it is missing.

    A message naming the path is printed for every missing file.
    """
    if not os.path.exists(file_path):
        print(f"File at {file_path} doesn't exist")
        return None
    return np.load(file_path)

# Class names of the new dataset; the position of a name is its integer label.
drowsines_levels = np.array(["Sleepy", "NonSleepy"])
DATA_PATH = './Extracted_Values_Bigger_Confidence'
# Load all windows and their labels through the pooled loader.
files, labels = load_data(DATA_PATH=DATA_PATH, drowsines_levels=drowsines_levels)

#### Load new dataset standard

In [8]:
# Class names of the new dataset; each one is also a sub-directory name and its
# position in the array is the integer label.
drowsines_levels = np.array(["Sleepy", "NonSleepy"])
DATA_PATH = './Extracted_Values_Bigger_Confidence'
label_map = dict(zip(drowsines_levels, range(len(drowsines_levels))))

files = []
labels = []

# Layout on disk: DATA_PATH/<level>/<video_index>/<frame>.npy
for level in drowsines_levels:
    for video_index in range(1, 90):
        window = []
        for frame in range(1, 1001):
            file_path = os.path.join(DATA_PATH, level, str(video_index), f"{frame}.npy")
            if not os.path.exists(file_path):
                print(f"File at {file_path} doesn't exists")
                continue
            window.append(np.load(file_path))
        # One window (list of per-frame arrays) and one label per video.
        files.append(window)
        labels.append(label_map[level])
        print(f"File number: {video_index} processed for level {level}")

print(f"Processed! Current dataset shape: {np.array(files).shape}")

File number: 1 processed for level Sleepy
File number: 2 processed for level Sleepy
File number: 3 processed for level Sleepy
File number: 4 processed for level Sleepy
File number: 5 processed for level Sleepy
File number: 6 processed for level Sleepy
File number: 7 processed for level Sleepy
File number: 8 processed for level Sleepy
File number: 9 processed for level Sleepy
File number: 10 processed for level Sleepy
File number: 11 processed for level Sleepy
File number: 12 processed for level Sleepy
File number: 13 processed for level Sleepy
File number: 14 processed for level Sleepy
File number: 15 processed for level Sleepy
File number: 16 processed for level Sleepy
File number: 17 processed for level Sleepy
File number: 18 processed for level Sleepy
File number: 19 processed for level Sleepy
File number: 20 processed for level Sleepy
File number: 21 processed for level Sleepy
File number: 22 processed for level Sleepy
File number: 23 processed for level Sleepy
File number: 24 proc

#### Load new dataset fancy progress

In [4]:
def processDataset(data_path='./Extracted_Values_Bigger_Confidence',
                   drowsines_levels=("Sleepy", "NonSleepy"),
                   n_videos=89,
                   n_frames=1000):
    """Load the new dataset while printing a single-line progress indicator.

    Files are looked up at ``data_path/<level>/<video_index>/<frame>.npy`` for
    video indices ``1..n_videos`` and frames ``1..n_frames``. Missing files are
    reported and skipped.

    Args:
        data_path: Root directory of the dataset.
        drowsines_levels: Class names; the position of a name is its integer label.
        n_videos: Number of video directories read per level.
        n_frames: Number of frames read per video.

    Returns:
        A ``(files, labels)`` tuple: ``files`` holds one window (list of loaded
        frame arrays) per video and ``labels`` the matching integer labels.
        Previously nothing was returned, so the loaded data was lost when the
        function finished.
    """
    label_map = {label: num for num, label in enumerate(drowsines_levels)}

    files, labels = [], []

    for level in drowsines_levels:
        for video_index in range(1, n_videos + 1):
            window = []
            for frame in range(1, n_frames + 1):
                file_path = os.path.join(data_path, level, str(video_index), f"{frame}.npy")
                if os.path.exists(file_path):
                    window.append(np.load(file_path))
                else:
                    print(f"File at {file_path} doesn't exists")
            files.append(window)
            labels.append(label_map[level])
            # Progress within the current level, as a percentage of its videos.
            progress_percentage = (video_index / n_videos) * 100
            status = f"Progress: {progress_percentage:.2f}% | Level: {level}"
            print(status, end="\r")  # Overwrite the previous status

    print(f"Processed! Current dataset shape: {np.array(files).shape}")
    return files, labels

Progress: 100.00% | Level: NonSleepy

In [6]:
# Shape of the stacked dataset held in the notebook-level `files` list:
# (number of windows, frames per window, values per frame).
np.array(files).shape

(178, 1000, 1434)

## Model training

#### Define model

In [None]:
# Stacked-LSTM sequence classifier with one softmax output per drowsiness level.
model = Sequential()
# The time dimension is left as None so the network accepts windows of any
# length. The previous hard-coded 1049 matched neither dataset loaded in this
# notebook (1000 frames per window for the new one, 2000 for the old one).
model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=(None, 1434)))
model.add(LSTM(128, return_sequences=True, activation='tanh'))
# Last recurrent layer returns only its final state, collapsing the sequence.
model.add(LSTM(32, return_sequences=False, activation='tanh'))
# NOTE(review): softmax on a hidden layer is unusual (it forces the 64
# activations to sum to 1) -- confirm this is intended rather than e.g. 'relu'.
model.add(Dense(64, activation='softmax', kernel_regularizer='l2'))
model.add(Dropout(0.2))
model.add(Dense(drowsines_levels.shape[0], activation='softmax'))

model.summary()
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

#### Train Test Split Validation

In [None]:
now = datetime.now()
# File-name-safe timestamp. The previous "%d/%m/%Y %H:%M:%S" format put "/"
# (a path separator), a space and ":" into the log directory and model path.
date_string = now.strftime("%Y-%m-%d_%H-%M-%S")

X = np.array(files)
# One-hot encode the integer labels for categorical cross-entropy.
y = to_categorical(labels).astype(int)
# Fixed seed so the held-out test split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

log_dir = f'Logs_{date_string}_train_test_split'
tb_callback = TensorBoard(log_dir=log_dir)
# Stop once validation loss has not improved for 50 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=50)

# Trains the `model` defined in the "Define model" cell; 20% of the training
# split is held back by Keras for validation.
model.fit(X_train, y_train, epochs=500, batch_size=100, shuffle=True, callbacks=[tb_callback, es], validation_split=0.2)

# Make sure the target directory exists, and drop the stray "$" that used to be
# written literally into the file name.
os.makedirs('Models', exist_ok=True)
model.save(f'Models/drowsines_levels_train_test_split_{date_string}.h5')

### Cross validation

In [None]:
from sklearn.model_selection import KFold

now = datetime.now()
# File-name-safe timestamp. The previous "%d/%m/%Y %H:%M:%S" format put "/"
# (a path separator), a space and ":" into the log directory and model path.
date_string = now.strftime("%Y-%m-%d_%H-%M-%S")

X = np.array(files)
# One-hot encode the integer labels for categorical cross-entropy.
y = to_categorical(labels).astype(int)

# Stop a fold once validation loss has not improved for 50 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=50)

log_dir = os.path.join('Logs', f'Logs_{date_string}_CrossValid')

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

cv_scores = []
for fold, (train, test) in enumerate(kfold.split(X), start=1):
    # Each fold logs to its own sub-directory so TensorBoard shows the folds as
    # separate runs instead of overlapping curves in one run.
    tb_callback = TensorBoard(log_dir=os.path.join(log_dir, f'fold_{fold}'))

    # A fresh model per fold so no weights leak between folds.
    model = Sequential()
    # Input shape is taken from the data; the previous hard-coded 1100 frames
    # did not match the windows loaded in this notebook.
    model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=X.shape[1:]))
    model.add(LSTM(128, return_sequences=True, activation='tanh'))
    model.add(LSTM(32, return_sequences=False, activation='tanh'))
    # NOTE(review): softmax on a hidden layer is unusual -- confirm it is intended.
    model.add(Dense(64, activation='softmax', kernel_regularizer='l2'))
    model.add(Dropout(0.1))
    model.add(Dense(drowsines_levels.shape[0], activation='softmax'))

    model.summary()
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X[train], y[train], epochs=1000, batch_size=250, shuffle=True, callbacks=[tb_callback, es], validation_split=0.2)

    # scores[0] is the loss, scores[1] the accuracy on the held-out fold.
    scores = model.evaluate(X[test], y[test], verbose=0)
    print(f'Accuracy: {scores[1]*100}%')
    cv_scores.append(scores[1])

print(f'Cross-validation accuracy: {np.mean(cv_scores)*100:.2f}% +/- {np.std(cv_scores)*100:.2f}%')
# Only the model trained on the last fold is still in scope and gets saved.
os.makedirs('Models', exist_ok=True)
model.save(f'Models/drowsines_levels_cross_validation_{date_string}.h5')

# Find hyperparameters


In [None]:
from sklearn.model_selection import KFold

now = datetime.now()
# File-name-safe timestamp. The previous "%d/%m/%Y %H:%M:%S" format put "/"
# (a path separator), a space and ":" into the log directory name.
date_string = now.strftime("%Y-%m-%d_%H-%M-%S")

X = np.array(files)
# One-hot encode the integer labels for categorical cross-entropy.
y = to_categorical(labels).astype(int)

# Stop a fold once validation loss has not improved for 50 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=50)

log_dir = os.path.join('Logs', f'Logs_{date_string}_CrossValid')

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# NOTE(review): with 178 windows, 5 folds and validation_split=0.2 each fit sees
# roughly 113 samples, so batch sizes above that all behave as full-batch
# training -- confirm the candidate list is what you want to compare.
batch_sizes = [50, 100, 150, 200, 250]

for batch_size in batch_sizes:
    # Scores are collected per batch size; previously one list accumulated the
    # folds of every batch size, so each reported mean mixed in earlier results.
    cv_scores = []

    for fold, (train, test) in enumerate(kfold.split(X), start=1):
        # Separate TensorBoard run per (batch size, fold).
        tb_callback = TensorBoard(log_dir=os.path.join(log_dir, f'batch_{batch_size}', f'fold_{fold}'))

        # A fresh model per fold so no weights leak between runs.
        model = Sequential()
        # Input shape is taken from the data; the previous hard-coded 1099
        # frames did not match the windows loaded in this notebook.
        model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=X.shape[1:]))
        model.add(LSTM(128, return_sequences=True, activation='tanh'))
        model.add(LSTM(32, return_sequences=False, activation='tanh'))
        # NOTE(review): softmax on a hidden layer is unusual -- confirm it is intended.
        model.add(Dense(64, activation='softmax', kernel_regularizer='l2'))
        model.add(Dropout(0.1))
        model.add(Dense(drowsines_levels.shape[0], activation='softmax'))

        model.summary()
        model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # Use the batch size under test; this was hard-coded to 199, which made
        # every iteration of the search train with the same setting.
        model.fit(X[train], y[train], epochs=1000, batch_size=batch_size, shuffle=True, callbacks=[tb_callback, es], validation_split=0.2)

        # scores[0] is the loss, scores[1] the accuracy on the held-out fold.
        scores = model.evaluate(X[test], y[test], verbose=0)
        print(f'Accuracy: {scores[1]*100}%')
        cv_scores.append(scores[1])

    cross_validation_accuracy = np.mean(cv_scores)*100
    cross_validation_accuracy_error = np.std(cv_scores)*100
    print(f'Cross-validation accuracy: {cross_validation_accuracy:.2f}% +/- {cross_validation_accuracy_error:.2f}% for batch size: {batch_size}')

    # model.save(f'Models/drowsines_levels_cross_validation_{date_string}.h5')

### Other architecture experiments

In [None]:
now = datetime.now()
# File-name-safe timestamp. The previous "%d/%m/%Y %H:%M:%S" format put "/"
# (a path separator), a space and ":" into the saved model's path, so the save
# at the end of training pointed into directories that do not exist.
date_string = now.strftime("%Y-%m-%d_%H-%M-%S")

X = np.array(files)
# One-hot encode the integer labels for categorical cross-entropy.
y = to_categorical(labels).astype(int)
# Fixed seed so the held-out test split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Early stopping is not used in this experiment ("noStop" in the file name).

log_dir = 'Logs_15'
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()
# Input shape (frames per window, values per frame) is taken from the data.
model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=X_train.shape[1:]))
model.add(LSTM(128, return_sequences=True, activation='tanh'))
# Dropout applied independently at every time step of the sequence.
model.add(TimeDistributed(Dropout(0.1)))
model.add(LSTM(32, return_sequences=False, activation='tanh'))
# NOTE(review): softmax on a hidden layer is unusual -- confirm it is intended.
model.add(Dense(32, activation='softmax', kernel_regularizer=regularizers.l2(0.008)))
model.add(Dense(drowsines_levels.shape[0], activation='softmax'))

model.summary()
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=500, batch_size=50, shuffle=True, callbacks=[tb_callback], validation_split=0.2)

# The stray "$" that used to be written literally into the file name is dropped.
model.save(f'drowsines_weights_noStop_l2008_drop01_{date_string}.h5')

## Model validation

### Confusion Matrix

In [5]:
def generateConfusionMatrix():
    """Compute per-class confusion matrices for the current model on the test split.

    Reads the notebook-level `model`, `X_test` and `y_test`, so a training cell
    that defines them must have been run first.

    Returns:
        The result of ``multilabel_confusion_matrix`` for the true and predicted
        class indices. Previously the matrix was computed and then discarded.
    """
    predicted_probabilities = model.predict(X_test)
    # Convert one-hot targets and predicted probabilities to class indices.
    ytrue = np.argmax(y_test, axis=1).tolist()
    yhat = np.argmax(predicted_probabilities, axis=1).tolist()
    return multilabel_confusion_matrix(ytrue, yhat)


NameError: name 'model' is not defined

### Tensorboard

In [None]:
%load_ext tensorboard
%tensorboard --logdir Logs/Logs_15/04/2023 21:41:01

## Links

In [None]:
https://machinelearningmastery.com/use-dropout-lstm-networks-time-series-forecasting/ - LSTM Dropout
https://openai.com/research/how-ai-training-scales - OpenAI on batch size