In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from utils import *

In [4]:
import tensorflow as tf
import keras

2024-01-13 10:15:11.049780: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-01-13 10:15:11.049821: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
def create_conv_model(channels, length):
	"""Build and compile a 1D convolutional binary classifier.

	The network accepts inputs of shape ``(length, channels)`` and applies
	three identical stages -- Conv1D (64 filters, kernel size 6, "same"
	padding), batch normalization, ReLU -- followed by global average
	pooling and a single sigmoid output unit.

	Args:
		channels: Size of the last input axis.
		length: Size of the first input axis.

	Returns:
		A Keras model compiled with the "adam" optimizer, the
		"binary_crossentropy" loss and the "accuracy" metric.
	"""
	n_outputs = 1
	n_stages = 3

	inputs = keras.layers.Input((length, channels))

	# Every stage consumes the previous stage's output; the first reads the raw input.
	features = inputs
	for _ in range(n_stages):
		features = keras.layers.Conv1D(filters=64, kernel_size=6, padding="same")(features)
		features = keras.layers.BatchNormalization()(features)
		features = keras.layers.ReLU()(features)

	# Pool the convolutional features down to one vector per sample before the output unit.
	pooled = keras.layers.GlobalAveragePooling1D()(features)
	outputs = keras.layers.Dense(n_outputs, activation="sigmoid")(pooled)

	classifier = keras.models.Model(inputs=inputs, outputs=outputs)
	classifier.compile(
		loss="binary_crossentropy",
		optimizer="adam",
		metrics=["accuracy"],
	)
	return classifier

In [6]:
def create_simple_model(channels, length):
	"""Build and compile a fully connected binary classifier.

	The first layer is declared with ``input_dim=channels*length``. The
	network stacks ReLU dense layers of 256, 128 and 64 units and ends in a
	single sigmoid output unit.

	Args:
		channels: Factor of the input width (multiplied by ``length``).
		length: Factor of the input width (multiplied by ``channels``).

	Returns:
		A ``tf.keras`` Sequential model compiled with binary cross-entropy
		loss, the Adam optimizer and the accuracy metric.
	"""
	n_outputs = 1
	flat_width = channels*length

	# Only the first layer carries the input size; the remaining hidden layers infer theirs.
	stack = [tf.keras.layers.Dense(256, activation='relu', input_dim=flat_width)]
	stack += [tf.keras.layers.Dense(units, activation='relu') for units in (128, 64)]
	stack.append(tf.keras.layers.Dense(n_outputs, activation='sigmoid'))

	network = tf.keras.models.Sequential(stack)

	bce = tf.keras.losses.BinaryCrossentropy()
	adam = tf.keras.optimizers.Adam()  # Adam rather than SGD
	network.compile(loss=bce, optimizer=adam, metrics=['accuracy'])

	return network

In [8]:
# train convolutional model
#
# For every dataset variant: build a fresh convolutional model, fit it on the
# "train" split (holding out 20% for validation), evaluate it on the "test"
# split and save it under ./models/.

epochs = 60
batch_size = 32

# dataset specific
channels = 6

datasets = [300, 400, 500]

# Make sure the output directory exists before any training starts, so a
# missing folder cannot make model.save() fail after a full training run.
os.makedirs("./models", exist_ok=True)

# Test accuracy per dataset variant, kept so the runs can be compared without
# scrolling through the training log.
conv_accuracy = {}

for data in datasets:
    # set datapoint length for reshaping
    length = data

    # Building several models in one kernel accumulates Keras global state;
    # reset it before creating the next model.
    keras.backend.clear_session()
    model = create_conv_model(channels, length)

    print("Training on dataset with ", data, "datapoints...")

    print("Loading training data...")
    (df, X_conv, y) = get_data_conv("train", data, channels)

    print("Training model...")
    model.fit(X_conv, y,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2,
        verbose=1
    )

    # get stats on test data
    print("Loading test data...")
    (df, X_test_conv, y_test) = get_data_conv("test", data, channels)

    print("Evaluating model...")
    # evaluate() returns [loss, accuracy] because the model is compiled with one metric
    result = model.evaluate(X_test_conv, y_test)
    conv_accuracy[data] = result[1]
    print("Accuracy of model for " + str(data) + " datapoints: ", result[1])

    print("Saving model...")
    model.save("./models/conv_" + str(data) + ".keras")

# show the collected test accuracies as the cell result
conv_accuracy

Training on dataset with  300 datapoints...
Loading training data...
Reshaping training data...
Training model...
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Loading test data...
Reshaping test data...
Evaluating model...
Accuracy of model for 300 datapoints:  0.6465466022491455
Saving model...
Training on dataset with  400 datapoi

In [9]:
# train simple model
#
# For every dataset variant: build a fresh dense model, fit it on the "train"
# split (holding out 20% for validation), evaluate it on the "test" split and
# save it under ./models/.

epochs = 60
batch_size = 32

# dataset specific
channels = 6

datasets = [50, 100, 200, 300, 400, 500]

# Make sure the output directory exists before any training starts, so a
# missing folder cannot make model.save() fail after a full training run.
os.makedirs("./models", exist_ok=True)

# Test accuracy per dataset variant, kept so the runs can be compared without
# scrolling through the training log.
simple_accuracy = {}

for data in datasets:
    # set datapoint length for reshaping
    length = data

    # Building several models in one kernel accumulates Keras global state;
    # reset it before creating the next model.
    tf.keras.backend.clear_session()
    model = create_simple_model(channels, length)

    print("Training on dataset with ", data, "datapoints...")

    print("Loading training data...")
    (df, X, y) = get_data("train", data)

    print("Training model...")
    model.fit(X, y,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2,
        verbose=1
    )

    # get stats on test data
    print("Loading test data...")
    (df, X_test, y_test) = get_data("test", data)

    print("Evaluating model...")
    # evaluate() returns [loss, accuracy] because the model is compiled with one metric
    result = model.evaluate(X_test, y_test)
    simple_accuracy[data] = result[1]
    print("Accuracy of model for " + str(data) + " datapoints: ", result[1])

    print("Saving model...")
    model.save("./models/simple_" + str(data) + ".keras")

# show the collected test accuracies as the cell result
simple_accuracy

Training on dataset with  50 datapoints...
Loading training data...
Training model...
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Loading test data...
Evaluating model...
Accuracy of model for 50 datapoints:  0.5974569320678711
Saving model...
Training on dataset with  100 datapoints...
Loading training data...
Training model...
Ep

In [None]:
# The 400-datapoint dataset seems to provide the highest accuracy