In [None]:
#region Init
from tqdm import tqdm

print("Begin init")
%run -i ./init_notebook.ipynb
import tensorflow as tf
%load_ext tensorboard
import pandas as pd

import datetime, os
import random


import numpy as np
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
import logging
# Only let TensorFlow's Python logger emit records at ERROR level or above.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

# Base directory under which a timestamped log folder is created further down
# in the notebook (see the `log_dir` assignment).
logs_base_dir = "../.logs"

print("Complete init")
#endregion

In [None]:
# Query the visible GPUs once and reuse the result for both the printout and
# the configuration below.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)

if physical_devices:
    # Let TensorFlow grow GPU memory on demand instead of reserving it all.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    assert tf.config.experimental.get_memory_growth(physical_devices[0])
    device_name = "/gpu:0"
else:
    # Without this guard, indexing physical_devices[0] raises IndexError on a
    # machine with no visible GPU; fall back to the CPU so the notebook runs.
    print("No GPU detected, falling back to /cpu:0")
    device_name = "/cpu:0"

# Set logs for tf
#tf.debugging.set_log_device_placement(True)

# Single-device strategy; the model is later built inside strategy.scope().
strategy = tf.distribute.OneDeviceStrategy(device_name)
print("Strategy", strategy)

In [None]:
# Timestamp captured when this cell runs; it is embedded in the file name of
# the final weights checkpoint saved later in the notebook.
executionId = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
import time
def dataset_benchmark(dataset, num_epochs=2):
    """Time a full iteration over ``dataset`` repeated ``num_epochs`` times.

    Every element is pulled and immediately discarded, so only the cost of
    producing the elements is measured. A tqdm progress bar is shown for each
    epoch and the total elapsed time (``time.perf_counter`` based) is printed
    once all epochs are done. Nothing is returned.
    """
    began_at = time.perf_counter()

    for epoch_num in range(num_epochs):
        print(f"EPOCH {epoch_num}" )
        progress = tqdm(dataset)
        for _element in progress:
            # Intentionally empty: the element is only drained, never used.
            continue

    elapsed = time.perf_counter() - began_at
    print("Execution time:", elapsed)


def get_dataset_from_file(file_path):
    """Best-effort load of a saved dataset from ``file_path``.

    Prints a "Loading ..." line, then calls ``tf.data.experimental.load``.
    Any exception raised along the way is printed and swallowed, in which
    case ``None`` is returned instead of a dataset so that callers can skip
    the entry.
    """
    try:
        print(f"Loading {file_path}")
        loaded = tf.data.experimental.load(file_path)
    except Exception as load_error:
        # Deliberately tolerant: report the problem and signal "nothing loaded".
        print(load_error)
        loaded = None
    return loaded



def get_dataset_from_folder(folder, batch_size):
    """Load every dataset found in ``folder`` and merge them into one batched dataset.

    Each directory entry is passed to ``get_dataset_from_file``; entries that
    could not be loaded (reported as ``None``) are skipped. Every loaded
    dataset is shuffled with a 1000-element buffer, the shuffled datasets are
    concatenated in sorted file-name order, the result is batched, and finally
    the batches themselves are shuffled with a 100-batch buffer that is
    reshuffled on every iteration.

    Args:
        folder: Directory whose entries are saved datasets.
        batch_size: Number of consecutive elements combined into one batch.

    Returns:
        The merged, batched and batch-shuffled dataset.

    Raises:
        ValueError: If no dataset could be loaded from ``folder`` (previously
            this surfaced as a bare ``StopIteration``).
    """
    # Sort the listing so the concatenation order is reproducible; os.listdir
    # returns entries in arbitrary order.
    file_paths = [os.path.join(folder, name) for name in sorted(os.listdir(folder))]

    # Shuffle every file the same way. The earlier version left the first
    # loaded dataset unshuffled while shuffling all the others.
    shuffled_datasets = [
        ds.shuffle(1000)
        for ds in (get_dataset_from_file(path) for path in file_paths)
        if ds is not None
    ]
    if not shuffled_datasets:
        raise ValueError(f"No datasets could be loaded from {folder!r}")

    merged_dataset = shuffled_datasets[0]
    for ds in shuffled_datasets[1:]:
        merged_dataset = merged_dataset.concatenate(ds)

    return merged_dataset.batch(batch_size).shuffle(100, reshuffle_each_iteration=True)
# ########

In [8]:
# Source folders: every entry of a folder is loaded as one saved dataset and
# the results are merged by get_dataset_from_folder.
# NOTE(review): these are absolute, machine-specific Windows paths.
train_folder = "c:\\_data_for_training\\"
validation_folder = "c:\\_data_for_validation\\"
# Elements per batch; the batching itself happens inside get_dataset_from_folder.
batch_size = 100
train_dataset = get_dataset_from_folder(train_folder, batch_size)
valid_dataset = get_dataset_from_folder(validation_folder, batch_size)

Loading c:\_data_for_training\BABA.dataset
Loading c:\_data_for_training\DDOG.dataset
Loading c:\_data_for_training\DOCU.dataset
Loading c:\_data_for_training\MSFT.dataset
Loading c:\_data_for_training\NVDA.dataset
Loading c:\_data_for_training\TEAM.dataset
Loading c:\_data_for_training\TSLA.dataset
Loading c:\_data_for_training\TTD.dataset
Loading c:\_data_for_training\ZS.dataset
Loading c:\_data_for_validation\AAPL.dataset


In [9]:
def check_balance(ds):
    """Measure the share of positive labels in a batched ``(x, y)`` dataset.

    Walks over every batch of ``ds`` (with a tqdm progress bar), adds up the
    label tensor to count positives, and uses the leading dimension of ``y``
    as the number of samples in the batch. The totals are printed and returned.

    Args:
        ds: Iterable yielding ``(x, y)`` batches. ``y`` is assumed to hold
            0/1 labels so that its sum equals the number of positives
            (TODO confirm against the dataset producer).

    Returns:
        Tuple ``(positive, count, percent)`` where ``percent`` is
        ``100 * positive / count``, or ``0.0`` when ``ds`` yields no samples.
    """
    count = 0
    positive = 0

    # Iterate the dataset that was passed in, not the global train_dataset.
    for _, y in tqdm(ds):
        # Integer labels are widened before summing: the labels printed by this
        # notebook are int8, and an int8 sum overflows once a batch contains
        # more than 127 positives.
        labels = tf.cast(y, tf.int64) if y.dtype.is_integer else y
        positive += tf.math.reduce_sum(labels).numpy().item()
        count += y.shape[0]

    # Guard the ratio so an empty dataset does not raise ZeroDivisionError.
    percent = 100 * positive / count if count else 0.0
    print("Y", positive, count, percent)
    return positive, count, percent

# Each call makes a full pass over a dataset and returns a
# (positive, count, percent) tuple; the recorded run took about five minutes
# for one pass, so this cell is slow.
training_balance = check_balance(train_dataset)
valid_balance = check_balance(valid_dataset)

100%|██████████| 24030/24030 [05:01<00:00, 79.74it/s]

Y 1308816 2402998 54.465962934634156





In [None]:
# tf.data.experimental.save(train_dataset, "d:\\temp\\train.dataset")

In [10]:
# Pull the first batch of the training dataset and keep its first element
# (index 0 along the batch axis) to discover the per-sample shapes.
sample = list(train_dataset.take(1))[0]
sample_x = sample[0][0,:]
sample_y = sample[1][0,:]

# The two axes of one input sample; together they are later used as the
# model's input shape.
NUMBER_OF_FEATURE = sample_x.shape[0]
FEATURE_LENGTH =  sample_x.shape[1]

print("x ", sample_x.shape)

print()
print("NUMBER_OF_FEATURE", NUMBER_OF_FEATURE)
print("FEATURE_LENGTH", FEATURE_LENGTH)
print()
print("y ", sample_y.shape, sample_y)


x  (43, 60)

NUMBER_OF_FEATURE 43
FEATURE_LENGTH 60

y  (1,) tf.Tensor([0], shape=(1,), dtype=int8)


In [11]:
# Timestamped sub-folder of logs_base_dir, computed each time this cell runs;
# it is handed to the TensorBoard callback defined below.
log_dir = os.path.join(logs_base_dir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
print("Save logs to", log_dir)


def get_metrics():
    """Create a fresh list of Keras metric objects for ``model.compile``.

    The list holds, in this order, the four confusion-matrix counters
    (``fn``, ``fp``, ``tn``, ``tp``) followed by ``precision`` and ``recall``.
    New metric instances are built on every call.
    """
    # (metric class name in tf.keras.metrics, reported name) in creation order.
    metric_specs = (
        ("FalseNegatives", "fn"),
        ("FalsePositives", "fp"),
        ("TrueNegatives", "tn"),
        ("TruePositives", "tp"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        # ("BinaryAccuracy", "ba") is intentionally left out, as before.
    )
    return [
        getattr(tf.keras.metrics, class_name)(name=alias)
        for class_name, alias in metric_specs
    ]

# Callbacks passed to model.fit; all of them watch the validation loss.
callbacks = [
    # Only overwrite best_model.h5 when val_loss improves. Without
    # save_best_only the file is rewritten after every epoch and ends up
    # holding the most recent model rather than the best one.
    tf.keras.callbacks.ModelCheckpoint(
        "best_model.h5", monitor="val_loss", save_best_only=True
    ),
    # Halve the learning rate after 20 epochs without val_loss improvement,
    # never going below 1e-5.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.00001
    ),
    # Stop training after 50 epochs without val_loss improvement.
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
    # Write TensorBoard logs (with per-epoch histograms) to log_dir.
    tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)
]

def build_model(input_shape):
    """Build the convolutional binary classifier.

    The network is three identical stages of
    ``Conv1D(64 filters, kernel 3, "same" padding) -> BatchNormalization -> ReLU``,
    followed by global average pooling and a single sigmoid unit.

    Args:
        input_shape: Shape of one sample, without the batch axis.

    Returns:
        An uncompiled ``tf.keras.models.Model``.

    NOTE(review): in the recorded model summary the first axis of the input
    (43) is kept by the convolutions while the second (60) is replaced by the
    64 filters, i.e. the kernel slides along the first axis. Confirm that this
    is the intended axis to convolve over.
    """
    layers = tf.keras.layers
    inputs = layers.Input(input_shape)

    # Layers are created in the same order as a hand-unrolled version, so the
    # automatically generated layer names are unchanged.
    features = inputs
    for _ in range(3):
        features = layers.Conv1D(filters=64, kernel_size=3, padding="same")(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU()(features)

    pooled = layers.GlobalAveragePooling1D()(features)
    prediction = layers.Dense(1, activation=tf.nn.sigmoid)(pooled)

    return tf.keras.models.Model(inputs=inputs, outputs=prediction)


# Build, compile and train the model under the single-device strategy.
with strategy.scope():
    model = build_model(input_shape=(NUMBER_OF_FEATURE, FEATURE_LENGTH))
    model.summary()
    metrics = get_metrics()
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=metrics)

    # train_dataset / valid_dataset are tf.data datasets that are already
    # batched and shuffled by get_dataset_from_folder. Keras documents that
    # batch_size must not be given for dataset inputs and that shuffle is
    # ignored for them, so neither argument is passed here.
    training_history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=500,
        callbacks=callbacks)

# print("*"*200)
# print("Finished training the model")
# validation_result  = model.evaluate(test_dataset, batch_size=300)
# print("Finished evaluating the model", validation_result)



Save logs to ../.logs\20210603-120554
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 43, 60)]          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 43, 64)            11584     
_________________________________________________________________
batch_normalization (BatchNo (None, 43, 64)            256       
_________________________________________________________________
re_lu (ReLU)                 (None, 43, 64)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 43, 64)            12352     
_________________________________________________________________
batch_normalization_1 (Batch (None, 43, 64)            256       
_________________________________________________________________
re_lu_1 (ReLU)         

KeyboardInterrupt: 

In [None]:
# Persist the model's current weights; the file name carries this run's
# executionId so checkpoints from different runs do not overwrite each other.
model.save_weights(f'./checkpoint.final.{executionId}.99999.tf')

In [None]:
# Rebuild the validation dataset from disk. The path is stored under
# validation_folder; assigning it to train_folder would leave the training
# folder variable silently pointing at validation data for later cells.
validation_folder = "c:\\_data_for_validation\\"
batch_size = 100
valid_dataset = get_dataset_from_folder(validation_folder, batch_size)

In [None]:
print("Evaluating model")
# valid_dataset is already batched by get_dataset_from_folder; Keras documents
# that batch_size must not be specified for dataset inputs, so it is omitted.
validation_result = model.evaluate(valid_dataset)
print("Finished evaluating the model", validation_result)

In [None]:
# Write the merged training dataset to disk.
# NOTE(review): train_dataset is already batched and batch-shuffled at this
# point, so the saved copy contains batches rather than single samples; the
# target is an absolute, machine-specific path.
tf.data.experimental.save(train_dataset, "d:\\temp\\train.dataset")