In [None]:
from fastai.tabular.all import *
import pandas as pd
import numpy as np
import pickle
import os

In [None]:
# Directory holding the per-section training/validation CSV files, and the
# directory that exported models are written to.
training_path = 'E:/BitBot/training_data_sections/'
models_path = 'E:/BitBot/models'
# exist_ok=True makes this idempotent without a separate existence check
# (no window between "check" and "create" in which the directory can appear).
os.makedirs(models_path, exist_ok=True)

In [None]:
# Discover the traded symbols and the train/valid section timestamps from the
# file names in training_path.
# NOTE(review): file names are assumed to look like
# "<YYYY-MM-DD>_<SYMBOL>_<train|valid>.csv" -- confirm against the data folder.
symbols, timestamps_train, timestamps_valid = set(), set(), set()
for filename in os.listdir(training_path):
    stem = os.path.splitext(filename)[0]
    # Drop the 10-character date plus "_" prefix, then the trailing
    # "_train"/"_valid" part. A fixed [11:-11] slice would also cut the last
    # character of the symbol, because "_train.csv" is only 10 characters long.
    symbols.add(stem[11:].rsplit('_', 1)[0])
    if 'train' in filename:
        timestamps_train.add(filename[:10])
    elif 'valid' in filename:
        timestamps_valid.add(filename[:10])

# Sorted so the printed order is stable across kernel restarts.
symbols = sorted(symbols)
timestamps_train, timestamps_valid = sorted(timestamps_train), sorted(timestamps_valid)
# Pair the i-th training timestamp with the i-th validation timestamp
# (train timestamp -> valid timestamp). zip() stops at the shorter list.
timestamps = dict(zip(timestamps_train, timestamps_valid))

print(symbols)
print(list(timestamps))

In [None]:
def read_training_data(timestamp_train, timestamp_valid, data_dir=None):
    """Load and concatenate the CSV files of one train/valid section pair.

    A file is treated as training data when its name contains both
    ``timestamp_train`` and ``'train'``; otherwise it is treated as validation
    data when its name contains both ``timestamp_valid`` and ``'valid'``.

    Args:
        timestamp_train: Substring identifying the training section's files.
        timestamp_valid: Substring identifying the validation section's files.
        data_dir: Directory to scan. Defaults to the module-level
            ``training_path`` when omitted.

    Returns:
        Tuple ``(dfs_train, dfs_valid)`` of concatenated DataFrames. The
        original per-file row indices are kept (no index reset).

    Raises:
        ValueError: If no training file or no validation file matches.
    """
    if data_dir is None:
        data_dir = training_path
    train_frames, valid_frames = [], []
    # os.listdir order is OS-dependent; sort so the row order is reproducible.
    for filename in sorted(os.listdir(data_dir)):
        if timestamp_train in filename and 'train' in filename:
            train_frames.append(pd.read_csv(os.path.join(data_dir, filename)))
        elif timestamp_valid in filename and 'valid' in filename:
            valid_frames.append(pd.read_csv(os.path.join(data_dir, filename)))
    if not train_frames or not valid_frames:
        # pd.concat([]) would raise a ValueError too, but without saying which
        # section is missing.
        raise ValueError(
            f"No matching files in {data_dir!r}: found {len(train_frames)} train file(s) "
            f"for {timestamp_train!r} and {len(valid_frames)} valid file(s) for {timestamp_valid!r}"
        )
    return pd.concat(train_frames), pd.concat(valid_frames)

In [None]:
def make_splits(dfs_train, dfs_valid):
    """Stack the training and validation frames and build positional splits.

    Args:
        dfs_train: DataFrame with the training rows.
        dfs_valid: DataFrame with the validation rows.

    Returns:
        Tuple ``(df, splits)``. ``df`` holds the training rows followed by the
        validation rows, with a fresh 0..n-1 index. ``splits`` is
        ``[train_positions, valid_positions]`` as lists of row positions
        into ``df``.
    """
    n_train, n_valid = len(dfs_train), len(dfs_valid)
    splits = [
        list(range(n_train)),
        list(range(n_train, n_train + n_valid)),
    ]
    # ignore_index=True: the inputs may carry repeated index labels (e.g. when
    # they were themselves concatenated from several files). Resetting the
    # index makes labels unique and equal to the positions stored in `splits`.
    df = pd.concat([dfs_train, dfs_valid], ignore_index=True)
    return df, splits

In [None]:
def make_dataloader(df):
    """Build fastai tabular dataloaders from ``df``.

    Column roles are assigned purely by position:
      * the last ``y_count`` (7) columns are the targets,
      * the ``len(symbols)`` columns just before them are categorical,
      * the columns from index 1 up to the categorical ones are continuous,
      * column 0 is not used (presumably an identifier/time column -- confirm).

    Note: this reads the module-level names ``symbols`` and ``splits``; both
    must be set before calling.
    """
    y_count = 7
    columns = list(df.columns)
    targets_start = -y_count
    cats_start = -len(symbols) - y_count

    y_names = columns[targets_start:]
    cat_names = columns[cats_start:targets_start]
    cont_names = columns[1:cats_start]

    to = TabularPandas(
        df,
        procs=[Categorify],
        cat_names=cat_names,
        cont_names=cont_names,
        y_names=y_names,
        splits=splits,
    )
    return to.dataloaders(bs=1024)

In [None]:
def train(dataloader):
    """Create a tabular learner on ``dataloader``, fit it, and return it.

    The learner tracks ``rmse`` as its metric and is trained with
    ``fit_one_cycle`` for 4 epochs at a maximum learning rate of 5e-5.
    """
    learner = tabular_learner(dataloader, metrics=rmse)
    learner.fit_one_cycle(4, lr_max=5e-5)
    return learner

In [None]:
def make_predictions(timestamp, df, learn):
    """Predict on the training and validation rows and pickle the results.

    Args:
        timestamp: Used only to name the output file ``preds_<timestamp>.pickle``
            (written relative to the current working directory).
        df: Combined frame; rows are selected by position via the module-level
            ``splits`` (``splits[0]`` = train, ``splits[1]`` = valid).
        learn: Trained learner providing ``dls.test_dl`` and ``get_preds``.

    The pickle holds a dict with keys ``pred_train``, ``gt_train``,
    ``pred_val`` and ``gt_val``, each the squeezed value returned by
    ``get_preds`` (presumably predictions and ground-truth targets -- the
    second value is only meaningful if the frame contains the target columns).
    """
    # Note: relies on the global `splits` set by the training loop.
    df_train, df_val = df.iloc[splits[0]], df.iloc[splits[1]]
    dl_train = learn.dls.test_dl(df_train)
    dl_val = learn.dls.test_dl(df_val)
    pred_train, gt_train = learn.get_preds(dl=dl_train)
    pred_val, gt_val = learn.get_preds(dl=dl_val)
    with open(f'preds_{timestamp}.pickle', 'wb') as f:
        pickle.dump({
            'pred_train': pred_train.squeeze(),
            'gt_train': gt_train.squeeze(),
            'pred_val': pred_val.squeeze(),
            'gt_val': gt_val.squeeze()
        }, f)

In [None]:
# Train, evaluate and export one model per (train, valid) timestamp pair.
# To resume an interrupted run, skip pairs until the desired starting
# timestamp_train (e.g. "2020-07-06") is reached.
for timestamp_train, timestamp_valid in timestamps.items():
    print(f"{timestamp_train} - {timestamp_valid}")
    dfs_train, dfs_valid = read_training_data(timestamp_train, timestamp_valid)
    # `splits` must remain a module-level name: make_dataloader and
    # make_predictions read it as a global.
    df, splits = make_splits(dfs_train, dfs_valid)
    dataloader = make_dataloader(df)
    learn = train(dataloader)
    make_predictions(timestamp_train, df, learn)
    export_path = models_path + f"/model_{timestamp_train}_{timestamp_valid}.pickle"
    learn.export(export_path)

In [None]:
# Show sample predictions of the most recently trained learner.
learn.show_results(max_n=10)

# Load one validation section for ad-hoc inference. The path is built from
# training_path so the data location is configured in one place only.
df = pd.read_csv(os.path.join(training_path, "2021-07-21_XRPUSDT_valid.csv"))

# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Build a test dataloader for `df` from the learner's dataloaders; uses the
# `learn` and `df` objects currently defined in the kernel.
test_dl = learn.dls.test_dl(df)

In [None]:
# Load one validation section for ad-hoc inference. The path is built from
# training_path so the data location is configured in one place only.
df = pd.read_csv(os.path.join(training_path, "2021-07-21_XRPUSDT_valid.csv"))

In [None]:
# Build a test dataloader for `df` from the learner's dataloaders; uses the
# `learn` and `df` objects currently defined in the kernel.
test_dl = learn.dls.test_dl(df)

In [None]:
# Run the learner over test_dl. The result is indexable; element 0 is read
# below (presumably the predictions tensor -- confirm against fastai docs).
preds = learn.get_preds(dl=test_dl)

In [None]:
# Display the first element of `preds` converted to a NumPy array.
preds[0].numpy()