In [None]:
from fastai.tabular.all import *
import pandas as pd
import numpy as np
import pickle
import os

In [None]:
# Directory that holds the per-symbol training/validation section CSVs.
training_path = 'E:/BitBot/training_data_sections/'

# File names are sliced positionally: the first 10 characters are taken as the
# section timestamp, and characters 11..-11 (with any '.csv' removed) as the
# symbol. Files containing 'train' / 'valid' feed the respective timestamp set.
symbols = set()
timestamps_train = set()
timestamps_valid = set()
for filename in os.listdir(training_path):
    symbols.add(filename[11:-11].replace('.csv', ''))
    if 'train' in filename:
        timestamps_train.add(filename[:10])
    elif 'valid' in filename:
        timestamps_valid.add(filename[:10])

symbols = list(symbols)
timestamps_train = sorted(timestamps_train)
timestamps_valid = sorted(timestamps_valid)

# Map the i-th sorted train timestamp to the i-th sorted valid timestamp
# (zip stops at the shorter of the two lists).
timestamps = dict(zip(timestamps_train, timestamps_valid))

print(symbols)
print(list(timestamps))

In [None]:
def read_training_data(timestamp_train, timestamp_valid):
    """Load and concatenate the CSV sections for one train/valid timestamp pair.

    Every file in ``training_path`` whose name contains ``timestamp_train`` and
    the word ``'train'`` goes into the training frame; otherwise, a file whose
    name contains ``timestamp_valid`` and the word ``'valid'`` goes into the
    validation frame.

    Args:
        timestamp_train: Substring identifying the training section files.
        timestamp_valid: Substring identifying the validation section files.

    Returns:
        ``(dfs_train, dfs_valid)``: two DataFrames, each the row-wise
        concatenation of the matching files. The per-file indices are kept
        as-is (they are not reset here).

    Raises:
        ValueError: If no training file or no validation file matches.
    """
    dfs_train, dfs_valid = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the concatenated frames (and of anything derived from it,
    # such as saved predictions) reproducible across runs and machines.
    for filename in sorted(os.listdir(training_path)):
        path = os.path.join(training_path, filename)
        if timestamp_train in filename and 'train' in filename:
            dfs_train.append(pd.read_csv(path))
        elif timestamp_valid in filename and 'valid' in filename:
            dfs_valid.append(pd.read_csv(path))
    if not dfs_train or not dfs_valid:
        # pd.concat([]) would raise a ValueError with no hint about the cause.
        raise ValueError(
            f'No matching CSV files in {training_path!r}: '
            f'{len(dfs_train)} train file(s) for {timestamp_train!r}, '
            f'{len(dfs_valid)} valid file(s) for {timestamp_valid!r}'
        )
    return pd.concat(dfs_train), pd.concat(dfs_valid)

In [None]:
def make_splits(dfs_train, dfs_valid):
    """Stack the train and valid frames and return positional split indices.

    Args:
        dfs_train: DataFrame with the training rows.
        dfs_valid: DataFrame with the validation rows.

    Returns:
        ``(df, splits)`` where ``df`` is ``dfs_train`` followed by
        ``dfs_valid`` and ``splits`` is ``[train_positions, valid_positions]``,
        two lists of integer row positions into ``df``.
    """
    len_train, len_valid = dfs_train.shape[0], dfs_valid.shape[0]
    splits = [
        list(range(0, len_train)),
        list(range(len_train, len_train + len_valid))
    ]
    # ignore_index=True gives the combined frame a fresh 0..n-1 index. Without
    # it the inputs' own (typically repeated) labels are carried over, so the
    # index contains duplicates and no longer lines up with the positional
    # values stored in `splits`.
    df = pd.concat([dfs_train, dfs_valid], ignore_index=True)
    return df, splits

In [None]:
def make_dataloader(df, splits=None, bs=2**9):
    """Build fastai tabular DataLoaders from the combined train/valid frame.

    Column roles are derived purely from column position: the last column is
    the target, the ``len(symbols)`` columns before it are categorical, and
    everything earlier is continuous. ``symbols`` is the module-level list.

    Args:
        df: Combined DataFrame (training rows followed by validation rows).
        splits: ``[train_positions, valid_positions]`` passed to
            ``TabularPandas``. When omitted, the module-level ``splits``
            variable is used, which is what this function always did before
            the parameter existed.
        bs: Batch size for the DataLoaders (default 512).

    Returns:
        The object returned by ``TabularPandas.dataloaders``.

    Raises:
        NameError: If ``splits`` is omitted and no module-level ``splits``
            exists.
    """
    if splits is None:
        # Backward-compatible fallback to the notebook-global `splits`.
        try:
            splits = globals()['splits']
        except KeyError:
            raise NameError(
                "name 'splits' is not defined; pass splits= explicitly"
            ) from None

    columns = list(df.columns)
    n_cat = len(symbols)
    cat_names = columns[-n_cat - 1:-1]
    cont_names = columns[:-n_cat - 1]
    y_names = columns[-1:]

    to = TabularPandas(
        df,
        procs=[Categorify],
        cat_names=cat_names,
        cont_names=cont_names,
        y_names=y_names,
        splits=splits,
    )
    return to.dataloaders(bs=bs)

In [None]:
def train(dataloader, n_epoch=5, lr_max=2e-5):
    """Create a tabular learner on ``dataloader`` and fit it with one-cycle.

    Args:
        dataloader: DataLoaders object handed to ``tabular_learner``.
        n_epoch: Number of epochs passed to ``fit_one_cycle`` (default 5).
        lr_max: ``lr_max`` passed to ``fit_one_cycle`` (default 2e-5).

    Returns:
        The fitted learner, created with ``rmse`` as its metric.
    """
    learn = tabular_learner(dataloader, metrics=rmse)
    learn.fit_one_cycle(n_epoch, lr_max=lr_max)
    return learn

In [None]:
def make_predictions(timestamp, df, learn):
    dl_train = DataLoader(dataset=df.iloc[splits[0]])
    df_val = DataLoader(dataset=df.iloc[splits[1]])
    df_train, df_val = df.iloc[splits[0]], df.iloc[splits[1]]
    dl_train = learn.dls.test_dl(df_train)
    dl_val = learn.dls.test_dl(df_val)
    pred_train, gt_train = learn.get_preds(dl=dl_train)
    pred_val, gt_val = learn.get_preds(dl=dl_val)
    with open(f'preds_{timestamp}.pickle', 'wb') as f:
        pickle.dump({
            'pred_train': pred_train.squeeze(),
            'gt_train': gt_train.squeeze(),
            'pred_val': pred_val.squeeze(),
            'gt_val': gt_val.squeeze()
        }, f)

In [None]:
# For every (train, valid) timestamp pair: load the sections, build the
# combined frame and its splits, train a fresh learner and save predictions.
for timestamp_train, timestamp_valid in timestamps.items():
    print(timestamp_train, timestamp_valid)
    dfs_train, dfs_valid = read_training_data(timestamp_train, timestamp_valid)
    # `splits` must be bound at module level here: make_dataloader and
    # make_predictions are called without it and read the global.
    df, splits = make_splits(dfs_train, dfs_valid)
    dataloader = make_dataloader(df)
    learn = train(dataloader)
    make_predictions(timestamp_train, df, learn)

In [None]:
#learn = tabular_learner(dataloader, metrics=rmse)
#learn.lr_find()
#learn.fit(n_epoch=2, lr=5e-5)
#learn.export(f'model_section_{year}-{month:02}.pickle')
#break

In [None]:
# Manual step-down schedule on a fresh learner: one epoch per learning rate,
# from 1e-3 down to 5e-6. Uses `dataloader` left over from the training loop.
learn = tabular_learner(dataloader, metrics=rmse)
for step_lr in (
    1e-3,
    1e-4,
    5e-5,
    2e-5,
    1e-5,
    5e-6,
    # 2e-6,  # disabled
    # 1e-6,  # disabled
):
    learn.fit(n_epoch=1, lr=step_lr)

In [None]:
# Fresh learner on the last `dataloader`, fitted with a 5-epoch one-cycle run
# at a peak learning rate of 3e-5.
learn = tabular_learner(dataloader, metrics=rmse)
learn.fit_one_cycle(n_epoch=5, lr_max=3e-5)

In [None]:
#learn.export('model_all_a.pickle')

In [None]:
# Slice the combined frame back into its train/valid rows and wrap each in a
# test DataLoader built by the learner's own DataLoaders. The former
# DataLoader(dataset=...) constructions were dead code: both results were
# overwritten before being used, so they are dropped.
df_train, df_val = df.iloc[splits[0]], df.iloc[splits[1]]
dl_train = learn.dls.test_dl(df_train)
dl_val = learn.dls.test_dl(df_val)

In [None]:
# Predict on the train and valid DataLoaders and pickle the squeezed
# predictions and targets to 'preds.pickle' in the working directory.
# `pickle` comes from the import cell at the top of the notebook; the
# duplicate mid-notebook import was removed.
pred_train, gt_train = learn.get_preds(dl=dl_train)
pred_val, gt_val = learn.get_preds(dl=dl_val)
with open('preds.pickle', 'wb') as f:
    pickle.dump({
        'pred_train': pred_train.squeeze(),
        'gt_train': gt_train.squeeze(),
        'pred_val': pred_val.squeeze(),
        'gt_val': gt_val.squeeze()
    }, f)