In [1]:
import numpy as np 
import pandas as pd 
import json 
import ccxt 
import seaborn as sns
import os 
import pandas_ta as ta 
import time
from datetime import datetime, timedelta
import math
from tqdm.auto import tqdm 
import matplotlib.pyplot as plt 
from transformers import * 
import torch 
from torch import Tensor 
from torch.utils.data import * 
import torch.nn as nn 
import torch.nn.functional as F 
from sklearn.utils.class_weight import compute_class_weight 
from sklearn.metrics import f1_score
from imblearn.under_sampling import RandomUnderSampler
from pytorch_metric_learning import miners, losses
from pytorch_metric_learning.distances import CosineSimilarity
from scipy.spatial.distance import cdist 
import pickle



# Random Data

In [2]:
# Load the precomputed augmentation table. The later input-building cell uses
# it as a mapping from a window's start datetime to an iterable of other
# window start datetimes.
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# if it comes from a trusted source.
with open("random_aug.pkl", "rb") as aug_file:
    random_aug_saved = pickle.load(aug_file)

In [4]:
# Read the raw candle dump from disk.
with open("BTC_USDT-1h-12.json") as candle_file:
    d = json.load(candle_file)

# Positional columns 0..5 get descriptive names; any other columns are left
# untouched by the rename.
# (presumably each row is [timestamp, open, high, low, close, volume] -- verify
# against the JSON file)
chart_df = pd.DataFrame(d).rename(
    columns=dict(enumerate(["timestamp", "open", "high", "low", "close", "volume"]))
)

def process(df):
    """Replace the epoch-millisecond ``timestamp`` column with a ``datetime`` string column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an integer-like ``timestamp`` column holding milliseconds
        since the Unix epoch (the unit the previous ``ccxt`` ``iso8601`` based
        implementation expected).

    Returns
    -------
    pandas.DataFrame
        A new frame without ``timestamp`` and with a trailing ``datetime``
        column formatted as ``"YYYY-MM-DD HH:MM:SS"`` (UTC, sub-second part
        dropped). The input frame is not modified.
    """
    # The int64 cast mirrors the per-row int() coercion of the old loop.
    parsed = pd.to_datetime(df["timestamp"].astype("int64"), unit="ms")
    formatted = parsed.dt.strftime("%Y-%m-%d %H:%M:%S")
    # Drop first, then assign, so `datetime` ends up as the last column and
    # the caller's frame is never mutated.
    return df.drop(columns=["timestamp"]).assign(datetime=formatted)

# Swap the raw `timestamp` column for a formatted `datetime` string column.
chart_df = process(chart_df)

# Parse the whole column in one vectorised call instead of calling
# pd.to_datetime once per row; the format matches the strings that
# `process` produces.
parsed_datetimes = pd.to_datetime(chart_df["datetime"], format="%Y-%m-%d %H:%M:%S")

# Keep the module-level lists of plain Python ints so the resulting column
# dtypes and any later use of these names behave as before.
hours = parsed_datetimes.dt.hour.tolist()
days = parsed_datetimes.dt.day.tolist()
months = parsed_datetimes.dt.month.tolist()
years = parsed_datetimes.dt.year.tolist()

chart_df["hours"] = hours
chart_df["days"] = days
chart_df["months"] = months
chart_df["years"] = years

  0%|          | 0/47346 [00:00<?, ?it/s]

In [5]:
datetimes = chart_df["datetime"].values

seq_len = 24
forecast_horizon = 6
# Maps each window's start datetime to a pair:
# (the seq_len closes starting there, the forecast_horizon closes that follow).
date_chart = {}

close_prices = chart_df["close"].values
# NOTE(review): this range stops one short of the last start index that still
# has a full past+future window -- confirm that is intentional.
n_windows = len(datetimes) - seq_len - forecast_horizon

for start in tqdm(range(n_windows), position=0, leave=True):
    split = start + seq_len
    window_start = datetime.strptime(str(datetimes[start]), "%Y-%m-%d %H:%M:%S")
    date_chart[window_start] = (
        close_prices[start:split],
        close_prices[split:split + forecast_horizon],
    )

  0%|          | 0/47316 [00:00<?, ?it/s]

In [6]:
past_cont_inputs, past_dates, future_cont_inputs, future_dates, targets = [], [], [], [], []

found = False  # not used in this cell; kept in case another cell reads it


def _calendar_features(start, n_steps):
    """Return an (n_steps, 3) array of [month, day, hour] rows.

    Row ``h`` describes ``start + h hours``, i.e. consecutive hourly steps
    beginning at ``start`` itself.
    """
    stamps = (start + timedelta(hours=h) for h in range(n_steps))
    return np.array([[s.month, s.day, s.hour] for s in stamps])


for key, (past_window, future_window) in tqdm(date_chart.items(), position=0, leave=True):
    # Only windows that have an entry in the augmentation table are used.
    if key not in random_aug_saved:
        continue

    # The anchor window's own future closes are the prediction target.
    targets.append(future_window)

    similar_dates = random_aug_saved[key]

    # Past input: column 0 is the anchor window, followed by one column per
    # similar date -> shape (len(past_window), 1 + number of similar dates).
    past_input = np.array([past_window] + [date_chart[dt][0] for dt in similar_dates]).T
    # Future input: only the similar dates' future closes (the anchor's future
    # is the target) -> shape (len(future_window), number of similar dates).
    future_input = np.array([date_chart[dt][1] for dt in similar_dates]).T

    past_cont_inputs.append(past_input)
    future_cont_inputs.append(future_input)

    # Calendar features advance one hour per step. The previous loop computed
    # `key + timedelta(hours=1)` on every iteration, so every past row after
    # the first repeated the same timestamp and the future rows started at
    # key + 2h instead of right after the past window.
    # (assumes consecutive candles are exactly one hour apart -- TODO confirm
    # the source data has no gaps)
    n_past = len(past_window)
    past_dates.append(_calendar_features(key, n_past))
    future_dates.append(
        _calendar_features(key + timedelta(hours=n_past), len(future_window))
    )


  0%|          | 0/47316 [00:00<?, ?it/s]

In [7]:
# Turn each accumulated Python list into a single stacked ndarray.
past_cont_inputs, past_dates, future_cont_inputs, future_dates, targets = (
    np.array(collected)
    for collected in (past_cont_inputs, past_dates, future_cont_inputs, future_dates, targets)
)

# Display the five shapes as the cell output for a quick sanity check.
tuple(
    stacked.shape
    for stacked in (past_cont_inputs, past_dates, future_cont_inputs, future_dates, targets)
)

((46316, 24, 4), (46316, 24, 3), (46316, 6, 3), (46316, 6, 3), (46316, 6))

In [8]:
# Persist every array under its own file name (np.save appends ".npy").
for file_stem, array in (
    ("random_top3_past_cont_inputs", past_cont_inputs),
    ("random_top3_past_dates", past_dates),
    ("random_top3_future_cont_inputs", future_cont_inputs),
    ("random_top3_future_dates", future_dates),
    ("random_targets", targets),
):
    np.save(file_stem, array)