# Frequentist baseline: vote among similar historical windows

In [3]:
import numpy as np 
import pandas as pd 
import json 
import ccxt 
import seaborn as sns
import os 
import pandas_ta as ta 
import time
from datetime import datetime, timedelta
import math
from tqdm.auto import tqdm 
import matplotlib.pyplot as plt 
from transformers import * 
import torch 
from torch import Tensor 
from torch.utils.data import * 
import torch.nn as nn 
import torch.nn.functional as F 
from sklearn.utils.class_weight import compute_class_weight 
from sklearn.metrics import f1_score
from imblearn.under_sampling import RandomUnderSampler
from pytorch_metric_learning import miners, losses
from pytorch_metric_learning.distances import CosineSimilarity
from scipy.spatial.distance import cdist 
import pickle

# Load the raw candle rows from disk. The loaded data is presumably a list of
# six-element rows (TODO confirm); positional columns 0-5 are named below.
with open("BTC_USDT-1h-12.json") as f:
    d = json.load(f)

column_names = ["timestamp", "open", "high", "low", "close", "volume"]
chart_df = pd.DataFrame(d).rename(columns=dict(enumerate(column_names)))

def process(df):
    """Replace the ``timestamp`` column with a formatted ``datetime`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``timestamp`` column of Unix epoch milliseconds
        (the unit the previously used ccxt ``iso8601`` helper expects).

    Returns
    -------
    pandas.DataFrame
        A new frame without ``timestamp`` and with a trailing ``datetime``
        column of strings formatted as ``"YYYY-MM-DD HH:MM:SS"`` (UTC).
        The input frame is left unmodified.

    Raises
    ------
    KeyError
        If ``df`` has no ``timestamp`` column.
    """
    # Convert the whole column at once; truncating to int64 mirrors the
    # per-row ``int(...)`` cast of the earlier implementation, and the
    # sub-second part is dropped by the format string.
    formatted = pd.to_datetime(
        df["timestamp"].astype("int64"), unit="ms"
    ).dt.strftime("%Y-%m-%d %H:%M:%S")

    # ``drop`` returns a new frame, so adding the column here never touches
    # the caller's DataFrame.
    out = df.drop(columns=["timestamp"])
    # Assign positionally (as a plain array) so the result does not depend on
    # index alignment.
    out["datetime"] = formatted.to_numpy()
    return out

chart_df = process(chart_df)

# Parse the "datetime" column once and read the calendar fields through the
# vectorized .dt accessor instead of calling pd.to_datetime on every row.
# .tolist() yields plain Python ints, so the resulting columns keep the same
# values and dtype as when they were filled from hand-built lists.
parsed_datetimes = pd.to_datetime(chart_df["datetime"])
hours = parsed_datetimes.dt.hour.tolist()
days = parsed_datetimes.dt.day.tolist()
months = parsed_datetimes.dt.month.tolist()
years = parsed_datetimes.dt.year.tolist()

chart_df["hours"] = hours
chart_df["days"] = days
chart_df["months"] = months
chart_df["years"] = years

close = chart_df["close"].values
datetimes = chart_df["datetime"].values

# Number of closes kept up to and including the anchor row, and number of
# closes taken after it.
lookback_window = 24
forecast_window = 6

# Maps the anchor row's datetime -> (past closes ending at the anchor row,
# the next `forecast_window` closes after it).
date_chart_df = {}

for i in tqdm(range(lookback_window, len(datetimes) - forecast_window)):
    dtobj = datetime.strptime(str(datetimes[i]), "%Y-%m-%d %H:%M:%S")
    date_chart_df[dtobj] = (
        close[i - lookback_window + 1:i + 1],
        close[i + 1:i + 1 + forecast_window],
    )

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open("similarity_dict_top_10.pkl", "rb") as f:
    similarity_dict = pickle.load(f)
    

  0%|          | 0/47346 [00:00<?, ?it/s]

  0%|          | 0/47316 [00:00<?, ?it/s]

In [9]:
# Action codes used throughout: 0 = long, 1 = short, 2 = hold.
action, full_gt = [], []

for key, value in similarity_dict.items():
    query_past, query_future = date_chart_df[key]
    query_last = query_past[-1]

    # Ground truth: every future step that moves at least 1% away from the
    # last observed close contributes its direction; if no step does, the
    # only accepted action is hold.
    gt_actions = []
    for future_price in query_future:
        ret = future_price / query_last
        if ret >= 1.01:
            gt_actions.append(0)
        elif ret <= 0.99:
            gt_actions.append(1)
    if not gt_actions:
        gt_actions.append(2)
    full_gt.append(gt_actions)

    # Voting: directional votes are counted once per qualifying future step
    # of each candidate, whereas a hold vote is counted once per candidate
    # that never crosses either threshold.
    votes = {"long": 0, "short": 0, "hold": 0}
    for candidate_key in value:
        candidate_past, candidate_future = date_chart_df[candidate_key]
        last = candidate_past[-1]
        moved = False
        for future_price in candidate_future:
            ret = future_price / last
            if ret >= 1.01:
                votes["long"] += 1
                moved = True
            elif ret <= 0.99:
                votes["short"] += 1
                moved = True
        if not moved:
            votes["hold"] += 1

    # Decision: hold needs a strict majority over both directions; otherwise
    # the larger directional count wins.
    if votes["hold"] > max(votes["long"], votes["short"]):
        chosen = 2
    elif votes["long"] > votes["short"]:
        chosen = 0
    elif votes["short"] > votes["long"]:
        chosen = 1
    else:
        # Tie between long and short: go against the sign of the query's
        # most recent one-step return (negative -> long, otherwise short).
        last_step_return = (query_past[-1] - query_past[-2]) / query_past[-2]
        chosen = 0 if last_step_return < 0 else 1
    action.append(chosen)

In [13]:
# Count the queries whose predicted action is one of their ground-truth actions.
cnt = sum(1 for idx, allowed in enumerate(full_gt) if action[idx] in allowed)

# Hit rate in percent (shown as the cell output).
cnt / len(full_gt) * 100

45.285748734024594

# Random baseline: uniformly random actions

In [37]:
import random

# Dedicated, seeded generator so the averaged baseline accuracy is
# reproducible from run to run and the global `random` state is untouched.
baseline_rng = random.Random(42)

# Ground-truth labels (0 = long, 1 = short, 2 = hold) depend only on the
# price data, not on the trial, so they are built once up front instead of
# being recomputed inside every one of the 1000 trials.
full_gt = []
for key in similarity_dict:
    query_past, query_future = date_chart_df[key]

    query_last = query_past[-1]
    gt_actions = []
    for j in range(len(query_future)):
        ret = query_future[j] / query_last
        if ret <= 0.99:
            gt_actions.append(1)
        elif ret >= 1.01:
            gt_actions.append(0)
    # No step moved at least 1% in either direction: hold is the only
    # accepted action.
    if len(gt_actions) == 0:
        gt_actions.append(2)

    full_gt.append(gt_actions)

# Accuracy (in percent) of each random trial.
all_acc = []

for k in tqdm(range(1000)):
    # One uniformly random action in {0, 1, 2} per query.
    action = [baseline_rng.randint(0, 2) for _ in full_gt]

    cnt = sum(1 for i in range(len(full_gt)) if action[i] in full_gt[i])
    acc = cnt / len(full_gt) * 100

    all_acc.append(acc)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [38]:
# Mean of the per-trial accuracies (in percent) collected in `all_acc`.
np.mean(all_acc)

36.109549071618034