In [None]:
import sys
# Take the first search-path entry, strip a trailing '/src/crypto' from it and
# put the result at the front of sys.path. Presumably this exposes the project
# root so that `src.*` modules can be imported from this notebook — confirm
# against the repository layout. str.removesuffix requires Python 3.9+.
sys.path.insert(0, sys.path[0].removesuffix('/src/crypto'))
# Show the resulting search path so import problems are easy to diagnose.
print(sys.path)

In [None]:
import pandas as pd
from datetime import datetime
from pycaret.time_series import *
from pycaret import show_versions
import os
from src.calcEMA import calc_RSI

# Print the environment/version report provided by PyCaret's show_versions helper.
show_versions()

In [None]:
# Variables
# Directory scanned for the training CSV files.
datadir = './data'
# Column used as the forecasting target.
label = 'close'
# Subset of CSV columns to load (passed to read_data).
use_cols = ['open_time', 'close']

### Metadata

<code>
Field Name - Description<br>
open_time - Kline Open time in Unix time format<br>
open - Open Price<br>
high - High Price<br>
low - Low Price<br>
close - Close Price<br>
volume - Volume<br>
close_time - Kline Close time in Unix time format<br>
quote_volume - Quote Asset Volume<br>
count - Number of Trades<br>
taker_buy_volume - Taker buy base asset volume during this period<br>
taker_buy_quote_volume - Taker buy quote asset volume during this period<br>
ignore - Ignore<br>
</code>

In [None]:
def date_parser(x):
  """Convert Unix-epoch millisecond value(s) to pandas datetime(s).

  Args:
    x: Scalar or array-like of epoch timestamps expressed in milliseconds.

  Returns:
    The result of ``pd.to_datetime(x, unit='ms')`` for the given input.
  """
  return pd.to_datetime(x, unit='ms')

def read_data(dir, use_cols = ['open_time', 'close'], symbol = 'BTCUSDT'):
  """Load every ``.csv`` file in a directory into one time-indexed DataFrame.

  The files are read without a header row, using the fixed kline column
  layout listed in ``all_cols``. ``open_time`` is converted from epoch
  milliseconds to datetimes, the rows are sorted chronologically, a constant
  ``symbol`` column is added and ``open_time`` becomes the index.

  Args:
    dir: Directory to scan (non-recursively) for files ending in ``.csv``.
    use_cols: Column names to load; must contain ``'open_time'``.
    symbol: Value written to the ``symbol`` column of every row.

  Returns:
    A DataFrame indexed by ``open_time`` holding the remaining requested
    columns plus ``symbol``.

  Raises:
    ValueError: If ``use_cols`` lacks ``'open_time'`` or the directory
      contains no ``.csv`` files.
  """
  all_cols = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
              'quote_volume', 'count', 'taker_buy_volume', 'taker_buy_quote_volume', 'ignore']

  if 'open_time' not in use_cols:
    raise ValueError("use_cols must include 'open_time'; it becomes the index")

  # Sorted so the load order does not depend on the file system's listing order.
  filenames = sorted(
      os.path.join(dir, file) for file in os.listdir(dir) if file.endswith(".csv")
  )
  if not filenames:
    raise ValueError(f"No .csv files found in {dir!r}")

  dataframes = [
      pd.read_csv(filename, names=all_cols, sep=',', decimal='.', usecols=use_cols)
      for filename in filenames
  ]

  # Concatenate all DataFrames into a single DataFrame
  combined_df = pd.concat(dataframes, ignore_index=True)

  # Timestamps are converted after loading rather than through read_csv's
  # `date_parser` argument, which has been deprecated since pandas 2.0.
  combined_df['open_time'] = date_parser(combined_df['open_time'])

  # A stable sort keeps rows with equal timestamps in their load order.
  combined_df = combined_df.sort_values('open_time', kind='stable')
  combined_df['symbol'] = symbol
  return combined_df.set_index('open_time')

# Build the training frame: load the raw CSVs, add the RSI column via
# calc_RSI, then drop the constant `symbol` column and any rows with missing
# values.
train_data = read_data(datadir, use_cols)
train_data = calc_RSI(train_data, label)
train_data = train_data.drop(columns=['symbol']).dropna()

# Put the index on a regular hourly grid; timestamps missing from the source
# appear as NaN rows, which are then filled with the last known value.
train_data = train_data.asfreq('H')
train_data['close'] = train_data['close'].ffill()
train_data['rsi'] = train_data['rsi'].ffill()

# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None").
train_data.info()
print(train_data.shape)
train_data

In [None]:
# Inspect the last 20 rows of the prepared training frame.
train_data.tail(20)

In [None]:
# NOTE(review): the three feature lists below are not passed to setup() in this
# cell and are not referenced by any other visible cell — confirm whether they
# are still needed.
categorial_features = ['symbol']
#numeric_features = ['open',	'high',	'low','volume', 'close', 'rsi' ]
numeric_features = use_cols + ['rsi']
date_features = ['open_time']

# Initialise the PyCaret time-series experiment on train_data with `label` as
# the target and 3 folds. session_id and n_jobs are presumably the random seed
# and the parallelism setting (-1 = all cores) — verify against the installed
# PyCaret version.
exp_name = setup(data = train_data,
                 target = label,
                 fold = 3,
                 session_id = 123,
                 n_jobs = -1,
                 )

In [None]:
# Alternative kept for reference: let compare_models pick the model, sorted by MAE.
#best = compare_models(sort = 'MAE')
# Model identifier handed to create_model in the next cell.
best = 'huber_cds_dt' # other identifiers noted by the author: 'arima', 'naive', 'auto_arima', 'rf_cds_dt'

In [None]:
# Build the model identified by `best` with PyCaret's create_model.
model = create_model(best)

In [None]:
# Forecast 720 steps ahead; on the hourly grid set for the training data this
# corresponds to 30 days.
prediction_holdout = predict_model(model, fh=720)
# Preview the first forecast rows.
prediction_holdout.head(5)

In [None]:
# Convert the forecast's period-based index to timestamps so it can be aligned
# with the datetime-indexed test data. The isinstance guard makes the cell safe
# to re-run: once converted, the index is a DatetimeIndex, which has no
# to_timestamp() method.
if isinstance(prediction_holdout.index, pd.PeriodIndex):
    prediction_holdout.index = pd.DatetimeIndex(prediction_holdout.index.to_timestamp())

In [None]:
# Directory scanned for the hold-out CSV files.
test_dir = './test'
# Load the hold-out data with the same loader and column subset as the training data.
test_data = read_data(test_dir, use_cols=use_cols)
# Keep a copy of the target column under a separate name for the comparison plot.
test_data['original_label'] = test_data[label]
# Add the RSI column, then drop rows containing missing values.
test_data = calc_RSI(test_data, label)
test_data.dropna(inplace=True)
#print(test_data.info())
test_data.head()

In [None]:
# Attach the forecast to the test frame. Assigning a Series aligns on the
# index, so test rows whose timestamp is absent from the forecast get NaN.
test_data['prediction_label'] = prediction_holdout['y_pred']

In [None]:
import plotly.express as px

# Day-level bounds of the test window (also shown in the title) and today's date.
start_date = test_data.index.min().strftime("%Y-%m-%d")
end_date = test_data.index.max().strftime("%Y-%m-%d")
now = datetime.now().strftime("%Y-%m-%d")

# NOTE(review): filtered_date is not referenced by any other visible cell; the
# figure below plots test_data directly — confirm whether it is still needed.
filtered_date = test_data.loc[(test_data.index >= start_date) & (test_data.index <= end_date)]

# Actual vs. predicted values over the test window.
# NOTE(review): end_date carries no time component, so the initial x-range
# appears to stop at the start of the last day — confirm this is intended.
fig1 = px.line(
    test_data, x=test_data.index, y=['original_label', 'prediction_label'], template = 'plotly_dark',
    range_x=[start_date, end_date],
    # Plotly titles need '<br>' for a line break; a raw '\n' is not rendered as one.
    title=f'Data: {end_date}<br>Predição: {now}')
fig1.show()

In [None]:
# Quick line plot of the test set's close column using pandas' built-in plotting.
test_data['close'].plot()