In [1]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from okx.api import Account
from okx.api import Trade
from okx.app import OkxSPOT
from okx_api import Market as Market_api
from datetime import datetime
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import yfinance as yf
import os
import time
import math


from dotenv import load_dotenv
load_dotenv()
import warnings
warnings.filterwarnings("ignore")

In [2]:
market = Market_api(key = '', secret = '', passphrase = '', flag = '0')
okxSPOT = OkxSPOT(
    key = "",
    secret = "",
    passphrase = "",
)

## Add 2024 data

In [3]:
full_data = pd.DataFrame(columns = ['open', 'high', 'low', 'close'])

def History_finder(y, m, full_data):
    
    month = f"{y}-{m}-1"
    time_before = datetime.strptime(month, "%Y-%m-%d").timestamp()
    
    if y == 2023:
        
        time_after = datetime.strptime(date_string_after[m - 1], "%Y-%m-%d").timestamp()
        
    else:
        
        time_after = datetime.strptime('2024-1-31', "%Y-%m-%d").timestamp()
        
    result = market.get_history_candles(
        instId =  'BTC-USDT',
        before = str(round(time_before * 1000)), 
        after = str(round(time_after * 1000)), 
        bar = '1D'
    )
    
    data = pd.DataFrame(result['data'], columns = columns)
    data['date'] = pd.to_datetime(data['timestamp'], unit = 'ms')
    data.sort_values(by = 'date', inplace = True)
    data.set_index('date', inplace = True)
    data[['open', 'high', 'low', 'close']] = data[['open', 'high', 'low', 'close']].apply(pd.to_numeric)
    data.drop(['volume', 'timestamp', 'confirm', 'volCcyQuote', 'volCcy'], axis = 1, inplace = True)
    full_data = pd.concat([full_data, data])
    
    return full_data

In [4]:
columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'volCcy', 'volCcyQuote', 'confirm']
date_string_after = ['2023-1-31','2023-2-28','2023-3-31','2023-4-30','2023-5-31','2023-6-30',
                     '2023-7-31','2023-8-31','2023-9-30','2023-10-31','2023-11-30','2023-12-31']
   
for m in range(8, 13):

    full_data = History_finder(2023, m, full_data)

full_data = History_finder(2024, 1, full_data)

print(full_data)

                            open     high      low    close
2023-08-01 16:00:08.192  28951.7  30039.6  28862.1  29324.0
2023-08-02 15:59:44.640  29324.0  29430.4  28923.8  29258.6
2023-08-03 15:59:21.088  29258.6  29371.3  29100.0  29272.1
2023-08-04 15:58:57.536  29272.0  29285.3  28802.2  29039.9
2023-08-05 16:00:45.056  29040.0  29137.3  28989.6  29009.0
...                          ...      ...      ...      ...
2024-01-25 16:00:33.792  39801.1  41588.8  39538.9  41418.1
2024-01-26 16:00:10.240  41418.1  42249.9  41380.4  41820.5
2024-01-27 15:59:46.688  41822.0  42838.2  41741.9  42260.2
2024-01-28 15:59:23.136  42260.3  42648.6  41617.2  42641.6
2024-01-29 15:58:59.584  42641.5  43884.4  42520.0  43377.1

[172 rows x 4 columns]


## Grid search for the best param

In [6]:
def arima_AIC(data, p = 4, d = 4, q = 4):
    
    start_time = time.time()
    
    # MSE
    period = 1
    L = len(data)
    train = data[ : (L - period)]
    test = data[ - period: ]
    mse_r = []
    # AIC
    best_pdq =["AIC_pdq", 10000, "MSE", 10000]
    AIC = []
    
    for i in range(p): # AR
        
        for j in range(1, d): # I
            
            for k in range(q): # MA
                
                model = sm.tsa.arima.ARIMA(data, order = (i,j,k))
                fitted = model.fit()
                # MSE
                forecast = fitted.forecast(step = period, alpha = 0.05)
                mse = mean_squared_error(test, forecast)
                mse_r.append(mse)
                # AIC
                AIC.append(fitted.aic)
                
                if fitted.aic < best_pdq[1]:
                    
                    best_pdq[0] = (i, j, k)
                    best_pdq[1] = fitted.aic
                    
                if mse < best_pdq[1]:
                    
                    best_pdq[2] = (i, j, k)
                    best_pdq[3] = mse
                    
    end_time = time.time()
    print(f"used time : {end_time - start_time}")
    
    return best_pdq[0], best_pdq[2]

best_pdq_AIC, best_pdq_MSE = arima_AIC(full_data['open'], 4, 4, 4)

used time : 4.608656644821167


In [7]:
model = sm.tsa.ARIMA(full_data['open'], order = best_pdq_MSE)
fitted = model.fit()
number_of_steps = 10
forecast = fitted.forecast(steps = number_of_steps)

ten_days_after = forecast.values[9]
one_days_after = forecast.values[0]

percentage_change = ((ten_days_after - one_days_after) / one_days_after) * 100

print(f"forecast price changing after ten days : {round(percentage_change, 2)}%")

forecast price changing after ten days : 1.09%
