In [1]:
import pandas as pd
from datetime import datetime, timezone, timedelta
import calendar

def get_klines_iter(symbol, interval, start, end = None, limit=1000):
    # start and end must be isoformat YYYY-MM-DD
    # We are using utc time zone

    df = pd.DataFrame()

    if start is None:
        print('start time must not be None')
        return
    start = calendar.timegm(datetime.fromisoformat(start).timetuple()) * 1000

    if end is None:
        dt = datetime.now(timezone.utc)
        utc_time = dt.replace(tzinfo=timezone.utc)
        end = int(utc_time.timestamp()) * 1000
        return
    else:
        end = calendar.timegm(datetime.fromisoformat(end).timetuple()) * 1000
    last_time = None

    while len(df) == 0 or (last_time is not None and last_time < end):
        url = 'https://api.binance.com/api/v3/klines?symbol=' + \
              symbol + '&interval=' + interval + '&limit=1000'
        if(len(df) == 0):
            url += '&startTime=' + str(start)
        else:
            url += '&startTime=' + str(last_time)

        url += '&endTime=' + str(end)
        df2 = pd.read_json(url)
        df2.columns = ['Opentime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Closetime',
                       'Quote asset volume', 'Number of trades', 'Taker by base', 'Taker buy quote', 'Ignore']
        dftmp = pd.DataFrame()
        dftmp = pd.concat([df2, dftmp], axis=0, ignore_index=True, keys=None)

        dftmp.Opentime = pd.to_datetime(dftmp.Opentime, unit='ms')
        dftmp['Date'] = dftmp.Opentime.dt.strftime("%d/%m/%Y")
        dftmp['Time'] = dftmp.Opentime.dt.strftime("%H:%M:%S")
        dftmp = dftmp.drop(['Quote asset volume', 'Closetime', 'Opentime',
                      'Number of trades', 'Taker by base', 'Taker buy quote', 'Ignore'], axis=1)
        column_names = ["Date", "Time", "Open", "High", "Low", "Close", "Volume"]
        dftmp.reset_index(drop=True, inplace=True)
        dftmp = dftmp.reindex(columns=column_names)
        string_dt = str(dftmp['Date'][len(dftmp) - 1]) + 'T' + str(dftmp['Time'][len(dftmp) - 1]) + '.000Z'
        utc_last_time = datetime.strptime(string_dt, "%d/%m/%YT%H:%M:%S.%fZ")
        last_time = (utc_last_time - datetime(1970, 1, 1)) // timedelta(milliseconds=1)
        df = pd.concat([df, dftmp], axis=0, ignore_index=True, keys=None)
        # Drop the 'Time' column
        df = df.drop(columns=['Time'])
        df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
        df = df.set_index('Date')
        df.to_csv('0y_eth_only17andnew.csv', index=True, header=True)
    return df

In [2]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from get_data import *
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import numpy as np
import random

In [3]:
# Lấy dữ liệu
df = get_klines_iter("BTCBUSD", '1d', '2021-04-27', '2023-04-27')

# Chia dữ liệu thành tập huấn luyện và tập kiểm tra
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Chuẩn hóa dữ liệu
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

# Hàm chuẩn bị dữ liệu cho mô hình LSTM
def create_dataset(dataset, look_back=1):
   dataX, dataY = [], []
   for i in range(len(dataset) - look_back):
      dataX.append(dataset[i:(i + look_back)])
      dataY.append(dataset[i + look_back, 3]) # Lấy giá trị Close
   return np.array(dataX), np.array(dataY)

def create_dataset_for_test(dataset, look_back=1):
   dataX = []
   dataX.append(dataset[len(dataset) - look_back:len(dataset)])
   return np.array(dataX)

look_back = 50
time_line = test[look_back:]
# Chuẩn bị dữ liệu cho LSTM

trainX, trainY = create_dataset(train_scaled, look_back)
testX, testY = create_dataset(test_scaled, look_back)
test_1 = create_dataset_for_test(test_scaled, look_back)

# Reshape dữ liệu để phù hợp với đầu vào của LSTM
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], trainX.shape[2]))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], testX.shape[2]))
test_1 = np.reshape(test_1, (test_1.shape[0], test_1.shape[1], test_1.shape[2]))

In [4]:
a = test.values

In [5]:
a[-1][3]

29103.4

In [6]:
test

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-12-02,16978.38,17106.55,16784.94,17093.88,129245.20465
2022-12-03,17093.53,17159.48,16861.53,16886.96,90065.33051
2022-12-04,16886.81,17208.60,16881.01,17109.11,102741.43485
2022-12-05,17108.73,17429.88,16869.24,16966.35,138198.94709
2022-12-06,16967.29,17106.31,16908.25,17089.12,135076.56100
...,...,...,...,...,...
2023-04-23,27823.86,27823.87,27335.00,27601.27,5902.23925
2023-04-24,27601.28,28022.74,26967.33,27516.67,11064.07423
2023-04-25,27516.66,28393.93,27203.38,28307.95,10406.20768
2023-04-26,28307.95,30035.64,27248.00,28424.86,27422.60027


In [7]:
testY[-1]

0.2575218616968402

In [8]:
a[-1][3]/testY[-1]

113013.31781400794

In [9]:
testY * a[-1][3]/testY[-1]

array([15324.68673678, 15161.28105766, 15615.00394188, 14995.86852733,
       15927.04749278, 15811.5008525 , 15957.54290586, 15845.87908642,
       17412.09558098, 15415.73670403, 16067.50526448, 17390.56555758,
       16862.15290627, 16733.82349627, 16506.85298617, 15655.79537426,
       15271.22160269, 16318.05627641, 15708.80242275, 13173.5603664 ,
       12797.95198451, 13314.3453421 , 13142.75956291, 13167.23442234,
       14061.46114905, 18687.99487043, 16921.02781321, 19226.09276023,
       19357.30156545, 18579.05775102, 19814.9290841 , 18962.60628338,
       18375.97313313, 17849.13106104, 16196.68540488, 16135.23649311,
       17002.06533797, 16865.07592869, 16103.10506001, 17174.61091557,
       16812.2215754 , 14384.58600885, 14365.62999027, 14550.54387919,
       14498.75839231, 14039.88749844, 12958.827286  , 10032.81639562,
        9702.34035254, 10564.32657815, 13792.39039055, 18388.34144445,
       19620.7662287 , 18779.30660088, 20246.00945131, 25432.9563755 ,
      