## BTC 24년 1월 데이터 추가

In [5]:
def convert_tick_to_ohlcv(data):
    """
    Converts given Binance tick data into 1-hour interval OHLCV (Open, High, Low, Close, Volume) data.
    :param data: DataFrame with Tick data
    :return: DataFrame with the Open, High, Low, Close, Volume values
    """

    data['time'] = pd.to_datetime(data['time'], unit='ms')
    ohlcv = data.resample('1H', on='time').agg({
        'price': ['first', 'max', 'min', 'last'],
        'qty': 'sum'
})

    ohlcv.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    return ohlcv

def calculate_volatility(data, window=20):
    """
    Calculate the rolling volatility using the standard deviation of returns.
    :param data: DataFrame with OHLCV data
    :param window: The number of periods to use for calculating the standard deviation
    :return: DataFrame with the volatility values
    """

    # Calculate daily returns
    data['returns'] = data['Close'].pct_change()

    # Calculate the rolling standard deviation of returns
    data['volatility'] = data['returns'].rolling(window=window).std()

    return data

#### 2024 1월 데이터 concat

In [None]:
import pandas as pd

PATH = '../data/BTCUSDT-trades-2024-01-'

months = [str(month).zfill(2) for month in range(1, 28)]
dfs = []

for month in months:
    df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable
    dfs.append(df)

# Concatenate the DataFrames into a single DataFrame
result = pd.concat(dfs, ignore_index=True)

result.head()
result.to_csv('../data/BTCUSDT-trades-2024-01.csv')


  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable
  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable


In [None]:
## 임시로 나눠서 처리

In [2]:
# import pandas as pd
# PATH = '../data/BTCUSDT-trades-2024-01-0'

# months = [str(month).zfill(2) for month in range(1,4)]
# dfs = []

# for month in months:
#     df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable
#     dfs.append(df)

# # Concatenate the DataFrames into a single DataFrame
# result = pd.concat(dfs, ignore_index=True)

# result.head()
# result.to_csv('../data/BTCUSDT-trades-2024-01.csv')

  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable


In [1]:
import pandas as pd
df = pd.read_csv('../data/BTCUSDT-trades-2024-01.csv')
df.drop(columns = 'Unnamed: 0',inplace = True)

  df = pd.read_csv('../data/BTCUSDT-trades-2024-01.csv')


In [7]:
df_1 = convert_tick_to_ohlcv(df)

In [18]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 30 entries, 2024-01-01 00:00:00 to 2024-01-02 05:00:00
Freq: H
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    30 non-null     float64
 1   High    30 non-null     float64
 2   Low     30 non-null     float64
 3   Close   30 non-null     float64
 4   Volume  30 non-null     float64
dtypes: float64(5)
memory usage: 1.4 KB


In [10]:
df_2 = calculate_volatility(df_1, window=20)

In [11]:
df_2

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 00:00:00,41837.8,41837.8,41837.8,41837.8,0.160,,
1970-01-01 01:00:00,,,,,0.000,0.000000,
1970-01-01 02:00:00,,,,,0.000,0.000000,
1970-01-01 03:00:00,,,,,0.000,0.000000,
1970-01-01 04:00:00,,,,,0.000,0.000000,
...,...,...,...,...,...,...,...
2024-01-26 22:00:00,41976.9,41994.7,41808.9,41887.6,7326.679,-0.002127,0.006608
2024-01-26 23:00:00,41887.7,41963.7,41779.2,41806.4,4860.486,-0.001939,0.006610
2024-01-27 00:00:00,41806.4,41849.9,41681.8,41772.8,6391.058,-0.000804,0.006643
2024-01-27 01:00:00,41772.8,41924.0,41754.2,41804.9,4578.196,0.000768,0.006611


In [12]:
df_3 = df_2['2024':]

In [14]:
def convert_tick_to_pqi(data):

    data['time'] = pd.to_datetime(data['time'], unit='ms')
    pqi = data.resample('1H', on='time').agg({
    'price': 'mean',
    'quote_qty': 'std',
    'is_buyer_maker': 'sum'})
    
    pqi.columns = ['price','quote_qty','is_buyer_maker']
    return pqi


In [15]:
df_4 = convert_tick_to_pqi(df)

In [16]:
df_5 = df_4['2024':]

In [17]:
df_5

Unnamed: 0_level_0,price,quote_qty,is_buyer_maker
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01 00:00:00,42462.482375,16968.721894,41778
2024-01-01 01:00:00,42665.175592,14518.515803,40965
2024-01-01 02:00:00,42600.060137,11961.584249,28065
2024-01-01 03:00:00,42431.296439,13903.234752,48188
2024-01-01 04:00:00,42345.917605,17903.325237,31386
...,...,...,...
2024-01-26 22:00:00,41887.660978,17410.642021,47854
2024-01-26 23:00:00,41862.772698,13003.694973,31809
2024-01-27 00:00:00,41773.334298,11312.201136,48735
2024-01-27 01:00:00,41835.167178,10297.630180,30310


In [19]:
df_3

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-01 00:00:00,42314.0,42603.2,42289.6,42503.5,8459.477,0.015911,0.003558
2024-01-01 01:00:00,42503.5,42832.0,42462.0,42647.9,9043.411,0.003397,0.003599
2024-01-01 02:00:00,42647.9,42676.9,42530.0,42620.4,4653.067,-0.000645,0.003611
2024-01-01 03:00:00,42620.5,42630.0,42270.0,42369.8,8119.880,-0.005880,0.003917
2024-01-01 04:00:00,42369.8,42439.8,42235.2,42436.6,6356.536,0.001577,0.003919
...,...,...,...,...,...,...,...
2024-01-26 22:00:00,41976.9,41994.7,41808.9,41887.6,7326.679,-0.002127,0.006608
2024-01-26 23:00:00,41887.7,41963.7,41779.2,41806.4,4860.486,-0.001939,0.006610
2024-01-27 00:00:00,41806.4,41849.9,41681.8,41772.8,6391.058,-0.000804,0.006643
2024-01-27 01:00:00,41772.8,41924.0,41754.2,41804.9,4578.196,0.000768,0.006611


In [21]:
concatenated_df = pd.concat([df_3, df_5], axis=1)

In [22]:
concatenated_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility,price,quote_qty,is_buyer_maker
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-01 00:00:00,42314.0,42603.2,42289.6,42503.5,8459.477,0.015911,0.003558,42462.482375,16968.721894,41778
2024-01-01 01:00:00,42503.5,42832.0,42462.0,42647.9,9043.411,0.003397,0.003599,42665.175592,14518.515803,40965
2024-01-01 02:00:00,42647.9,42676.9,42530.0,42620.4,4653.067,-0.000645,0.003611,42600.060137,11961.584249,28065
2024-01-01 03:00:00,42620.5,42630.0,42270.0,42369.8,8119.880,-0.005880,0.003917,42431.296439,13903.234752,48188
2024-01-01 04:00:00,42369.8,42439.8,42235.2,42436.6,6356.536,0.001577,0.003919,42345.917605,17903.325237,31386
...,...,...,...,...,...,...,...,...,...,...
2024-01-26 22:00:00,41976.9,41994.7,41808.9,41887.6,7326.679,-0.002127,0.006608,41887.660978,17410.642021,47854
2024-01-26 23:00:00,41887.7,41963.7,41779.2,41806.4,4860.486,-0.001939,0.006610,41862.772698,13003.694973,31809
2024-01-27 00:00:00,41806.4,41849.9,41681.8,41772.8,6391.058,-0.000804,0.006643,41773.334298,11312.201136,48735
2024-01-27 01:00:00,41772.8,41924.0,41754.2,41804.9,4578.196,0.000768,0.006611,41835.167178,10297.630180,30310


In [25]:
concatenated_df['quote_qty_cal'] = concatenated_df['Volume']*concatenated_df['price']

In [23]:
df = pd.read_csv('../data/BIT_2023_1차.csv')
df.set_index('time', inplace=True)
df.index=pd.to_datetime(df.index)
dfc=df.copy()

In [24]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility,price,quote_qty,is_buyer_maker,quote_qty_cal
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-01 00:00:00,16537.5,16540.9,16504.0,16527.0,5381.399,,,16524.525741,7264.129209,16494,8.892507e+07
2023-01-01 01:00:00,16527.1,16554.3,16524.1,16550.4,3210.826,0.001416,,16537.215985,6819.889969,8705,5.309812e+07
2023-01-01 02:00:00,16550.5,16557.1,16534.8,16542.4,2399.668,-0.000483,,16545.818136,6030.420093,8468,3.970447e+07
2023-01-01 03:00:00,16542.5,16542.5,16515.0,16529.3,3214.480,-0.000792,,16525.582085,7003.352799,10007,5.312115e+07
2023-01-01 04:00:00,16529.2,16530.4,16508.8,16517.8,3150.954,-0.000696,,16518.286421,7378.899423,9476,5.204836e+07
...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31 19:00:00,42701.7,42741.9,42624.7,42659.9,3944.096,-0.000977,0.003040,42679.290530,14458.392200,23305,1.683312e+08
2023-12-31 20:00:00,42659.9,42724.5,42543.3,42599.1,4730.936,-0.001425,0.002863,42625.853600,12326.119420,28014,2.016602e+08
2023-12-31 21:00:00,42599.2,42717.0,42558.2,42558.9,3794.010,-0.000944,0.002673,42634.523520,13367.479780,21732,1.617558e+08
2023-12-31 22:00:00,42559.0,42629.5,42111.9,42294.8,11952.346,-0.006206,0.003008,42405.779790,19111.648040,67159,5.068486e+08


In [27]:
final = pd.concat([df,concatenated_df], axis = 0)

In [28]:
final

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility,price,quote_qty,is_buyer_maker,quote_qty_cal
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-01 00:00:00,16537.5,16540.9,16504.0,16527.0,5381.399,,,16524.525741,7264.129209,16494,8.892507e+07
2023-01-01 01:00:00,16527.1,16554.3,16524.1,16550.4,3210.826,0.001416,,16537.215985,6819.889969,8705,5.309812e+07
2023-01-01 02:00:00,16550.5,16557.1,16534.8,16542.4,2399.668,-0.000483,,16545.818136,6030.420093,8468,3.970447e+07
2023-01-01 03:00:00,16542.5,16542.5,16515.0,16529.3,3214.480,-0.000792,,16525.582085,7003.352799,10007,5.312115e+07
2023-01-01 04:00:00,16529.2,16530.4,16508.8,16517.8,3150.954,-0.000696,,16518.286421,7378.899423,9476,5.204836e+07
...,...,...,...,...,...,...,...,...,...,...,...
2024-01-26 22:00:00,41976.9,41994.7,41808.9,41887.6,7326.679,-0.002127,0.006608,41887.660978,17410.642021,47854,3.068974e+08
2024-01-26 23:00:00,41887.7,41963.7,41779.2,41806.4,4860.486,-0.001939,0.006610,41862.772698,13003.694973,31809,2.034734e+08
2024-01-27 00:00:00,41806.4,41849.9,41681.8,41772.8,6391.058,-0.000804,0.006643,41773.334298,11312.201136,48735,2.669758e+08
2024-01-27 01:00:00,41772.8,41924.0,41754.2,41804.9,4578.196,0.000768,0.006611,41835.167178,10297.630180,30310,1.915296e+08


In [29]:
final.to_csv('../data/BTC_2324.csv',encoding = 'utf-8-sig')