## BTC 24년 1월 데이터 추가

In [1]:
def convert_tick_to_ohlcv(data):
    """
    Convert Binance tick data into 1-hour OHLCV (Open, High, Low, Close, Volume) bars.

    The caller's frame is not modified: the timestamp conversion is done on a
    narrow working copy holding only the columns needed for the aggregation.

    :param data: DataFrame with tick data; must contain the columns ``time``
        (epoch timestamps in milliseconds), ``price`` and ``qty``
    :return: DataFrame indexed by the start of each hour with the columns
        Open, High, Low, Close, Volume. Hours without any tick appear as rows
        with NaN prices and a Volume of 0.
    """

    # Build a small working frame instead of overwriting data['time'], so the
    # raw tick frame keeps its original millisecond timestamps.
    ticks = data[['price', 'qty']].assign(
        time=pd.to_datetime(data['time'], unit='ms')
    )

    # 'h' is the current spelling of the hourly alias ('H' is deprecated in
    # recent pandas releases).
    ohlcv = ticks.resample('1h', on='time').agg({
        'price': ['first', 'max', 'min', 'last'],
        'qty': 'sum',
    })

    # The aggregation yields two-level column labels; flatten them to the
    # conventional OHLCV names (same order as the aggregations above).
    ohlcv.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    return ohlcv

def calculate_volatility(data, window=20):
    """
    Attach per-period returns and their rolling standard deviation to an OHLCV frame.

    The frame is modified in place (columns ``returns`` and ``volatility`` are
    added or overwritten) and the same object is returned.

    :param data: DataFrame with a ``Close`` column
    :param window: Number of consecutive rows in each rolling window
    :return: The input DataFrame, now carrying ``returns`` and ``volatility``
    """

    # Row-over-row percentage change of the close; the period length is
    # whatever spacing the rows of the input have.
    close_prices = data['Close']
    period_returns = close_prices.pct_change()
    data['returns'] = period_returns

    # Sample standard deviation of the returns over the trailing window.
    rolling_returns = data['returns'].rolling(window=window)
    data['volatility'] = rolling_returns.std()

    return data

#### 2024 1월 데이터 concat

In [None]:
import pandas as pd

# Common prefix of the per-file trade dumps; a two-digit suffix and ".csv"
# are appended in the loop below.
PATH = '../data/BTCUSDT-trades-2024-01-'

# Despite the name, these are zero-padded suffixes "01" .. "27"
# (range(1, 28) stops before 28), used as the day part of the file name.
months = [str(month).zfill(2) for month in range(1, 28)]
dfs = []

for month in months:
    df = pd.read_csv(f'{PATH}{month}.csv')  # Reads e.g. '../data/BTCUSDT-trades-2024-01-01.csv'
    dfs.append(df)

# Concatenate the DataFrames into a single DataFrame with a fresh 0..n-1 index
result = pd.concat(dfs, ignore_index=True)

# The return value is not used; NOTE(review): not the last expression of the
# cell, so a notebook will presumably not display it.
result.head()
# Written with the default index=True, so the row index is saved as an extra
# leading column; the reload cell drops it again as 'Unnamed: 0'.
result.to_csv('../data/BTCUSDT-trades-2024-01.csv')


  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable
  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable


In [None]:
# NOTE(review): this prefix already ends in "0" and the suffixes below are
# zero-padded to two digits, so the paths read are '...-2024-01-001.csv' ..
# '...-2024-01-003.csv' — confirm that files with these names exist / are intended.
PATH = '../data/BTCUSDT-trades-2024-01-0'

# Zero-padded suffixes "01", "02", "03" (range(1, 4) stops before 4).
months = [str(month).zfill(2) for month in range(1,4)]
dfs = []

for month in months:
    df = pd.read_csv(f'{PATH}{month}.csv')  # One CSV per suffix: PATH + suffix + '.csv'
    dfs.append(df)

# Concatenate the DataFrames into a single DataFrame with a fresh 0..n-1 index
result = pd.concat(dfs, ignore_index=True)

# The return value is not used here.
result.head()
# Same output path as the full-month cell, so this overwrites that file.
# Default index=True also writes the row index as an extra leading column.
result.to_csv('../data/BTCUSDT-trades-2024-01.csv')

  df = pd.read_csv(f'{PATH}{month}.csv')  # Use f-string to include the month variable


In [13]:
import pandas as pd
# Reload the concatenated January tick file written by the concat cell.
df = pd.read_csv('../data/BTCUSDT-trades-2024-01.csv')
# The file was saved with its row index as an unnamed first column, which
# read_csv labels 'Unnamed: 0'; discard it.
df.drop(columns = 'Unnamed: 0',inplace = True)

In [14]:
# Keep a separate copy of the freshly loaded ticks, independent of later changes to df.
dfc = df.copy()

In [15]:
# Preview the first rows of the raw tick data.
df.head()

Unnamed: 0,id,price,qty,quote_qty,time,is_buyer_maker
0,4426785098,42314.0,0.033,1396.362,1704067000000.0,False
1,4426785099,42314.0,0.215,9097.51,1704067000000.0,False
2,4426785100,42314.0,0.1,4231.4,1704067000000.0,False
3,4426785101,42314.0,0.512,21664.768,1704067000000.0,False
4,4426785102,42314.0,0.007,296.198,1704067000000.0,False


In [17]:
# Aggregate the raw ticks into 1-hour OHLCV bars.
df1 = convert_tick_to_ohlcv(df)

In [18]:
# Summarise the hourly bars: index range, column dtypes and non-null counts.
df1.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 30 entries, 2024-01-01 00:00:00 to 2024-01-02 05:00:00
Freq: H
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    30 non-null     float64
 1   High    30 non-null     float64
 2   Low     30 non-null     float64
 3   Close   30 non-null     float64
 4   Volume  30 non-null     float64
dtypes: float64(5)
memory usage: 1.4 KB


In [26]:
# Adds 'returns' and 'volatility' (20-row rolling window) to df1 in place;
# the returned frame is the same df1 object and is only displayed here.
calculate_volatility(df1, window=20)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 00:00:00,43756.7,46412.7,43756.7,46412.7,0.096,,
1970-01-01 01:00:00,,,,,0.000,0.000000,
1970-01-01 02:00:00,,,,,0.000,0.000000,
1970-01-01 03:00:00,,,,,0.000,0.000000,
1970-01-01 04:00:00,,,,,0.000,0.000000,
...,...,...,...,...,...,...,...
2024-01-27 19:00:00,41785.0,41977.0,41775.1,41896.4,7765.565,0.002668,0.003597
2024-01-27 20:00:00,41896.5,42070.0,41896.4,42049.6,5408.243,0.003657,0.002154
2024-01-27 21:00:00,42049.6,42165.6,42001.8,42137.8,5859.277,0.002098,0.002189
2024-01-27 22:00:00,42137.8,42187.1,42057.6,42135.3,5271.863,-0.000059,0.002191


In [17]:
# Display the hourly bars, including the 'returns' and 'volatility' columns.
df1

Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,volatility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 00:00:00,43756.7,46412.7,43756.7,46412.7,0.096,,
1970-01-01 01:00:00,,,,,0.000,0.000000,
1970-01-01 02:00:00,,,,,0.000,0.000000,
1970-01-01 03:00:00,,,,,0.000,0.000000,
1970-01-01 04:00:00,,,,,0.000,0.000000,
...,...,...,...,...,...,...,...
2024-01-27 19:00:00,41785.0,41977.0,41775.1,41896.4,7765.565,0.002668,0.003597
2024-01-27 20:00:00,41896.5,42070.0,41896.4,42049.6,5408.243,0.003657,0.002154
2024-01-27 21:00:00,42049.6,42165.6,42001.8,42137.8,5859.277,0.002098,0.002189
2024-01-27 22:00:00,42137.8,42187.1,42057.6,42135.3,5271.863,-0.000059,0.002191


In [14]:
def convert_tick_to_pqi(data):
    """
    Aggregate tick data into 1-hour price / quote-quantity / buyer-maker statistics.

    The caller's frame is not modified: the timestamp conversion is done on a
    narrow working copy holding only the columns needed for the aggregation.

    :param data: DataFrame with tick data; must contain the columns ``time``
        (epoch timestamps in milliseconds), ``price``, ``quote_qty`` and
        ``is_buyer_maker``
    :return: DataFrame indexed by the start of each hour with the columns
        ``price`` (mean price), ``quote_qty`` (standard deviation of
        quote_qty) and ``is_buyer_maker`` (sum of the is_buyer_maker values)
    """

    # Build a small working frame instead of overwriting data['time'], so the
    # raw tick frame keeps its original millisecond timestamps.
    ticks = data[['price', 'quote_qty', 'is_buyer_maker']].assign(
        time=pd.to_datetime(data['time'], unit='ms')
    )

    # 'h' is the current spelling of the hourly alias ('H' is deprecated in
    # recent pandas releases).
    pqi = ticks.resample('1h', on='time').agg({
        'price': 'mean',
        'quote_qty': 'std',
        'is_buyer_maker': 'sum',
    })

    # Pin the output column names and order explicitly.
    pqi.columns = ['price', 'quote_qty', 'is_buyer_maker']
    return pqi


In [16]:
# NOTE(review): `data` is not assigned in any visible cell (the other cells use `df`) — confirm which frame is meant.
# The recorded run failed with TypeError ('int' + 'str'); presumably one of the aggregated columns holds strings — verify the column dtypes.
df2 = convert_tick_to_pqi(data)

TypeError: unsupported operand type(s) for +: 'int' and 'str'