## Imports

In [101]:
import pandas as pd

## Read data

In [102]:
# Base name of the price dump (without extension); also reused when naming the
# OHLC output file.
filename = 'price_0x88e6A0c2dDD26FEEb64F039a2c41296FcB3f5640_17846003_17946003_2400'
# Relative location of the input CSV inside the data directory.
filepath = 'data/' + filename + '.csv'

In [103]:
# Load the raw price samples from the CSV at `filepath`.
df = pd.read_csv(filepath_or_buffer=filepath)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,block_number,timestamp,sqrt_price_x96,price
0,17846003,1691205083,1853108454851453304155515201922274,0.000547
1,17848403,1691234099,1851322486752172637560000266968502,0.000546
2,17850803,1691263055,1849161700064716265427670227259749,0.000545
3,17853203,1691292083,1850319883285144859934317017619672,0.000545
4,17855603,1691321123,1852406778307200559820017252423304,0.000547


## Create OHLC data

In [104]:
def create_ohlc(
    data: pd.DataFrame,
    freq: str,
    time_col: str = 'timestamp',
    price_col: str = 'price'
) -> pd.DataFrame:
    """
    Resample a price series into OHLC (open/high/low/close) bars.

    The input frame is not modified.

    Parameters:
    - data (pd.DataFrame): Input data frame containing at least `time_col`
      and `price_col`.
    - freq (str): Resampling frequency passed to `resample` (any pandas
      offset alias, e.g., 'D' for daily, 'W' for weekly).
    - time_col (str): Name of the timestamp column. Values are interpreted as
      UNIX timestamps in seconds, unless the column already has a datetime64
      dtype, in which case it is used as-is.
    - price_col (str): Name of the column with price data (numeric, or
      values that `pd.to_numeric` can parse).

    Returns:
    - pd.DataFrame: One row per `freq` bin, indexed by bin timestamp (index
      named `time_col`), with columns 'open', 'high', 'low', 'close'.

    Raises:
    - KeyError: If `time_col` or `price_col` is not a column of `data`.
    """

    # Fail early with a message that names the missing column(s).
    missing = [col for col in (time_col, price_col) if col not in data.columns]
    if missing:
        raise KeyError(f"Column(s) not found in data: {missing}")

    prices = pd.to_numeric(data[price_col])

    times = data[time_col]
    # Only convert when the column is not already datetime: running
    # to_numeric on datetime64 values yields nanosecond integers, which would
    # then be misread as seconds.
    if not pd.api.types.is_datetime64_any_dtype(times):
        times = pd.to_datetime(pd.to_numeric(times), unit='s')

    # Re-index the prices by time on a new Series (the caller's frame is left
    # untouched) and aggregate each bin into open/high/low/close.
    time_index = pd.DatetimeIndex(times, name=time_col)
    ohlc_df = prices.set_axis(time_index).resample(freq).ohlc()

    return ohlc_df

In [105]:
# Resampling frequency for the OHLC bars; also used as the output file suffix.
frequency = 'D'
# Build the OHLC table with the default 'timestamp' and 'price' columns.
df_ohlc = create_ohlc(data=df, freq=frequency)

In [106]:
# Preview the first five OHLC rows.
df_ohlc.head(n=5)

Unnamed: 0_level_0,open,high,low,close
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-05,0.000547,0.000547,0.000545,0.000545
2023-08-06,0.000545,0.000547,0.000545,0.000546
2023-08-07,0.000544,0.00055,0.000544,0.00055
2023-08-08,0.000547,0.000547,0.000538,0.000538
2023-08-09,0.000539,0.000541,0.000537,0.000541


## Save data

In [107]:
# Write the OHLC table next to the input file, suffixed with the frequency.
ohlc_path = f'data/{filename}_{frequency}.csv'
df_ohlc.to_csv(ohlc_path)