In [28]:
import pandas as pd

In [29]:
def filter_zero_volumes(df, symbol, data_dir="./data"):
    """
    Filters out consecutive zero volume rows in a dataframe, keeping only the
    first zero value in each sequence of zeros, and saves the result to
    "<data_dir>/<symbol>_5min.csv".

    The input dataframe is not modified: the timestamp conversion is done on a
    derived frame and the zero-volume masks are kept as local Series instead of
    being written into ``df`` as helper columns.

    Args:
        df: A time series of prices and volumes. Must contain 'timestamp' and
            'volume' columns. Rows are processed in their existing order (no
            sorting is performed here).
        symbol: The trading pair symbol; used to build the output file name.
        data_dir: Directory the CSV is written to. Defaults to "./data". The
            directory is not created by this function.

    Returns:
        None. The filtered data is only written to disk.
    """
    # Ensure 'timestamp' is a datetime, on a new frame so the caller's df stays untouched
    df_converted = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Identify rows with zero volume
    is_zero_volume = df_converted['volume'] == 0

    # Mark the first zero volume in consecutive zeros: a zero row whose previous
    # row is not zero. fill_value=False makes a zero in the very first row count
    # as the start of a sequence.
    is_first_zero = is_zero_volume & ~is_zero_volume.shift(1, fill_value=False)

    # Keep zero volume row only if it's the first zero in a sequence of zeros
    df_filtered = df_converted[is_first_zero | ~is_zero_volume]

    # Save to CSV (row index is not written)
    df_filtered.to_csv(f"{data_dir}/{symbol}_5min.csv", index=False)

    return

In [30]:
# Symbols whose 5-minute CSV files are loaded and passed through filter_zero_volumes.
symbols = [
    'ASML', 'BRENT', 'EURUSD', 'EUS',
    'JPM', 'NFLX', 'NOVOB', 'SPY',
    'USDJPY', 'XOM', 'HKG', 'ADS',
    'VOW3', 'XAUUSD', 'BTC', 'ETH',
]

# Read each symbol's 5-minute file and filter it; the function writes the CSV itself.
for symbol in symbols:
    df = pd.read_csv(f"./data/{symbol}_5min.csv")
    filter_zero_volumes(df, symbol)

In [31]:
def aggregate_to_daily(df, symbol, data_dir="./data"):
    """
    Aggregates 5-minute data to daily data and saves it to
    "<data_dir>/<symbol>_daily.csv".

    Each calendar day is reduced to one row: first 'open', max 'high',
    min 'low', last 'close' and summed 'volume'. Days that end up with a
    missing value in any column (e.g. days without any input rows) are dropped.

    The input dataframe is not modified: the timestamp conversion is done on a
    derived frame rather than by overwriting ``df['timestamp']``.

    Args:
        df: A time series of prices and volumes. Must contain 'timestamp',
            'open', 'high', 'low', 'close' and 'volume' columns.
        symbol: The trading pair symbol; used to build the output file name.
        data_dir: Directory the CSV is written to. Defaults to "./data". The
            directory is not created by this function.

    Returns:
        None. The daily data is only written to disk.
    """
    # Ensure 'timestamp' is a datetime, on a new frame so the caller's df stays untouched
    df_converted = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Aggregate by day
    df_daily = df_converted.resample('D', on='timestamp').agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    })

    # Drop rows with missing values (calendar days that had no price data)
    df_daily = df_daily.dropna()

    # Save to CSV; the daily timestamp is the index and is written as the first column
    df_daily.to_csv(f"{data_dir}/{symbol}_daily.csv")

    return


In [32]:
# Symbols whose 5-minute CSV files are loaded and passed to aggregate_to_daily.
symbols = [
    'ASML', 'BRENT', 'EURUSD', 'EUS',
    'JPM', 'NFLX', 'NOVOB', 'SPY',
    'USDJPY', 'XOM', 'HKG', 'ADS',
    'VOW3', 'XAUUSD', 'BTC', 'ETH',
]

# Read each symbol's 5-minute file and aggregate it; the function writes the daily CSV itself.
for symbol in symbols:
    df = pd.read_csv(f"./data/{symbol}_5min.csv")
    aggregate_to_daily(df, symbol)