In [1]:
import pandas as pd
import numpy as np
# Directory that load() reads the raw_<symbol>_1min.csv files from and that
# save() writes the aggregated <symbol>_<resolution>.csv files into.
base_path = 'data'

In [5]:
def convert(df, symbol):
    """
    Builds the output frame for one symbol without modifying the input.

    Parameters:
        df: DataFrame with a 'ts' column of epoch seconds.
        symbol: value written to every row of the 'tic' column.

    Returns:
        A new DataFrame with 'date' (datetime parsed from 'ts') as the first
        column, the remaining input columns in their original order, then
        'tic' and 'day' (day of week, Monday=0). The 'ts' column is removed.
    """
    dates = pd.to_datetime(df['ts'], unit='s')
    # drop() returns a new frame, so the caller's DataFrame is left untouched.
    # A pre-existing 'date' column is discarded so it can be re-inserted first.
    out = df.drop(columns=['ts', 'date'], errors='ignore')
    out.insert(0, 'date', dates)
    out['tic'] = symbol
    out['day'] = dates.dt.day_of_week
    return out

def load(symbol):
    """
    Reads the headerless 1-minute CSV for symbol from base_path.

    Returns a DataFrame whose columns are named
    ts, open, high, low, close, volume (in file order).
    """
    column_names = ['ts', 'open', 'high', 'low', 'close', 'volume']
    csv_path = f"{base_path}/raw_{symbol}_1min.csv"
    return pd.read_csv(csv_path, names=column_names, header=None)

def save(df, symbol, resolution):
    """
    Writes df to "{base_path}/{symbol}_{label}.csv" and prints the path.

    Parameters:
        df: DataFrame to write (written with DataFrame.to_csv defaults).
        symbol: used as the file name prefix.
        resolution: bar length in minutes, used to build the label.

    The label is "<n>d" for a whole number of days, "<n>h" for a whole number
    of hours, and "<resolution>min" otherwise. Resolutions that are not a
    whole number of hours/days keep the minute label so that, for example,
    90 minutes is not truncated to "1h" and written over the 60-minute file.
    """
    if resolution >= 1440 and resolution % 1440 == 0:
        res_name = f"{int(resolution // 1440)}d"
    elif resolution >= 60 and resolution % 60 == 0:
        res_name = f"{int(resolution // 60)}h"
    else:
        res_name = f"{resolution}min"
    filename = f"{base_path}/{symbol}_{res_name}.csv"
    df.to_csv(filename)
    print(f"{filename} written")

def aggregate(df, resolution_minutes=1) -> pd.DataFrame:
    """
    Aggregates minute data into longer durations without modifying the input.

    Rows are grouped by their 'ts' value floored to a multiple of
    resolution_minutes * 60. Within each group:
        open   - value of the first row
        high   - maximum
        low    - minimum
        close  - value of the last row
        volume - sum
        ts     - 'ts' of the first row

    Parameters:
        df: DataFrame with columns ts, open, high, low, close, volume.
        resolution_minutes: length of each output bar in minutes; must be > 0.

    Returns:
        A new DataFrame with columns ts, open, high, low, close, volume and a
        fresh 0..n-1 index, one row per group, ordered by group start.

    Raises:
        ValueError: if resolution_minutes is not positive.
    """
    if resolution_minutes <= 0:
        raise ValueError("resolution_minutes must be positive")
    resolution_seconds = resolution_minutes * 60
    # Floor division buckets the timestamps directly; the key is kept as a
    # separate Series so no helper column is added to the caller's frame.
    bucket = (df['ts'] // resolution_seconds * resolution_seconds).rename('tsgroup')
    # First/last use positional access so a NaN in the first/last row is kept
    # as-is; max/min/sum use pandas' own string aliases.
    agg_spec = {
        'open': lambda x: x.iloc[0],
        'high': 'max',
        'low': 'min',
        'close': lambda x: x.iloc[-1],
        'volume': 'sum',
        'ts': lambda x: x.iloc[0],
    }
    grouped = df.groupby(bucket).agg(agg_spec)
    result = grouped[['ts', 'open', 'high', 'low', 'close', 'volume']]
    return result.reset_index(drop=True)

def reformat_symbol(symbol, resolution):
    """
    Runs the whole pipeline for one symbol at one resolution:
    load -> aggregate -> convert -> save.

    Returns the converted DataFrame that was passed to save().
    """
    candles = load(symbol).pipe(aggregate, resolution).pipe(convert, symbol)
    save(candles, symbol, resolution)
    return candles

In [3]:
# Symbols to process; each needs a raw_<symbol>_1min.csv under base_path.
SYMBOLS = [
    'BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT', 'XRPUSDT',
    'SOLUSDT', 'DOTUSDT', 'DOGEUSDT', 'AVAXUSDT', 'UNIUSDT',
]
# Output bar lengths in minutes.
RESOLUTIONS = [1, 5, 60, 1440]

# NOTE(review): the output saved with this cell lists no *_1min.csv files even
# though 1 is in RESOLUTIONS - the recorded output looks stale; re-run the cell.
for symbol in SYMBOLS:
    print(symbol)
    for resolution in RESOLUTIONS:
        reformat_symbol(symbol, resolution)

BTCUSDT
data/BTCUSDT_5min.csv written
data/BTCUSDT_1h.csv written
data/BTCUSDT_1d.csv written
ETHUSDT
data/ETHUSDT_5min.csv written
data/ETHUSDT_1h.csv written
data/ETHUSDT_1d.csv written
ADAUSDT
data/ADAUSDT_5min.csv written
data/ADAUSDT_1h.csv written
data/ADAUSDT_1d.csv written
BNBUSDT
data/BNBUSDT_5min.csv written
data/BNBUSDT_1h.csv written
data/BNBUSDT_1d.csv written
XRPUSDT
data/XRPUSDT_5min.csv written
data/XRPUSDT_1h.csv written
data/XRPUSDT_1d.csv written
SOLUSDT
data/SOLUSDT_5min.csv written
data/SOLUSDT_1h.csv written
data/SOLUSDT_1d.csv written
DOTUSDT
data/DOTUSDT_5min.csv written
data/DOTUSDT_1h.csv written
data/DOTUSDT_1d.csv written
DOGEUSDT
data/DOGEUSDT_5min.csv written
data/DOGEUSDT_1h.csv written
data/DOGEUSDT_1d.csv written
AVAXUSDT
data/AVAXUSDT_5min.csv written
data/AVAXUSDT_1h.csv written
data/AVAXUSDT_1d.csv written
UNIUSDT
data/UNIUSDT_5min.csv written
data/UNIUSDT_1h.csv written
data/UNIUSDT_1d.csv written
