In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Directory holding the yearly source CSVs and receiving the combined output.
# load_multiple_files and save_file prepend this value to every file name.
# NOTE(review): the value ends in a Windows-style backslash, so the notebook
# presumably targets Windows only; confirm before running elsewhere.
base_path = 'data\\'

In [3]:
def load_multiple_files(filenames, resolution_minutes=1, mindate=None, maxdate=None) -> list:
    """Read header-less OHLCV CSV files and resample each one into fixed-width bars.

    Every file is read from ``base_path`` with the columns
    ``ts, open, high, low, close, volume``; ``ts`` is interpreted as a Unix
    timestamp in seconds.

    Args:
        filenames: Iterable of file names located inside ``base_path``.
        resolution_minutes: Width of the output bars in minutes (must be > 0).
        mindate: Optional inclusive lower bound used to slice the datetime index.
        maxdate: Optional inclusive upper bound used to slice the datetime index.
            The filter is applied when either bound is given.

    Returns:
        A list with one DataFrame per input file, in input order. Each frame is
        indexed by a datetime index named ``date`` and has the columns
        ``ts, open, high, low, close, volume``, where ``ts`` is the first
        timestamp seen in the bar.

    Raises:
        ValueError: If ``resolution_minutes`` is not positive.
    """
    if resolution_minutes <= 0:
        raise ValueError(f"resolution_minutes must be positive, got {resolution_minutes}")

    column_names = ['ts', 'open', 'high', 'low', 'close', 'volume']
    resolution_seconds = resolution_minutes * 60
    # String aggregation names use pandas' built-in groupby reductions; like
    # max/min/sum, 'first' and 'last' skip missing values.
    aggregations = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last',
                    'volume': 'sum', 'ts': 'first'}

    dfs = []
    for filename in filenames:
        full_filename = os.path.join(base_path, filename)
        print(f"reading file {full_filename}")
        df = pd.read_csv(full_filename, names=column_names)
        df.index = pd.to_datetime(df['ts'], unit='s').rename('date')

        if mindate is not None or maxdate is not None:
            print(f"Filtering by date range {mindate} - {maxdate}")
            # Copy so that adding the bucket column below does not write into a slice.
            df = df.loc[mindate:maxdate].copy()
            print(f"{len(df)} rows in filtered df")

        # Floor each timestamp to the start of its bar. Floor division keeps the
        # bucket exact even when ts / resolution_seconds is fractional.
        df['tsgroup'] = df['ts'] // resolution_seconds * resolution_seconds

        groupped = df.groupby('tsgroup').agg(aggregations)
        groupped.index = pd.to_datetime(groupped['ts'], unit='s').rename('date')
        dfs.append(groupped[column_names])
    return dfs

def concat_dfs(dfs):
    """Stack the given DataFrames row-wise, in the order supplied, into one frame."""
    combined = pd.concat(dfs)
    return combined

def save_file(df, target_filename):
    """Write ``df`` to ``base_path``/``target_filename`` as a header-less CSV.

    The index is discarded and any ``date`` column is removed, so only the
    remaining columns are written, one row per line and without a header.

    Args:
        df: DataFrame to persist; it is not modified.
        target_filename: Name of the output file inside ``base_path``.
    """
    full_filename = os.path.join(base_path, target_filename)
    # Drop the index outright instead of materialising it as a column and then
    # deleting it by name, which fails when the index is not called 'date'.
    without_dates = df.reset_index(drop=True).drop(columns='date', errors='ignore')
    without_dates.to_csv(full_filename, index=False, header=False)
    
def get_source_filenames(prefix_name, start_year, end_year):
    """Return ``<prefix_name><year>.csv`` for every year from start_year to end_year inclusive."""
    years = range(start_year, end_year + 1)
    return [f"{prefix_name}{year}.csv" for year in years]

def combine_files(prefix_name, start_year, end_year, resolution, target_filename):
    """Merge the yearly files of one series into a single CSV.

    Builds the per-year file names for ``prefix_name``, loads each one resampled
    to ``resolution`` minutes, concatenates the results in year order and saves
    the combined frame as ``target_filename``.
    """
    source_names = get_source_filenames(prefix_name, start_year, end_year)
    yearly_frames = load_multiple_files(source_names, resolution_minutes=resolution)
    save_file(concat_dfs(yearly_frames), target_filename)

In [4]:
# Each trading pair together with the start year passed to combine_files for it.
# SYMBOLS and START_YEAR are parallel lists derived from these pairs.
_SYMBOL_START_YEARS = [
    ('BTCUSDT', 2017),
    ('ETHUSDT', 2017),
    ('ADAUSDT', 2018),
    ('BNBUSDT', 2017),
    ('XRPUSDT', 2018),
    ('SOLUSDT', 2020),
    ('DOTUSDT', 2020),
    ('DOGEUSDT', 2019),
    ('AVAXUSDT', 2020),
    ('UNIUSDT', 2020),
]
SYMBOLS = [pair for pair, _year in _SYMBOL_START_YEARS]
START_YEAR = [year for _pair, year in _SYMBOL_START_YEARS]

In [12]:
# Build one combined 1-minute file per symbol, from its start year through 2022.
for idx, symbol in enumerate(SYMBOLS):
    start, end = START_YEAR[idx], 2022
    combine_files(
        prefix_name=f"binance_{symbol}_",
        start_year=start,
        end_year=end,
        resolution=1,
        target_filename=f"raw_{symbol}_1min.csv",
    )

reading file data\\binance_BTCUSDT_2017.csv
reading file data\\binance_BTCUSDT_2018.csv
reading file data\\binance_BTCUSDT_2019.csv
reading file data\\binance_BTCUSDT_2020.csv
reading file data\\binance_BTCUSDT_2021.csv
reading file data\\binance_BTCUSDT_2022.csv
reading file data\\binance_ETHUSDT_2017.csv
reading file data\\binance_ETHUSDT_2018.csv
reading file data\\binance_ETHUSDT_2019.csv
reading file data\\binance_ETHUSDT_2020.csv
reading file data\\binance_ETHUSDT_2021.csv
reading file data\\binance_ETHUSDT_2022.csv
reading file data\\binance_ADAUSDT_2018.csv
reading file data\\binance_ADAUSDT_2019.csv
reading file data\\binance_ADAUSDT_2020.csv
reading file data\\binance_ADAUSDT_2021.csv
reading file data\\binance_ADAUSDT_2022.csv
reading file data\\binance_BNBUSDT_2017.csv
reading file data\\binance_BNBUSDT_2018.csv
reading file data\\binance_BNBUSDT_2019.csv
reading file data\\binance_BNBUSDT_2020.csv
reading file data\\binance_BNBUSDT_2021.csv
reading file data\\binance_BNBUS

In [None]:
# 15-minute combined files for a hand-picked set of pairs: (pair, start year).
for pair, first_year in [('BTCUSDT', 2017), ('DOGEUSDT', 2019), ('FUNUSDT', 2019), ('FUNBTC', 2017)]:
    combine_files(f'binance_{pair}_', first_year, 2022, 15, f'binance_{pair}_ALL_15min.csv')