The data was accessed from https://www.histdata.com/

In [None]:
import os, zipfile, pandas as pd

# Archives are looked up under RAW_DIR; resampled CSVs are written to PROC_DIR,
# which is created up front so later writes cannot fail on a missing folder.
RAW_DIR = 'raw'
PROC_DIR = 'processed'
os.makedirs(PROC_DIR, exist_ok=True)

# Archive file names per currency pair, one entry per year suffix.
_PAIRS = ('EURUSD', 'GBPUSD', 'USDJPY', 'USDCHF')
_YEAR_SUFFIXES = (22, 23, 24)  # appended after the literal "20" in the name
CURRENCY_ZIPS = {
    ccy: [f'HISTDATA_COM_MT_{ccy}_M120{yy}.zip' for yy in _YEAR_SUFFIXES]
    for ccy in _PAIRS
}

# Fixed relative spread: the ask is synthesised as bid × ASK_FACTOR.
ASK_FACTOR = 1.00015

def one_zip_to_df(zip_path: str) -> pd.DataFrame:
    """Load the close prices stored in one zipped price file.

    The chosen archive member is parsed as a header-less, comma-separated
    table with the columns ``date, time, open, high, low, close, vol``.
    ``date`` and ``time`` are joined and parsed with ``%Y.%m.%d %H:%M``.

    Args:
        zip_path: Path of the ``.zip`` archive to read.

    Returns:
        A DataFrame with the single column ``close``, indexed by the parsed
        timestamp (index name ``time``), rows in file order.

    Raises:
        IndexError: If the archive has no members at all.
    """
    cols = ['date', 'time', 'open', 'high', 'low', 'close', 'vol']
    with zipfile.ZipFile(zip_path) as z:
        members = z.namelist()
        # Member order inside a zip is not guaranteed, and an archive may
        # carry companion files next to the data. Pick the CSV explicitly;
        # only fall back to the first member when nothing is named *.csv.
        csv_members = [m for m in members if m.lower().endswith('.csv')]
        member = csv_members[0] if csv_members else members[0]
        with z.open(member) as f:
            # A plain comma needs no regex separator, so the default (fast)
            # parser is used. date/time are pinned to str so the string
            # concatenation below never depends on type inference.
            df = pd.read_csv(f, names=cols,
                             dtype={'date': str, 'time': str})
    dt = pd.to_datetime(df['date'] + ' ' + df['time'],
                        format='%Y.%m.%d %H:%M')
    return df.assign(time=dt).set_index('time')[['close']]

# For every currency pair: load whichever archives exist under RAW_DIR,
# reduce them to 15-minute bid/ask bars and write one CSV into PROC_DIR.
for pair, zips in CURRENCY_ZIPS.items():
    dfs = []
    for zip_name in zips:
        zip_path = os.path.join(RAW_DIR, zip_name)
        # Archives that are not on disk are skipped silently.
        if os.path.isfile(zip_path):
            dfs.append(one_zip_to_df(zip_path))

    if dfs:
        # NOTE: despite its name, `hourly` holds 15-minute bars. Each bar is
        # the last close seen in its window; windows without data are dropped.
        merged = pd.concat(dfs).sort_index()
        hourly = merged.resample('15min').last().dropna()
        hourly = hourly.rename(columns={'close': 'bid'})

        # Synthesise the ask from the bid with the constant factor.
        hourly['ask'] = hourly['bid'] * ASK_FACTOR

        out = os.path.join(PROC_DIR, f'{pair}_15min.csv')
        hourly.to_csv(out, float_format='%.5f')
        print('saved', out)
    else:
        print(f'No files for {pair}')