In [None]:
import requests
import pandas as pd
import struct
import lzma
from datetime import datetime, timedelta
import os

In [None]:
# =========================================
# CONFIGURATION
# =========================================
# Currency pairs to download; each one ends up in its own CSV file.
PAIRS = [
    "EURUSD", "USDJPY", "GBPUSD", "USDCHF",
    "AUDUSD", "USDCAD", "NZDUSD",
    "EURGBP", "EURJPY", "GBPJPY", "AUDJPY",
]

# Date range to fetch; the download loop walks it one calendar day at a
# time and includes both endpoints.
START_DATE = datetime(year=2025, month=1, day=1)
END_DATE = datetime(year=2025, month=12, day=31)

# Root of the datafeed; per-pair / per-day file paths are appended to it.
BASE_URL = "https://datafeed.dukascopy.com/datafeed"

# Directory that receives the generated CSV files.
OUTPUT_DIR = "forex_m1_2025"

# Create the output directory up front; a no-op when it already exists.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)


In [None]:
# =========================================
# DOWNLOAD ONE DAY OF M1 CANDLES
# =========================================
def download_m1_day(pair, date, point=None):
    """Download and decode one day of 1-minute BID candles for ``pair``.

    Parameters
    ----------
    pair : str
        Instrument code used in the datafeed URL, e.g. ``"EURUSD"``.
    date : datetime.date or datetime.datetime
        Calendar day to fetch; only ``year``, ``month`` and ``day`` are read.
    point : int or float, optional
        Divisor that turns the stored integer prices into real prices.
        When omitted it defaults to 1000 for pairs quoted in JPY and
        100000 for everything else. Pass it explicitly for instruments
        that follow neither rule.

    Returns
    -------
    list[list]
        One ``[minute, open, high, low, close, volume]`` row per candle,
        where ``minute`` is a naive ``datetime`` (midnight of ``date`` plus
        the record's offset in seconds). An empty list is returned when
        the day has no file, the response is empty, or the download or
        decompression fails (a warning is printed for failures).

    Notes
    -----
    NOTE(review): the record layout decoded here (big-endian, 24 bytes:
    time offset, open, close, low, high as int32, then volume as float32)
    follows publicly available Dukascopy decoders -- verify against one
    known candle before relying on the data.
    """
    year = date.year
    month = date.month - 1  # Dukascopy paths use zero-based months (00-11)
    day = date.day
    day_label = f"{year}-{date.month:02d}-{day:02d}"

    url = f"{BASE_URL}/{pair}/{year}/{month:02d}/{day:02d}/BID_candles_min_1.bi5"

    # Best effort: a failed day is reported and skipped instead of aborting
    # a long multi-day run, but it is no longer swallowed silently.
    try:
        r = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"AVISO {pair} {day_label}: fallo de descarga ({exc})")
        return []

    if r.status_code not in (200, 404):
        # 404 simply means "no file for that day"; anything else is worth
        # surfacing because it may hide real data (throttling, outages).
        print(f"AVISO {pair} {day_label}: HTTP {r.status_code}")
    if r.status_code != 200 or len(r.content) == 0:
        return []

    try:
        raw = lzma.decompress(r.content)
    except lzma.LZMAError as exc:
        print(f"AVISO {pair} {day_label}: archivo corrupto ({exc})")
        return []

    if point is None:
        # JPY-quoted pairs are stored with 3 decimals, the rest with 5.
        point = 1000 if pair.upper().endswith("JPY") else 100000

    record = struct.Struct(">5if")
    # Ignore a truncated trailing record instead of raising struct.error.
    usable = len(raw) - len(raw) % record.size
    base_dt = datetime(year, date.month, day)

    rows = []
    # On-disk field order is open, close, low, high; rows are emitted in the
    # conventional open, high, low, close order.
    for ts, o, c, l, h, v in record.iter_unpack(raw[:usable]):
        rows.append([
            base_dt + timedelta(seconds=ts),
            o / point,
            h / point,
            l / point,
            c / point,
            v,
        ])

    return rows

In [None]:
# =========================================
# FULL DATE-RANGE DOWNLOAD
# =========================================
# Number of calendar days from START_DATE through END_DATE, inclusive
# (zero or negative when the range is empty, which yields no iterations).
total_days = (END_DATE - START_DATE).days + 1

for pair in PAIRS:
    print(f"\n=== PROCESANDO {pair} ===")

    # Collect every candle row of the pair before building the DataFrame once.
    all_rows = []

    for day_offset in range(total_days):
        current = START_DATE + timedelta(days=day_offset)
        day_rows = download_m1_day(pair, current)

        # Days without data are only reported.
        if not day_rows:
            print(f"{pair} {current.date()} sin datos")
            continue

        all_rows += day_rows
        print(f"{pair} {current.date()} {len(day_rows)} minutos")

    # Nothing downloaded for this pair: skip writing a CSV.
    if len(all_rows) == 0:
        print(f"Sin datos para {pair}")
        continue

    # Build the table in one go, ordered chronologically with a clean index.
    df = (
        pd.DataFrame(
            all_rows,
            columns=["minute", "open", "high", "low", "close", "volume"],
        )
        .sort_values("minute")
        .reset_index(drop=True)
    )

    output_file = f"{OUTPUT_DIR}/{pair}_M1_2025.csv"
    df.to_csv(output_file, index=False)

    print(f"CSV generado: {output_file}")
    print(f"Filas totales: {len(df)}")

print("\nDESCARGA COMPLETA FINALIZADA")