In [None]:
import numpy as np
import pandas as pd
import os

Process data

In [7]:
# Ticker symbols handed to download_data and load_and_process_all further down.
TICKERS = ["AAPL", "MSFT", "AMZN", "GOOGL", "META"]
# Date window for the downloaded rows; download_data keeps rows with
# START_DATE <= Date <= END_DATE (both endpoints included).
START_DATE = "2015-01-01"
END_DATE = "2024-01-01"
# Directory for the per-ticker raw CSV files. This is a relative path, so it is
# resolved against the current working directory of the kernel.
RAW_DIR="../data/raw"
# Create the directory up front; exist_ok=True keeps this cell safe to re-run.
os.makedirs(RAW_DIR, exist_ok=True)

In [11]:
def download_data(tickers, start, end, out_dir):
    """Download daily price history from Stooq and save one CSV per ticker.

    For each ticker the daily history is requested from Stooq using the
    symbol ``<ticker>.us``. Rows with missing values are dropped, the
    ``Date`` column is parsed, rows are sorted chronologically and limited to
    ``start <= Date <= end`` (both endpoints included), and the result is
    written to ``<out_dir>/<ticker>.csv`` without the index.

    A ticker whose download fails, or whose response holds no usable data,
    is reported on stdout and skipped so that the remaining tickers are
    still processed.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols, e.g. ``["AAPL", "MSFT"]``.
    start, end : str or datetime-like
        Inclusive date bounds; anything ``pd.to_datetime`` accepts.
    out_dir : str
        Destination directory. It is created when it does not exist.

    Returns
    -------
    None
    """
    # Do not rely on an earlier cell having created the directory.
    os.makedirs(out_dir, exist_ok=True)

    # The bounds are loop-invariant, so parse them once.
    start_ts = pd.to_datetime(start)
    end_ts = pd.to_datetime(end)

    for t in tickers:
        stooq_symbol = t.lower() + ".us"
        print(f"Downloading {t} from Stooq ({stooq_symbol})...")

        url = f"https://stooq.com/q/d/l/?s={stooq_symbol}&i=d"
        try:
            df = pd.read_csv(url)
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # URL/HTTP errors are OSError subclasses; an empty or malformed
            # body surfaces as a pandas parser error. Skip just this ticker.
            print(f"ERROR: Download failed for {t}: {exc}")
            continue

        # A textual error response parses to a frame with no rows and/or no
        # "Date" column; treat both as "no data" instead of hitting KeyError.
        if df.empty or "Date" not in df.columns:
            print(f"ERROR: No data returned for {t}")
            continue

        # Parse dates before sorting so the order is chronological rather
        # than lexicographic on the raw strings.
        df = (
            df.dropna()
            .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
            .sort_values("Date")
        )
        # Series.between is inclusive on both ends by default.
        df = df.loc[df["Date"].between(start_ts, end_ts)]

        out_path = os.path.join(out_dir, f"{t}.csv")
        df.to_csv(out_path, index=False)

        print(f"Saved {t}: {df.shape[0]} rows")

# Fetch and save the raw CSV for every configured ticker (reads from the Stooq URL, so network access is needed).
download_data(TICKERS, START_DATE, END_DATE, RAW_DIR)

Downloading AAPL from Stooq (aapl.us)...
Saved AAPL: 2264 rows
Downloading MSFT from Stooq (msft.us)...
Saved MSFT: 2264 rows
Downloading AMZN from Stooq (amzn.us)...
Saved AMZN: 2264 rows
Downloading GOOGL from Stooq (googl.us)...
Saved GOOGL: 2264 rows
Downloading META from Stooq (meta.us)...
Saved META: 2264 rows


In [12]:
def compute_indicators(
    df: pd.DataFrame,
    vol_windows=(10, 20),
    rsi_window: int = 14,
    mom_window: int = 10,
) -> pd.DataFrame:
    """Add return, volatility, RSI and momentum columns derived from ``Close``.

    The input frame is not modified; a copy with extra columns is returned.
    Every indicator uses ``shift``/``diff``/``rolling`` over row position, so
    rows are presumably expected in chronological order (the function does
    not sort them itself).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``Close`` column.
    vol_windows : iterable of int, default (10, 20)
        One ``vol_<w>`` column (rolling standard deviation of ``ret``) is
        added per window, in the given order.
    rsi_window : int, default 14
        Window for the ``rsi_<w>`` column.
    mom_window : int, default 10
        Look-back for the ``mom_<w>`` rate-of-change column.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with ``ret``, ``vol_<w>``..., ``rsi_<w>`` and
        ``mom_<w>`` appended. With the defaults the added columns are
        ``ret, vol_10, vol_20, rsi_14, mom_10``. Leading rows of each
        indicator are NaN until its window is filled.

    Raises
    ------
    KeyError
        If ``df`` has no ``Close`` column.
    """
    out = df.copy()
    close = out["Close"]

    # Daily log returns.
    out["ret"] = np.log(close / close.shift(1))

    # Rolling volatility of the log returns, one column per window.
    for window in vol_windows:
        out[f"vol_{window}"] = out["ret"].rolling(window).std()

    # RSI built from simple rolling means of the gains and losses.
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(rsi_window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(rsi_window).mean()
    # The small epsilon keeps the ratio finite when a window has no losses.
    rs = avg_gain / (avg_loss + 1e-9)
    out[f"rsi_{rsi_window}"] = 100 - (100 / (1 + rs))

    # Momentum as rate of change over `mom_window` rows.
    out[f"mom_{mom_window}"] = close / close.shift(mom_window) - 1

    return out

In [13]:
def load_and_process_all(tickers, raw_dir):
    """Load each ticker's raw CSV, add indicators, and align all frames on shared dates.

    For every ticker, ``<raw_dir>/<ticker>.csv`` is read, indexed by its
    parsed ``Date`` column and passed through ``compute_indicators``.
    Tickers without a raw file are reported and left out.

    The frames are then aligned in three steps: restrict every frame to the
    dates present in all of them, drop rows containing NaN, and restrict
    again, because each frame may have lost different rows to the NaN drop.

    Returns
    -------
    dict
        Maps ticker -> processed DataFrame. All frames share the same index.
        The dict is empty when no raw file was found.
    """

    def _restrict_to_shared_dates(frames):
        # Intersect all indexes, then cut every frame down to that intersection.
        shared = None
        for frame in frames.values():
            if shared is None:
                shared = frame.index
            else:
                shared = shared.intersection(frame.index)
        for name in frames:
            frames[name] = frames[name].loc[shared].copy()

    processed = {}
    for ticker in tickers:
        csv_path = os.path.join(raw_dir, f"{ticker}.csv")
        if not os.path.exists(csv_path):
            print(f"Missing raw file for {ticker}, skipping.")
            continue

        raw = pd.read_csv(csv_path)
        raw["Date"] = pd.to_datetime(raw["Date"])
        processed[ticker] = compute_indicators(raw.set_index("Date"))

    # Pass 1: dates common to every ticker.
    _restrict_to_shared_dates(processed)

    # Drop indicator warm-up rows (and any other row with NaN) per ticker.
    for ticker in processed:
        processed[ticker] = processed[ticker].dropna()

    # Pass 2: the NaN drop may differ between tickers, so align once more.
    _restrict_to_shared_dates(processed)

    return processed

In [None]:
# Directory for the per-ticker feature files and the combined returns matrix.
PROCESSED_DIR = "../data/processed"
os.makedirs(PROCESSED_DIR, exist_ok=True)

print("Computing indicators and returns...")
dfs = load_and_process_all(TICKERS, RAW_DIR)

# load_and_process_all returns an empty dict when no raw file was found.
# Stop here with an actionable message instead of continuing with nothing.
if not dfs:
    raise RuntimeError(
        f"No ticker data could be loaded from {RAW_DIR!r}; run the download step first."
    )

# One CSV per ticker; reset_index turns the Date index back into a column.
for t, df in dfs.items():
    df.reset_index().to_csv(os.path.join(PROCESSED_DIR, f"{t}.csv"), index=False)
    print(f"Saved processed features for {t}")

# load_and_process_all leaves every frame on the same Date index, so the
# constructor lines the "ret" Series up without an explicit index argument.
returns_df = pd.DataFrame({t: df["ret"] for t, df in dfs.items()})
returns_df.reset_index().to_csv(os.path.join(PROCESSED_DIR, "returns_matrix.csv"), index=False)
print("Saved returns matrix")

returns_df.head()

Computing indicators and returns...
Saved processed features for AAPL
Saved processed features for MSFT
Saved processed features for AMZN
Saved processed features for GOOGL
Saved processed features for META
Saved returns_matrix.csv


Unnamed: 0_level_0,AAPL,MSFT,AMZN,GOOGL,META
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-02-02,0.012251,0.021443,0.027651,-0.010001,-0.012194
2015-02-03,0.000189,0.007717,-0.002527,0.002065,0.005453
2015-02-04,0.007516,0.00575,0.003295,-0.013593,0.003046
2015-02-05,0.007303,0.014466,0.024749,0.00706,-0.000264
2015-02-06,-0.008714,-0.000908,0.001043,0.00762,-0.015192
