In [3]:
import glob
import polars as pl
import zstandard as zstd
import io
from datetime import datetime
import os

def read_zst_csv(path):
    """Read single .csv.zst - minimal memory"""
    dctx = zstd.ZstdDecompressor()
    with open(path, "rb") as f_in:
        with dctx.stream_reader(f_in) as reader:
            decompressed_data = reader.read()
    
    csv_bytes = io.BytesIO(decompressed_data)
    return pl.read_csv(csv_bytes, encoding="utf8-lossy", 
                      low_memory=True, infer_schema_length=10000)

def path_in_date_range(path, start_date, end_date):
    fname = path.rsplit("/", 1)[-1]
    date_str = fname.split(".")[0].split("-")[2]
    file_date = datetime.strptime(date_str, "%Y%m%d").date()
    return start_date <= file_date <= end_date

# Setup
folder = "/Users/nshaffer/Desktop/TSLA L2/XNAS-20251128-V7KRYJ435W"
output_path = f"{folder}/TSLA_2025-08-27_to_2025-11-26.csv"

all_paths = glob.glob(folder + "/*.csv.zst")
start_date = datetime(2025, 8, 27).date()
end_date = datetime(2025, 11, 26).date()

paths = [p for p in all_paths if path_in_date_range(p, start_date, end_date)]
print(f"Found {len(paths)} files")

# Process ONE file at a time, append to CSV
first_file = True
total_rows = 0
mode = 'w' if first_file else 'a'

for i, path in enumerate(paths):
    print(f"[{i+1}/{len(paths)}] {path.rsplit('/', 1)[-1]}")
    
    df = read_zst_csv(path)
    rows_added = df.shape[0]
    total_rows += rows_added
    
    # CSV append works perfectly
    df.write_csv(output_path, include_header=first_file)
    first_file = False
    
    del df
    print(f"  Added {rows_added:,} rows (total: {total_rows:,})")

print(f"\nCOMPLETE! Saved {total_rows:,} rows to {output_path}")


Found 65 files
[1/65] xnas-itch-20251120.mbp-10.csv.zst
  Added 5,562,464 rows (total: 5,562,464)
[2/65] xnas-itch-20251023.mbp-10.csv.zst
  Added 3,899,332 rows (total: 9,461,796)
[3/65] xnas-itch-20250909.mbp-10.csv.zst
  Added 1,774,500 rows (total: 11,236,296)
[4/65] xnas-itch-20251031.mbp-10.csv.zst
  Added 2,986,663 rows (total: 14,222,959)
[5/65] xnas-itch-20251024.mbp-10.csv.zst
  Added 3,074,103 rows (total: 17,297,062)
[6/65] xnas-itch-20250915.mbp-10.csv.zst
  Added 4,172,734 rows (total: 21,469,796)
[7/65] xnas-itch-20251126.mbp-10.csv.zst
  Added 2,786,853 rows (total: 24,256,649)
[8/65] xnas-itch-20251030.mbp-10.csv.zst
  Added 2,290,956 rows (total: 26,547,605)
[9/65] xnas-itch-20250908.mbp-10.csv.zst
  Added 2,736,046 rows (total: 29,283,651)
[10/65] xnas-itch-20250912.mbp-10.csv.zst
  Added 3,957,611 rows (total: 33,241,262)
[11/65] xnas-itch-20251022.mbp-10.csv.zst
  Added 3,322,420 rows (total: 36,563,682)
[12/65] xnas-itch-20251121.mbp-10.csv.zst
  Added 3,582,761 r