In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone

# Choose stretch: "isis", "godstow", or "wallingford"
STRETCH = "isis"  # change this to "godstow" or "wallingford" when needed

# Mapping from stretch name to API id and CSV basename
STRETCH_CONFIG = {
    "isis":       {"id": 1, "csv_basename": "isis_flag_model_data"},
    "godstow":    {"id": 2, "csv_basename": "godstow_flag_model_data"},
    "wallingford": {"id": 5, "csv_basename": "wallingford_flag_model_data"},
}

BASE_URL = "https://flags.jamesonlee.com/api/flow"
STRETCH_ID = STRETCH_CONFIG[STRETCH]["id"]
CSV_BASENAME = STRETCH_CONFIG[STRETCH]["csv_basename"]

start_date = datetime(2017, 1, 1, tzinfo=timezone.utc)  # or whatever start you want
end_date   = datetime.now(tz=timezone.utc)

all_chunks = []

current = start_date
while current < end_date:
    day_start = current
    day_end   = min(current + timedelta(days=1), end_date)

    start_ms = int(day_start.timestamp() * 1000)
    end_ms   = int(day_end.timestamp() * 1000)

    params = {"start": start_ms, "end": end_ms}
    resp = requests.get(f"{BASE_URL}/{STRETCH_ID}/", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    if data:
        df_day = pd.DataFrame(data)
        df_day["timestamp"] = pd.to_datetime(df_day["time"])
        all_chunks.append(df_day)

    print(f"{day_start.date()} → {len(data)} rows")

    current = day_end

# Combine all days into one DataFrame
df = pd.concat(all_chunks, ignore_index=True).sort_values("timestamp")

# Map flags / rename columns
flag_map = {
    0: 'Grey',
    1: 'Green',
    15: 'Light Blue',
    2: 'Blue',
    3: 'Amber',
    4: 'Red',
    None: 'Grey',
}
df["flag_status"] = df["flag"].map(flag_map)
df["jameson_differential"] = df["flow"]

out = df[["timestamp", "jameson_differential", "flag_status"]]

2017-01-01 → 0 rows
2017-01-02 → 0 rows
2017-01-03 → 0 rows
2017-01-04 → 0 rows
2017-01-05 → 0 rows
2017-01-06 → 0 rows
2017-01-07 → 0 rows
2017-01-08 → 0 rows
2017-01-09 → 0 rows
2017-01-10 → 0 rows
2017-01-11 → 0 rows
2017-01-12 → 0 rows
2017-01-13 → 0 rows
2017-01-14 → 0 rows
2017-01-15 → 0 rows
2017-01-16 → 0 rows
2017-01-17 → 0 rows
2017-01-18 → 0 rows
2017-01-19 → 0 rows
2017-01-20 → 0 rows
2017-01-21 → 0 rows
2017-01-22 → 0 rows
2017-01-23 → 0 rows
2017-01-24 → 0 rows
2017-01-25 → 0 rows
2017-01-26 → 0 rows
2017-01-27 → 0 rows
2017-01-28 → 0 rows
2017-01-29 → 0 rows
2017-01-30 → 0 rows
2017-01-31 → 0 rows
2017-02-01 → 0 rows
2017-02-02 → 0 rows
2017-02-03 → 0 rows
2017-02-04 → 11 rows
2017-02-05 → 22 rows
2017-02-06 → 18 rows
2017-02-07 → 15 rows
2017-02-08 → 17 rows
2017-02-09 → 16 rows
2017-02-10 → 22 rows
2017-02-11 → 20 rows
2017-02-12 → 23 rows
2017-02-13 → 20 rows
2017-02-14 → 23 rows
2017-02-15 → 18 rows
2017-02-16 → 17 rows
2017-02-17 → 20 rows
2017-02-18 → 23 rows
2017-

  df = pd.concat(all_chunks, ignore_index=True).sort_values("timestamp")


In [7]:
from pathlib import Path

new_data = out.copy()

data_dir = Path("../data")
snapshot_path = data_dir / f"{CSV_BASENAME}_2025_08.csv"
base_path     = data_dir / f"{CSV_BASENAME}.csv"

# if snapshot_path.exists():
#     old_data = pd.read_csv(snapshot_path)
# elif base_path.exists():
#     old_data = pd.read_csv(base_path)
# else:
#     # No existing data yet for this stretch
#     old_data = pd.DataFrame(columns=new_data.columns)

In [4]:
# Merge new and old data for the selected stretch, keeping new data on conflicts
merged_data = out
# merged_data = pd.concat([old_data, new_data], ignore_index=True)
merged_data["timestamp"] = pd.to_datetime(merged_data["timestamp"])
merged_data = (
    merged_data
    .drop_duplicates(subset=["timestamp"], keep="last")
    .sort_values("timestamp")
    .reset_index(drop=True)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data["timestamp"] = pd.to_datetime(merged_data["timestamp"])


In [5]:
merged_data

Unnamed: 0,timestamp,jameson_differential,flag_status
0,2017-02-04 12:00:00+00:00,0.60358,Red
1,2017-02-04 13:00:00+00:00,0.54858,Red
2,2017-02-04 14:00:00+00:00,0.53919,Red
3,2017-02-04 15:00:00+00:00,0.56720,Red
4,2017-02-04 16:00:00+00:00,0.53889,Red
...,...,...,...
216380,2026-01-20 09:15:00+00:00,0.84859,Red
216381,2026-01-20 09:30:00+00:00,0.86311,Red
216382,2026-01-20 09:45:00+00:00,0.84343,Red
216383,2026-01-20 10:00:00+00:00,0.84388,Red


In [8]:
merged_data.to_csv(base_path, index=False)