In [1]:
from os import environ
import httpx
import importlib
import pathlib
import polars as pl
import src.fetch
import src.ingest
import src.transform
from dotenv import load_dotenv

load_dotenv()

## Polars
pl.Config.set_fmt_str_lengths(100)
pl.Config.set_tbl_rows(100)

polars.config.Config

In [2]:
importlib.reload(src.fetch)
fetcher = src.fetch.Fetch(
    client=httpx.AsyncClient(),
    tiingo_token=environ["TIINGO_TOKEN"],
    start_date="1995-01-01",
    save_dir="data/prod_2024-05-24_2ndHalf",
    response_format="csv",
    failed_tickers_file="failed_tickers.csv",
)

if "duckdb_con" not in locals():
    duckdb_con = src.fetch.get_supported_tickers()

In [3]:
src.transform.create_selected_us_tickers_view(duckdb_con)

In [4]:
duckdb_con.sql("select count(*) from selected_us_tickers")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        16985 │
└──────────────┘

In [5]:
df_us_clean = duckdb_con.table("selected_us_tickers").pl()

In [6]:
half = df_us_clean.shape[0] // 2
df_first = df_us_clean.slice(0, half)
df_first.shape

(8492, 6)

In [7]:
df_second = df_us_clean.slice(half, df_us_clean.shape[0])
df_second.shape

(8493, 6)

In [8]:
columns = ["date", "close", "adjClose", "adjVolume"]

In [9]:
# await fetcher.fetch_all(
#     df=df_first,
#     columns=columns,
#     async_batch_size=250
# )

In [9]:
# Step 1. Wait for the next clock hour to ingest the second half (due to 10k API limit per hour)
await fetcher.fetch_all(
    df=df_second,
    columns=columns,
    async_batch_size=250
)

Fetching 0 to 250 of 8493
Fetching 250 to 500 of 8493


ERROR:src.fetch:Invalid response for HME.


Fetching 500 to 750 of 8493
Fetching 750 to 1000 of 8493


ERROR:src.fetch:Invalid response for RVR.


Fetching 1000 to 1250 of 8493


ERROR:src.fetch:Invalid response for CEAI.


Fetching 1250 to 1500 of 8493
Fetching 1500 to 1750 of 8493


ERROR:src.fetch:Invalid response for LCI.


Fetching 1750 to 2000 of 8493
Fetching 2000 to 2250 of 8493
Fetching 2250 to 2500 of 8493
Fetching 2500 to 2750 of 8493
Fetching 2750 to 3000 of 8493


ERROR:src.fetch:Invalid response for PMC.


Fetching 3000 to 3250 of 8493


ERROR:src.fetch:Invalid response for AGII.


Fetching 3250 to 3500 of 8493


ERROR:src.fetch:Invalid response for CIC-U.


Fetching 3500 to 3750 of 8493


ERROR:src.fetch:Invalid response for IMPX.


Fetching 3750 to 4000 of 8493


ERROR:src.fetch:Invalid response for MVNR.
ERROR:src.fetch:Invalid response for NSH-U.


Fetching 4000 to 4250 of 8493
Fetching 4250 to 4500 of 8493


ERROR:src.fetch:HTTP status error for https://api.tiingo.com/tiingo/daily/BC/PC/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c: Client error '404 Not Found' for url 'https://api.tiingo.com/tiingo/daily/BC/PC/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404
ERROR:src.fetch:response.text for BC/PC is None.


Fetching 4500 to 4750 of 8493
Fetching 4750 to 5000 of 8493


ERROR:src.fetch:An error occurred while requesting https://api.tiingo.com/tiingo/daily/LANC/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c: [Errno 8] nodename nor servname provided, or not known
ERROR:src.fetch:response.text for LANC is None.
ERROR:src.fetch:An error occurred while requesting https://api.tiingo.com/tiingo/daily/NGA-WS/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c: [Errno 8] nodename nor servname provided, or not known
ERROR:src.fetch:response.text for NGA-WS is None.


Fetching 5000 to 5250 of 8493
Fetching 5250 to 5500 of 8493


ERROR:src.fetch:Invalid response for TFG.
ERROR:src.fetch:Invalid response for ABDC.


Fetching 5500 to 5750 of 8493
Fetching 5750 to 6000 of 8493
Fetching 6000 to 6250 of 8493
Fetching 6250 to 6500 of 8493


ERROR:src.fetch:An error occurred while requesting https://api.tiingo.com/tiingo/daily/XYLD/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c: [Errno 8] nodename nor servname provided, or not known
ERROR:src.fetch:response.text for XYLD is None.
ERROR:src.fetch:Invalid response for ACAMU.


Fetching 6500 to 6750 of 8493
Fetching 6750 to 7000 of 8493
Fetching 7000 to 7250 of 8493
Fetching 7250 to 7500 of 8493
Fetching 7500 to 7750 of 8493


ERROR:src.fetch:HTTP status error for https://api.tiingo.com/tiingo/daily/BC/PA/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c: Client error '404 Not Found' for url 'https://api.tiingo.com/tiingo/daily/BC/PA/prices?startDate=1995-01-01&format=csv&columns=date,close,adjClose,adjVolume&token=72b7d80c10cbdbdc839fe9556a7a004af06cf83c'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404
ERROR:src.fetch:response.text for BC/PA is None.


Fetching 7750 to 8000 of 8493


ERROR:src.fetch:Invalid response for ILG.


Fetching 8000 to 8250 of 8493
Fetching 8250 to 8500 of 8493


ERROR:src.fetch:Invalid response for TFM.


# Ingest to DuckDB / Motherduck



In [10]:
importlib.reload(src.ingest)
ingestor = src.ingest.Ingest(
    db_path=f"md:prod?motherduck_token={environ["MOTHERDUCK_TOKEN"]}",
    sql_dir="../sql",
)

In [None]:
# ingestor.create_table(file_name="table__daily_adjusted.sql")

In [15]:
ingestor.con.sql("insert or replace into daily_adjusted select * from 'data/prod_2024-05-24_2ndHalf/*.csv'")

In [16]:
# Step 2 Run this after the next clock hour
# ingestor.ingest_data(dst_table="daily_adjusted", data="data/prod_2024-05-24_2ndHalf/*.csv")

In [None]:
ingestor.cur.sql("SELECT COUNT(*) FROM daily_adjusted")

In [None]:
ingestor.close()

## Last trading day snapshot

This request downloads latest prices for all tickers avaialable in the Tiingo end-of-day API.

In [None]:
response = httpx.get(f"https://api.tiingo.com/tiingo/daily/prices?format=csv=&token={environ['TIINGO_TOKEN']}")
response.status_code

404

In [None]:
path = pathlib.Path("data/daily/snapshot_2024-05-20.csv")
path.write_text(response.text)

23