In [1]:
from os import environ
import httpx
import importlib
import pathlib
import polars as pl
import src.fetch
import src.ingest
import src.transform
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment;
# later cells read TIINGO_TOKEN from `environ`.
load_dotenv()

## Polars
# Raise the display limits so strings up to 100 characters and tables up to
# 100 rows are printed without truncation.
pl.Config.set_fmt_str_lengths(100)
pl.Config.set_tbl_rows(100)

polars.config.Config

In [2]:
# Re-import src.fetch so edits to the module are picked up without restarting
# the kernel.
importlib.reload(src.fetch)

# Fetch client configured for CSV responses, saving under data/sample2.
fetcher = src.fetch.Fetch(
    client=httpx.AsyncClient(),
    tiingo_token=environ["TIINGO_TOKEN"],
    start_date="1995-01-01",
    save_dir="data/sample2",
    response_format="csv",
    failed_tickers_file="failed_tickers.csv",
)

# Only build the supported-tickers connection the first time this cell runs in
# a kernel session; on re-runs the existing `duckdb_con` is kept as-is.
try:
    duckdb_con
except NameError:
    duckdb_con = src.fetch.get_supported_tickers()

In [3]:
# Per its name, creates the `selected_us_tickers` view on the shared
# connection; the following cells query it by that name.
src.transform.create_selected_us_tickers_view(duckdb_con)

In [4]:
# Sanity check: how many rows the view exposes.
duckdb_con.sql("select count(*) from selected_us_tickers")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        16994 │
└──────────────┘

In [5]:
# Pull the view into a Polars DataFrame; the sampling and filtering cells
# below all start from this frame.
df_us_clean = duckdb_con.table("selected_us_tickers").pl()

In [6]:
# Fixed seed so the same 500 rows are drawn on every run.
seed = 42
df_sampled = df_us_clean.sample(n=500, seed=seed)
df_sampled.shape

(500, 6)

In [7]:
# Fetch data for the 500-row sample, requesting only the date, adjusted close
# and adjusted volume columns.
await fetcher.fetch_supported_tickers(
    df=df_sampled,
    columns=["date", "adjClose", "adjVolume"],
)

In [13]:
df_zzz = df_us_clean.filter(
    ((pl.col("ticker") == "ZZZ") & (pl.col("exchange") == "NASDAQ"))
    | (pl.col("ticker") == "AAPL")
    | (pl.col("ticker") == "CCAI")
)

await fetcher.fetch_supported_tickers(
    df=df_zzz,
    columns=["date", "adjClose", "adjVolume"],
)

In [10]:
# Larger reproducible sample (7022 rows) drawn with the same fixed seed.
seed = 42
df_large = df_us_clean.sample(n=7022, seed=seed)
df_large.shape

(7022, 6)

In [11]:
# Fetch the large sample in batches of 400 tickers; the output below logs one
# "Fetching a to b of 7022" line per batch plus any per-ticker errors.
await fetcher.fetch_all(
    df=df_large,
    columns=["date", "adjClose", "adjVolume"],
    async_batch_size=400
)

Fetching 0 to 400 of 7022


ERROR:src.fetch:Invalid response for LCI.
ERROR:src.fetch:An error occurred while requesting https://api.tiingo.com/tiingo/daily/DGRS/prices?startDate=2013-07-25&endDate=2024-05-10&format=csv&columns=date,adjClose,adjVolume&token=<REDACTED>: [Errno 8] nodename nor servname provided, or not known
ERROR:src.fetch:response.text for DGRS is None.


Fetching 400 to 800 of 7022


ERROR:src.fetch:Invalid response for AVYA.


Fetching 800 to 1200 of 7022


ERROR:src.fetch:Invalid response for SUNH.
ERROR:src.fetch:Invalid response for VISN.


Fetching 1200 to 1600 of 7022


ERROR:src.fetch:No data found for IGLD.
ERROR:src.fetch:Invalid response for PEAK.


Fetching 1600 to 2000 of 7022
Fetching 2000 to 2400 of 7022
Fetching 2400 to 2800 of 7022


ERROR:src.fetch:No data found for PNT.
ERROR:src.fetch:No data found for PCPCW.
ERROR:src.fetch:No data found for PKDC.


Fetching 2800 to 3200 of 7022


ERROR:src.fetch:Invalid response for STLRU.
ERROR:src.fetch:No data found for MMX.
ERROR:src.fetch:Invalid response for SCACU.


Fetching 3200 to 3600 of 7022


ERROR:src.fetch:No data found for GWGHQ.
ERROR:src.fetch:No data found for HZON-WS.


Fetching 3600 to 4000 of 7022


ERROR:src.fetch:No data found for ALLG-WS.


Fetching 4000 to 4400 of 7022


ERROR:src.fetch:Invalid response for HME.
ERROR:src.fetch:Invalid response for CEAI.
ERROR:src.fetch:No data found for CYPS.


Fetching 4400 to 4800 of 7022


ERROR:src.fetch:No data found for UBP-P-K.


Fetching 4800 to 5200 of 7022


ERROR:src.fetch:Invalid response for AGII.
ERROR:src.fetch:No data found for GSOL.
ERROR:src.fetch:No data found for WORK.
ERROR:src.fetch:No data found for ENSCW.
ERROR:src.fetch:Invalid response for ACAMU.


Fetching 5200 to 5600 of 7022


ERROR:src.fetch:No data found for FRG.


Fetching 5600 to 6000 of 7022


ERROR:src.fetch:HTTP status error for https://api.tiingo.com/tiingo/daily/BC/PB/prices?startDate=2018-12-06&endDate=2024-05-10&format=csv&columns=date,adjClose,adjVolume&token=<REDACTED>: Client error '404 Not Found' for url 'https://api.tiingo.com/tiingo/daily/BC/PB/prices?startDate=2018-12-06&endDate=2024-05-10&format=csv&columns=date,adjClose,adjVolume&token=<REDACTED>'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404
ERROR:src.fetch:response.text for BC/PB is None.
ERROR:src.fetch:Invalid response for LFC.


Fetching 6000 to 6400 of 7022


ERROR:src.fetch:No data found for MEME.
ERROR:src.fetch:An error occurred while requesting https://api.tiingo.com/tiingo/daily/PTRB/prices?startDate=2021-12-08&endDate=2024-05-10&format=csv&columns=date,adjClose,adjVolume&token=<REDACTED>: [Errno 8] nodename nor servname provided, or not known
ERROR:src.fetch:response.text for PTRB is None.
ERROR:src.fetch:Invalid response for FOGO.
ERROR:src.fetch:Invalid response for TFG.


Fetching 6400 to 6800 of 7022


ERROR:src.fetch:Invalid response for CIC-U.
ERROR:src.fetch:No data found for RESI.


Fetching 6800 to 7200 of 7022


ERROR:src.fetch:No data found for AMPIW.
ERROR:src.fetch:Invalid response for ABDC.
ERROR:src.fetch:No data found for SAAS.
ERROR:src.fetch:No data found for KEGX.


# Ingest to DuckDB / MotherDuck



In [12]:
# Reload src.ingest so edits to the module take effect without a kernel restart.
importlib.reload(src.ingest)
ingestor = src.ingest.Ingest(
    db_path="duckdb/test.db",
    sql_dir="sql",
)
# Run the DDL script by file name — presumably resolved relative to `sql_dir`;
# confirm in src.ingest.
ingestor.create_table(file_name="create_table.sql")

In [14]:
# Load every CSV matching the glob into the `daily_adjusted` table, then
# display the resulting row count.
# NOTE(review): the fetcher above is configured with save_dir="data/sample2",
# but this glob reads "data/sample/*.csv" — confirm the intended directory.
ingestor.ingest_data(dst_table="daily_adjusted", data="data/sample/*.csv")
ingestor.cur.sql("SELECT COUNT(*) FROM daily_adjusted")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│     15982999 │
└──────────────┘

In [26]:
# Done with ingestion: close the ingestor (presumably releasing its database
# connection — confirm in src.ingest).
ingestor.close()

## Last trading day snapshot

This request downloads the latest prices for all tickers available in the Tiingo end-of-day API.

In [32]:
# Request the end-of-day snapshot for one trading day as CSV.
# The token is sent in the Authorization header rather than the query string,
# so it cannot end up in logged URLs or exception messages; the remaining
# query parameters are passed via `params` instead of being string-built.
response = httpx.get(
    "https://api.tiingo.com/tiingo/daily/prices",
    params={"format": "csv", "date": "2024-05-09"},
    headers={"Authorization": f"Token {environ['TIINGO_TOKEN']}"},
)
# Stop here on a non-2xx answer so a later cell never saves an error body as data.
response.raise_for_status()
response.status_code

200

In [33]:
# Persist the snapshot CSV; the cell displays the number of characters written.
# `pathlib` is already imported in the notebook's first cell, so it is not
# re-imported here.
path = pathlib.Path("data/daily/snapshot_date.csv")
# Make sure the target directory exists so the write also works on a fresh
# checkout instead of raising FileNotFoundError.
path.parent.mkdir(parents=True, exist_ok=True)
# Explicit encoding keeps the file identical regardless of the platform locale.
path.write_text(response.text, encoding="utf-8")

3934387