In [1]:
from os import environ
import httpx
import importlib
import pathlib
import polars as pl
import src.fetch
import src.ingest
import src.transform
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Polars display options: show up to 100 characters per string cell and
# up to 100 rows per table. Each setter returns the Config class, so the
# two calls can be chained.
pl.Config.set_fmt_str_lengths(100).set_tbl_rows(100)

polars.config.Config

In [2]:
# Re-import src.fetch so edits to the module are picked up without a kernel restart.
importlib.reload(src.fetch)

# Everything the fetcher needs, collected in one place before construction.
fetch_options = {
    "client": httpx.AsyncClient(),
    "tiingo_token": environ["TIINGO_TOKEN"],
    "start_date": "1995-01-01",
    "save_dir": "data/sample2",
    "response_format": "csv",
    "failed_tickers_file": "failed_tickers.csv",
}
fetcher = src.fetch.Fetch(**fetch_options)

# Only build the ticker connection the first time this cell runs in a kernel;
# later re-runs reuse the existing `duckdb_con`.
if "duckdb_con" not in locals():
    duckdb_con = src.fetch.get_supported_tickers()

In [3]:
# Build the `selected_us_tickers` view that the following cells query.
# The selection rules live in src/transform.py and are not visible here.
src.transform.create_selected_us_tickers_view(duckdb_con)

In [4]:
# Sanity check: number of rows in the `selected_us_tickers` view.
duckdb_con.sql("select count(*) from selected_us_tickers")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        16994 │
└──────────────┘

In [5]:
# Materialize the view as a Polars DataFrame (`.pl()`) so it can be split into batches below.
df_us_clean = duckdb_con.table("selected_us_tickers").pl()

In [6]:
# Split point: integer midpoint of the row count.
total_rows, _ = df_us_clean.shape
half = total_rows // 2

# First batch: rows [0, half).
df_first = df_us_clean.slice(offset=0, length=half)
df_first.shape

(8497, 6)

In [7]:
# Second batch: every row from index `half` to the end of the frame.
# `DataFrame.slice` takes (offset, length), not (start, end). Leaving the
# length out selects all remaining rows, instead of passing an oversized
# length and relying on Polars to clamp it.
df_second = df_us_clean.slice(half)
df_second.shape

(8497, 6)

In [None]:
await fetcher.fetch_all(
    df=df_first,
    columns=["date", "adjClose", "adjVolume"],
    async_batch_size=500
)

In [None]:
# Wait for the next clock hour to ingest the second half (due to 10k API limit per hour)
await fetcher.fetch_all(
    df=df_second,
    columns=["date", "adjClose", "adjVolume"],
    async_batch_size=500
)

# Ingest to DuckDB / MotherDuck



In [12]:
# Re-import src.ingest so edits to the module take effect without a kernel restart.
importlib.reload(src.ingest)

# Ingestor bound to the local database file and the directory holding the SQL scripts.
ingestor = src.ingest.Ingest(db_path="duckdb/test.db", sql_dir="sql")

# Create the destination table from create_table.sql
# (presumably looked up inside `sql_dir` — confirm in src/ingest.py).
ingestor.create_table(file_name="create_table.sql")

In [14]:
# Load every CSV matching the glob into the `daily_adjusted` table, then
# display the table's row count as a sanity check.
# NOTE(review): the fetcher in this notebook is configured with
# save_dir="data/sample2", but this glob reads from "data/sample/" — confirm
# which directory is intended.
# NOTE(review): presumably re-running this cell loads the same files again;
# verify whether ingest_data de-duplicates before re-executing.
ingestor.ingest_data(dst_table="daily_adjusted", data="data/sample/*.csv")
ingestor.cur.sql("SELECT COUNT(*) FROM daily_adjusted")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│     15982999 │
└──────────────┘

In [26]:
# Done ingesting: close the ingestor (presumably releases the database
# connection — confirm in src/ingest.py).
ingestor.close()

## Last trading day snapshot

This request downloads the latest prices for all tickers available in the Tiingo end-of-day API.

In [32]:
# Request the end-of-day price snapshot for a single trading day as CSV.
# The query is passed through `params` so httpx builds and URL-encodes the
# query string itself, rather than interpolating the token into the URL by hand.
snapshot_date = "2024-05-09"  # trading day to download
response = httpx.get(
    "https://api.tiingo.com/tiingo/daily/prices",
    params={
        "format": "csv",
        "date": snapshot_date,
        "token": environ["TIINGO_TOKEN"],
    },
)
# Display the HTTP status so a failed request is visible before the body is saved.
response.status_code

200

In [33]:
# Persist the snapshot body to disk.
# Refuse to save a non-2xx response: an error payload written under a .csv
# name would look like a valid snapshot to later steps.
response.raise_for_status()

path = pathlib.Path("data/daily/snapshot_date.csv")
# Make sure the target directory exists on a fresh checkout.
path.parent.mkdir(parents=True, exist_ok=True)
# Pin the encoding so the file does not depend on the platform's locale default.
# The cell still displays the number of characters written.
path.write_text(response.text, encoding="utf-8")

3934387