In [None]:
import requests
import polars as pl
from io import StringIO

api = "https://download.bls.gov/pub/time.series"
endpoints = {
    "ConsumerPriceApparelData": "cu/cu.data.2.Summaries",
    "ConsumerPriceHousingData": "cu/cu.data.12.USHousing",
    "ConsumerPriceMedicalData": "cu/cu.data.15.USMedical",
    "ConsumerPriceRecreationData": "cu/cu.data.16.USRecreation",
    "ProducerPriceData": "wp/wp.data.22.FD-ID",
}
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0 Safari/537.36"
    )
}

series_ids = {
    "ConsumerPriceApparelData": {
        "Apparel": [
            "CUUR0000SAA",
            "CUSR0000SAA",
        ],
    },
    "ConsumerPriceHousingData": {
        "Household Furnishings and Operations": [
            "CUUR0000SAH3",
            "CUSR0000SAH3",
        ],
    },
    "ConsumerPriceMedicalData": {
        "Physicians' Services": [
            "CUUR0000SEMC01",
            "CUSR0000SEMC01",
        ],
        "Hospital Services": [
            "CUUR0000SEMD01",
            "CUSR0000SEMD01",
        ],
        "Medical Care Commodities": [
            "CUUR0000SAM1",
            "CUSR0000SAM1",
        ],
    },
    "ConsumerPriceRecreationData": {
        "Recreation": [
            "CUUR0000SAR",
            "CUSR0000SAR",
        ],
    },
    "ProducerPriceData": {
        "Finished Goods Less Energy": [
            "WPUFD49208",
            "WPSFD49208",
        ],
        "Finished Goods Less Food and Energy": [
            "WPUFD4131",
            "WPSFD4131",
        ],
    },
}

categories = endpoints.keys()
for cat in categories:
    url = f"{api}/{endpoints[cat]}"

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()

        text="\n".join([" ".join(line.split()) for line in response.text.splitlines()])
        df = pl.read_csv(StringIO(text), separator=" ")
        print(df)
    except:
        print(response.status_code)

shape: (279_383, 5)
┌─────────────┬──────┬────────┬─────────┬────────────────┐
│ series_id   ┆ year ┆ period ┆ value   ┆ footnote_codes │
│ ---         ┆ ---  ┆ ---    ┆ ---     ┆ ---            │
│ str         ┆ i64  ┆ str    ┆ f64     ┆ str            │
╞═════════════╪══════╪════════╪═════════╪════════════════╡
│ CUSR0000SA0 ┆ 1947 ┆ M01    ┆ 21.48   ┆ null           │
│ CUSR0000SA0 ┆ 1947 ┆ M02    ┆ 21.62   ┆ null           │
│ CUSR0000SA0 ┆ 1947 ┆ M03    ┆ 22.0    ┆ null           │
│ CUSR0000SA0 ┆ 1947 ┆ M04    ┆ 22.0    ┆ null           │
│ CUSR0000SA0 ┆ 1947 ┆ M05    ┆ 21.95   ┆ null           │
│ …           ┆ …    ┆ …      ┆ …       ┆ …              │
│ CUUSS49GSAT ┆ 2023 ┆ S03    ┆ 288.728 ┆ null           │
│ CUUSS49GSAT ┆ 2024 ┆ S01    ┆ 283.271 ┆ null           │
│ CUUSS49GSAT ┆ 2024 ┆ S02    ┆ 282.552 ┆ null           │
│ CUUSS49GSAT ┆ 2024 ┆ S03    ┆ 282.911 ┆ null           │
│ CUUSS49GSAT ┆ 2025 ┆ S01    ┆ 278.798 ┆ null           │
└─────────────┴──────┴────────┴─────

In [2]:
sorted(df['period'].unique().to_list())

['M01',
 'M02',
 'M03',
 'M04',
 'M05',
 'M06',
 'M07',
 'M08',
 'M09',
 'M10',
 'M11',
 'M12',
 'M13',
 'S01',
 'S02',
 'S03']

In [10]:
df1 = df.filter(1==1) \
    .filter(
        (pl.col('period').str.starts_with('S')==False)
        & (pl.col('period') != 'M13')
    ) \
    .with_columns(
        pl.col('period').str.slice(-2).cast(pl.Int8)
    ) \
    .with_columns(
        pl.date(pl.col('year'), pl.col('period'), 1).alias('date'),
    ) \
    .with_columns(
        pl.col('date').dt.strftime("%B").alias('MonthName'),
        pl.col('period')#.cast(pl.String)
    ) \
    .filter(
        (1==1)
        & (pl.col('date') >= pl.date(1975, 1, 1)) 
        & (pl.col('series_id').is_in(['CUUR0000SAA','CUSR0000SAA']))
    ) \
    .group_by(['series_id', 'year', 'period', 'date', 'MonthName']) \
    .agg(pl.mean('value')) \
    .sort(['series_id', 'year', 'period']) \
    .pivot(on='series_id', index=['year', 'period', 'date', 'MonthName'], values='value') \
    .select(['year', 'period', 'date', 'MonthName', 'CUUR0000SAA', 'CUSR0000SAA'])

df1

ColumnNotFoundError: unable to find column "CUUR0000SAA"; valid columns: ["year", "period", "date", "MonthName"]

In [3]:
df2 = df.filter(1==1) \
    .filter(
        (pl.col('period').str.starts_with('S')==False)
        & (pl.col('period') == 'M13')
    ) \
    .with_columns(
        pl.col('period').str.slice(-2).cast(pl.Int8)
    ) \
    .filter(
        (pl.col('year') >= 1975)
        & (pl.col('series_id').is_in(['CUUR0000SAA','CUSR0000SAA']))
    ) \
    .with_columns(
        (pl.col('value')*0).alias('CUSR0000SAA'),
        pl.col('value').alias('CUUR0000SAA'),
        # pl.col('period').replace_strict(13, 'Annual'),
        pl.lit(None).alias('date'),
        pl.lit(None).alias('MonthName'),
    ) \
    .select(['year', 'period', 'date', 'MonthName', 'CUUR0000SAA', 'CUSR0000SAA'])
    # .select(['year', 'period', 'CUUR0000SAA', 'CUSR0000SAA'])

df2

year,period,date,MonthName,CUUR0000SAA,CUSR0000SAA
i64,i8,null,null,f64,f64
1975,13,,,72.5,0.0
1976,13,,,75.2,0.0
1977,13,,,78.6,0.0
1978,13,,,81.4,0.0
1979,13,,,84.9,0.0
…,…,…,…,…,…
2020,13,,,118.079,0.0
2021,13,,,120.993,0.0
2022,13,,,127.081,0.0
2023,13,,,130.579,0.0


In [4]:
df3 = pl.concat([df1, df2])
df3.columns = ['Year', 'MonthNum', 'M/YYYY', 'Month', 'Not Seasonally Adjusted', 'Seasonally Adjusted']
df3.filter(1==1) \
    .sort(['Year', 'MonthNum']) \
    .with_columns(
        pl.col('MonthNum').alias('Period'),
        pl.col('M/YYYY').dt.strftime('%m-%Y'),
        pl.col('MonthNum').cast(pl.String).replace({'13': 'Annual'})

    )

Year,MonthNum,M/YYYY,Month,Not Seasonally Adjusted,Seasonally Adjusted,Period
i64,str,str,str,f64,f64,i8
1975,"""1""","""01-1975""","""January""",71.1,71.8,1
1975,"""2""","""02-1975""","""February""",71.5,72.0,2
1975,"""3""","""03-1975""","""March""",71.8,72.1,3
1975,"""4""","""04-1975""","""April""",72.0,72.1,4
1975,"""5""","""05-1975""","""May""",72.3,72.2,5
…,…,…,…,…,…,…
2025,"""3""","""03-2025""","""March""",134.082,131.415,3
2025,"""4""","""04-2025""","""April""",132.544,131.156,4
2025,"""5""","""05-2025""","""May""",131.223,130.599,5
2025,"""6""","""06-2025""","""June""",130.844,131.161,6


In [8]:
df3.head()

Year,MonthNum,M/YYYY,Month,Not Seasonally Adjusted,Seasonally Adjusted
i64,i8,date,str,f64,f64
1975,1,1975-01-01,"""January""",71.1,71.8
1975,2,1975-02-01,"""February""",71.5,72.0
1975,3,1975-03-01,"""March""",71.8,72.1
1975,4,1975-04-01,"""April""",72.0,72.1
1975,5,1975-05-01,"""May""",72.3,72.2
