In [8]:
import requests
from tqdm import tqdm
import polars as pl

area = "01001"
year = "2021"
qrt = "2"
url= f"http://data.bls.gov/cew/data/api/{year}/{qrt}/area/{area}.csv"



In [6]:
def pull_file(url: str, filename: str, verify: bool = True) -> None:
    """
    Pulls a file from a URL and saves it in the filename. Used by the class to pull external files.

    Parameters
    ----------
    url: str
        The URL to pull the file from.
    filename: str
        The filename to save the file to.
    verify: bool
        If True, verifies the SSL certificate. If False, does not verify the SSL certificate.

    Returns
    -------
    None
    """
    chunk_size = 10 * 1024 * 1024

    with requests.get(url, stream=True, verify=verify) as response:
        total_size = int(response.headers.get("content-length", 0))

        with tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc="Downloading",
        ) as bar:
            with open(filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
                        bar.update(
                            len(chunk)
                        )  # Update the progress bar with the size of the chunks

In [36]:
df = pl.read_csv("test.csv")
df = df.filter(pl.col("agglvl_code") == 74)
df = df.select(pl.col(["area_fips", "year", "qtr", "own_code", "industry_code", "month1_emplvl","month2_emplvl", "month3_emplvl", "total_qtrly_wages", "qtrly_estabs"]))
df

area_fips,year,qtr,own_code,industry_code,month1_emplvl,month2_emplvl,month3_emplvl,total_qtrly_wages,qtrly_estabs
i64,i64,i64,i64,str,i64,i64,i64,i64,i64
1001,2021,2,1,"""48-49""",66,67,69,1104361,4
1001,2021,2,1,"""92""",12,16,10,220455,6
1001,2021,2,2,"""44-45""",0,0,0,0,2
1001,2021,2,2,"""61""",0,0,0,0,1
1001,2021,2,2,"""62""",0,0,0,0,3
…,…,…,…,…,…,…,…,…,…
1001,2021,2,5,"""61""",249,247,238,2140390,11
1001,2021,2,5,"""62""",956,957,960,9477935,107
1001,2021,2,5,"""71""",304,335,351,1074434,12
1001,2021,2,5,"""72""",1225,1225,1236,5435576,77


In [7]:
pull_file(url=url, filename="test.csv")

Downloading: 100%|██████████| 131k/131k [00:00<00:00, 770kB/s]
