In [1]:
from pathlib import Path
from ipumspy import IpumsApiClient, UsaExtract, readers

In [2]:
import os
from dotenv import load_dotenv
# load_dotenv() is expected to copy variables from a local .env file into the
# process environment (python-dotenv behavior -- confirm against its docs),
# which keeps the key out of the notebook itself.
load_dotenv()
# Subscript access (rather than .get) makes a missing "apikey" variable fail
# immediately with a KeyError instead of passing None along to later cells.
apikey = os.environ["apikey"]

# Extract Data From IPUMS

Documentation for the `ipumspy` library is available at [ipumspy.readthedocs.io](https://ipumspy.readthedocs.io/en/latest/index.html).

A list of the available IPUMS USA sample IDs is [here](https://usa.ipums.org/usa-action/samples/sample_ids).

In [3]:
# Folder that receives the downloaded extract files (resolved against the
# current working directory).
DOWNLOAD_DIR = Path("../data/test_data")

# Client object used for every IPUMS API call in the cells below, built from
# the key held in `apikey`.
IPUMS_API_KEY = apikey
ipums = IpumsApiClient(IPUMS_API_KEY)

In [4]:
# Describe the extract request: the samples to draw from, the variables to
# include, and the format of the delivered data file.
extract_samples = ["us2012b"]
extract_variables = ["AGE", "SEX"]
extract = UsaExtract(
    extract_samples,
    extract_variables,
    data_format="csv",
    description="My first IPUMS USA extract!",
)

# Send the request to IPUMS and report the id attached to the extract.
ipums.submit_extract(extract)
print(f"Extract submitted with id {extract.extract_id}")

# Do not continue to the download step until IPUMS has finished the extract.
ipums.wait_for_extract(extract)

Extract submitted with id 6


In [5]:
# Download the extract
# Files are written to DOWNLOAD_DIR; the next cell looks there for the DDI
# codebook (*.xml) and for the data file the codebook names.
ipums.download_extract(extract, download_dir=DOWNLOAD_DIR)

In [6]:
# Get the DDI
# DOWNLOAD_DIR can hold codebooks from earlier extracts, and glob() makes no
# promise about the order in which it yields paths. Sort by modification time
# and take the newest one so the codebook matches the download that just ran,
# instead of taking whichever file happens to be listed first.
ddi_candidates = sorted(
    DOWNLOAD_DIR.glob("*.xml"),
    key=lambda ddi_path: ddi_path.stat().st_mtime,
)
if not ddi_candidates:
    # Fail with an actionable message rather than a bare IndexError.
    raise FileNotFoundError(
        f"No DDI codebook (*.xml) found in {DOWNLOAD_DIR}; "
        "run the download cell before reading the extract."
    )
ddi_file_path = ddi_candidates[-1]
ddi_object = readers.read_ipums_ddi(ddi_file_path)

# Get the data
# The codebook records the name of its data file, so the data path is derived
# from the codebook rather than guessed with a second glob.
ipums_df = readers.read_microdata(
    ddi_object,
    DOWNLOAD_DIR / ddi_object.file_description.filename,
)

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.


In [7]:
# Show the loaded microdata; the rendered table below elides the middle rows.
ipums_df

Unnamed: 0,YEAR,SAMPLE,SERIAL,CBSERIAL,HHWT,GQ,PERNUM,PERWT,SEX,AGE
0,2012,201202,1,101,9.0,4,1,9.0,1,51
1,2012,201202,2,202,16.0,3,1,16.0,2,66
2,2012,201202,3,203,69.0,1,1,69.0,2,33
3,2012,201202,3,203,69.0,1,2,65.0,2,16
4,2012,201202,3,203,69.0,1,3,55.0,1,15
...,...,...,...,...,...,...,...,...,...,...
33052,2012,201202,13819,1493470,110.0,1,2,98.0,1,14
33053,2012,201202,13819,1493470,110.0,1,3,189.0,1,3
33054,2012,201202,13819,1493470,110.0,1,4,129.0,2,36
33055,2012,201202,13820,1493812,69.0,1,1,69.0,2,68
