In [None]:
from os.path import basename, exists


def download(url):
    """Fetch `url` into the current directory unless the file is already there.

    The local file name is the last path component of `url`. The transfer is
    written to a temporary ``<name>.part`` file and renamed to its final name
    only after it completes, so an interrupted or failed download never leaves
    a truncated file that a later call would mistake for a finished one.

    Args:
        url: Address of the file to fetch; it must end in a file name.

    Returns:
        None. Prints "Downloaded <name>" when a transfer actually happened.

    Raises:
        ValueError: if `url` has no file-name component (e.g. it ends in "/").
        Exception: whatever `urlretrieve` raises on failure; the partial file
            is removed before the exception propagates.
    """
    filename = basename(url)
    if not filename:
        raise ValueError("URL has no file name component: " + url)
    if exists(filename):
        return

    # These are only needed on the path that actually downloads something.
    import os
    from urllib.request import urlretrieve

    partial = filename + ".part"
    try:
        urlretrieve(url, partial)
        # Rename within the same directory: the final name only ever appears
        # with the complete content behind it.
        os.replace(partial, filename)
    finally:
        # No-op after a successful rename; cleans up after a failed transfer.
        if exists(partial):
            os.remove(partial)
    print("Downloaded " + filename)


In [None]:
# Fetch the thinkstats helper module plus the 2002FemPreg dictionary and data
# files; each one is skipped by download() when it is already present locally.
for url in (
    "https://github.com/AllenDowney/ThinkStats/raw/v3/nb/thinkstats.py",
    "https://github.com/AllenDowney/ThinkStats/raw/v3/data/2002FemPreg.dct",
    "https://github.com/AllenDowney/ThinkStats/raw/v3/data/2002FemPreg.dat.gz",
):
    download(url)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import HTML
from thinkstats import decorate

In [None]:
# Touch each name imported in the previous cell once. Only the final
# expression (`decorate`) is echoed as the cell's output; the results of the
# other lines are discarded.
# NOTE(review): presumably this exists to mark the imports as used (e.g. to
# keep a linter quiet) — confirm, or drop the cell.
np.ceil(123.45)
pd.__version__
plt.__name__
HTML.__name__
decorate

In [None]:
from statadict import parse_stata_dict


def read_stata(dct_file, dat_file):
    """Load a gzip-compressed fixed-width file described by a Stata dictionary.

    Args:
        dct_file: Stata dictionary file, handed to `parse_stata_dict`.
        dat_file: gzip-compressed fixed-width data file.

    Returns:
        The DataFrame built by `pd.read_fwf`, with column names and column
        spans taken from the parsed dictionary.
    """
    layout = parse_stata_dict(dct_file)
    fwf_options = dict(
        names=layout.names,
        colspecs=layout.colspecs,
        compression="gzip",
    )
    return pd.read_fwf(dat_file, **fwf_options)

In [None]:
# Names of the Stata dictionary and the gzip-compressed data file, then the
# table read from them.
dct_file, dat_file = "2002FemPreg.dct", "2002FemPreg.dat.gz"
preg = read_stata(dct_file=dct_file, dat_file=dat_file)

In [None]:
# Divide agepreg by 100 and store the result back in the same column.
# NOTE(review): the column is overwritten, so running this cell a second time
# divides by 100 again — re-run the cell that loads `preg` first.
preg["agepreg"] = preg["agepreg"] / 100.0

In [None]:
# Show summary statistics for the agepreg column.
preg["agepreg"].describe()

In [None]:
# 97, 98 and 99 in the birth-weight columns are special codes rather than
# measurements, so they are turned into NaN before any arithmetic. Both
# columns need this: a code left in birthwgt_lb would otherwise show up in
# totalwgt_lb as a 97-99 lb birth weight.
# NOTE(review): code list taken from the NSFG codebook — confirm it is the
# complete set for birthwgt_lb.
missing_codes = [97, 98, 99]
preg["birthwgt_lb"] = preg["birthwgt_lb"].replace(missing_codes, np.nan)
preg["birthwgt_oz"] = preg["birthwgt_oz"].replace(missing_codes, np.nan)

# Combine pounds and ounces (16 oz per lb) into a single weight in pounds;
# NaN in either part makes the total NaN.
preg["totalwgt_lb"] = preg["birthwgt_lb"] + preg["birthwgt_oz"] / 16.0

In [None]:
# Show summary statistics for the totalwgt_lb column.
preg["totalwgt_lb"].describe()

In [None]:
# Keep only the rows of preg whose caseid equals 10229, then show the
# (rows, columns) shape of that selection.
subset = preg.query("caseid == 10229")
subset.shape

In [None]:
# Show the outcome values of the selected rows as a plain array.
subset["outcome"].values

In [None]:
# Count how often each birthord value occurs, ordered by value;
# dropna=False keeps a row for missing values as well.
preg["birthord"].value_counts(dropna=False).sort_index()

In [None]:
# Kilograms per pound, rounded to six decimal places.
KG_PER_LB = 0.453592

# Same weights as totalwgt_lb, expressed in kilograms.
preg["totalwgt_kg"] = preg["totalwgt_lb"] * KG_PER_LB

In [None]:
# Show summary statistics for the totalwgt_kg column.
preg["totalwgt_kg"].describe()