In [None]:
from os.path import basename, exists


def download(url):
    """Fetch `url` into the working directory unless it is already there.

    The local file name is the last path component of the URL. If a file
    with that name already exists, nothing is fetched and nothing is
    printed; otherwise the file is retrieved and a "Downloaded <name>"
    line is printed.

    url: address of the file to fetch
    """
    target = basename(url)
    if exists(target):
        # Already present locally; skip the request.
        return

    # Imported lazily so urllib is only loaded when a fetch is needed.
    from urllib.request import urlretrieve

    saved_path, _headers = urlretrieve(url, target)
    print(f"Downloaded {saved_path}")


# Fetch the thinkstats.py support module; download() skips the request when a
# file with that name already exists in the working directory.
download("https://github.com/AllenDowney/ThinkStats/raw/v3/nb/thinkstats.py")

In [None]:
# Fetch the relay.py module and the Apr25_27thAn_set1.shtml data file; each
# call is a no-op when the file is already present locally.
download("https://github.com/AllenDowney/ThinkStats/raw/v3/nb/relay.py")
download(
    "https://github.com/AllenDowney/ThinkStats/raw/v3/data/Apr25_27thAn_set1.shtml"
)

In [None]:
import pandas as pd

# Example Series holding the integers 0 through 9.
d = pd.Series(range(10))
# Bare name as the last expression so the notebook displays the Series.
d

In [None]:
def percentile_rank(x, seq):
    """Percentile rank of x.

    Computes the percentage of values in `seq` that are less than or equal
    to `x`.

    x: value
    seq: sequence of values; an array-like object (pandas Series, NumPy
         array) is used as-is, and any other iterable (list, tuple,
         range, ...) is converted to a NumPy array first

    returns: percentile rank 0-100
    """
    if not hasattr(seq, "__array__"):
        # Plain Python sequences do not support elementwise `seq <= x`
        # (a list raises TypeError), so convert them before comparing.
        import numpy as np

        seq = np.asarray(list(seq))
    return (seq <= x).mean() * 100

In [None]:
# NOTE(review): no visible cell downloads nsfg.py — confirm it is available
# locally before this import runs in a fresh environment.
from nsfg import get_nsfg_groups

# Unpack the three groups returned by get_nsfg_groups() (presumably all live
# births, first babies, and all other babies — verify against nsfg.py).
live, firsts, others = get_nsfg_groups()

In [None]:
# Values of the "totalwgt_lb" column for the `firsts` group with missing
# entries dropped; the trailing expression displays their mean.
first_weights = firsts["totalwgt_lb"].dropna()
first_weights.mean()

In [None]:
# Same selection for the `others` group: "totalwgt_lb" values with missing
# entries dropped, followed by their mean as the displayed result.
other_weights = others["totalwgt_lb"].dropna()
other_weights.mean()

In [None]:
from empiricaldist import Pmf

# Build one Pmf per group from the weight values, tagging each with a name
# ("first" / "other").
first_pmf = Pmf.from_seq(first_weights, name="first")
other_pmf = Pmf.from_seq(other_weights, name="other")

In [None]:
from thinkstats import decorate, two_bar_plots

# Draw both PMFs with two_bar_plots (bar width 0.06), then label the axes.
two_bar_plots(first_pmf, other_pmf, width=0.06)
decorate(xlabel="Weight (pounds)", ylabel="PMF")

In [None]:
# Derive a CDF from each PMF via make_cdf().
first_cdf = first_pmf.make_cdf()
other_cdf = other_pmf.make_cdf()

In [None]:
# Plot both CDFs with distinct styling (dashed line for first, alpha=0.5 for
# other), then label the axes.
first_cdf.plot(ls="--")
other_cdf.plot(alpha=0.5)
decorate(xlabel="Weight (pounds)", ylabel="CDF")

In [None]:
from nsfg import read_stata

# File names of the .dct and gzipped .dat inputs handed to read_stata.
# NOTE(review): neither file is fetched by a download() call in the visible
# cells — confirm both exist in the working directory.
dct_file = "2002FemPreg.dct"
dat_file = "2002FemPreg.dat.gz"

# Load the pregnancy data described by the two files above.
preg = read_stata(dct_file, dat_file)

In [None]:
# Select the "birthwgt_lb" and "birthwgt_oz" columns of preg (presumably the
# pounds and ounces parts of birth weight — confirm against the codebook).
birthwgt_lb = preg["birthwgt_lb"]
birthwgt_oz = preg["birthwgt_oz"]

In [None]:
from empiricaldist import FreqTab, Hist

# Build a Hist and a FreqTab from the same values and take the last five
# entries of each; the cell displays the pair as a tuple.
Hist.from_seq(birthwgt_oz).tail(5), FreqTab.from_seq(birthwgt_oz).tail(5)

In [None]:
import numpy as np

In [None]:
# Ten draws from NumPy's global random generator, uniform on [0, 1).
# NOTE(review): no seed is set in the visible cells, so the values differ on
# every run — confirm that is intended.
np.random.random(size=10)