In [None]:
from empiricaldist import Pmf

In [None]:
# Small example sequence (note the repeated 2) used to build a Pmf below.
seq = [1, 2, 2, 3, 5]

In [None]:
# Build a Pmf from the example sequence.
pmf = Pmf.from_seq(seq)

In [None]:
# Display the Pmf built from seq.
pmf

In [None]:
# Show the Pmf's qs and ps attributes side by side
# (presumably the quantities and their probabilities — confirm in empiricaldist docs).
pmf.qs, pmf.ps

In [None]:
import pandas as pd

# Class-size bins: left-closed intervals of width 5 covering 5 up to 50,
# with the index labelled "class size".
ranges = pd.interval_range(
    start=5, end=50, freq=5, closed="left", name="class size"
)

# One row per bin; "count" holds the number of classes in that bin.
data = pd.DataFrame({"count": [8, 8, 14, 4, 6, 12, 8, 3, 2]}, index=ranges)
data

In [None]:
# Total of the "count" column across all bins.
data["count"].sum()

In [None]:
# Represent each bin by its left edge plus 2 (e.g. 7 for the bin starting at 5).
sizes = ranges.left + 2
# Display the sizes together with their sum, their count, and their unweighted average.
sizes, sum(sizes), len(sizes), sum(sizes) / len(sizes)

In [None]:
# Build a Pmf named "actual" from the per-bin counts, indexed by the representative sizes.
# NOTE(review): counts is indexed by the interval bins while sizes holds integers;
# presumably pandas aligns each size to the interval that contains it — confirm.
counts = data["count"]
actual_pmf = Pmf(counts, sizes, name="actual")
# normalize() is called for its effect on actual_pmf; its return value is what the cell displays.
actual_pmf.normalize()

In [None]:
# Mean of the "actual" distribution.
actual_pmf.mean()

In [None]:
# Show the pieces of the mean computation: the ps, the qs,
# their element-wise products, and the sum of those products.
(
    actual_pmf.ps,
    actual_pmf.qs,
    actual_pmf.ps * actual_pmf.qs,
    (actual_pmf.ps * actual_pmf.qs).sum(),  # the mean
)

In [None]:
from nsfg import get_nsfg_groups

# Load the three groups returned by get_nsfg_groups()
# (presumably all live births, first babies, and other babies — confirm in nsfg.py).
live, firsts, others = get_nsfg_groups()

In [None]:
# PMFs of the "prglngth" column for the two groups
# (the plot of these PMFs labels this axis "Weeks").
first_pmf = Pmf.from_seq(firsts["prglngth"], name="firsts")
other_pmf = Pmf.from_seq(others["prglngth"], name="others")


In [None]:
from thinkstats import decorate, two_bar_plots

# Plot both PMFs with two_bar_plots, then label the axes and
# restrict the x-axis to the range 20 to 50.
two_bar_plots(first_pmf, other_pmf)
decorate(xlabel="Weeks", ylabel="Probability", xlim=[20, 50])

## Exercise

In [None]:
from empiricaldist import FreqTab
from nsfg import read_fem_resp

# Load the respondent table (presumably the NSFG female respondent file — confirm in nsfg.py)
# and display its shape.
resp = read_fem_resp()
resp.shape

In [None]:
# Frequency table of the "numbabes" column; display it.
ftab_numbabes = FreqTab.from_seq(resp["numbabes"], name="numbabes")
ftab_numbabes

In [None]:
# PMF of the raw "numbabes" column, drawn as a bar chart.
# NOTE(review): this uses the raw column, so the value 97 (replaced with NaN
# further down) is still part of this PMF; xlim=[-1, 10] limits the x-axis range.
pmf_numbabes = Pmf.from_seq(resp["numbabes"], name="numbabes")
pmf_numbabes.bar()
decorate(xlabel="Number of Babies", ylabel="Probability", xlim=[-1, 10])


In [None]:
import numpy as np

In [None]:
# Replace the value 97 with NaN (presumably a missing-data code — confirm against the codebook).
numbabes = resp["numbabes"].replace(97, np.nan)

In [None]:
# Skewness of numbabes: the mean cubed deviation from the mean, divided by
# the cube of the population standard deviation (ddof=0).
# Center on the mean of the cleaned series `numbabes`. `pmf_numbabes` was
# built from the raw column before 97 was replaced with NaN, so its mean is
# not the mean of the values analyzed here and would be inconsistent with
# the standard deviation in the denominator.
deviations = numbabes - numbabes.mean()
skewness = np.mean(deviations**3) / numbabes.std(ddof=0) ** 3
skewness

In [None]:
def pmf_skewness(pmf):
    """Compute the skewness of a distribution given as a Pmf.

    The result is the probability-weighted sum of cubed deviations from the
    mean, sum(p(x) * (x - mu)**3), divided by the cube of the standard
    deviation.

    pmf: object exposing `qs`, `ps`, `mean()` and `std()`

    returns: the skewness as a float
    """
    mu = pmf.mean()
    # Third central moment: each cubed deviation weighted by its probability.
    third_moment = np.sum(pmf.ps * (pmf.qs - mu) ** 3)
    sigma = pmf.std()
    return third_moment / sigma**3


# Skewness computed from a Pmf of the cleaned numbabes series (97 replaced with NaN).
pmf_skewness(Pmf.from_seq(numbabes, name="numbabes"))


In [None]:
# PMF of the "numkdhh" column; display it.
pmf_numkdhh = Pmf.from_seq(resp["numkdhh"], name="numkdhh")
pmf_numkdhh

In [None]:
def bias(pmf, name):
    """Return a reweighted copy of a Pmf where each probability is scaled by its quantity.

    pmf: Pmf whose `ps` are multiplied element-wise by its `qs`
    name: name given to the new Pmf

    returns: new Pmf over the same quantities, normalized after reweighting
    """
    # Weight every probability by the quantity it belongs to.
    weighted = pmf.qs * pmf.ps

    # Wrap the weights in a fresh Pmf and normalize it before returning.
    biased = Pmf(weighted, index=pmf.qs, name=name)
    biased.normalize()
    return biased

In [None]:
# Reweighted version of the numkdhh PMF produced by bias().
observed_pmf_numkdhh = bias(pmf_numkdhh, "observed_numkdhh")


In [None]:
from thinkstats import two_bar_plots

# Compare the numkdhh PMF with its reweighted version from bias().
two_bar_plots(pmf_numkdhh, observed_pmf_numkdhh)
# The x-axis shows numkdhh values, not class sizes; the old "Class size"
# label was carried over from the class-size example.
# NOTE(review): label wording assumes numkdhh counts children in the
# respondent's household — confirm against the NSFG codebook.
decorate(xlabel="Number of children in household", ylabel="PMF")