In [8]:
import numpy as np

import pandas as pd

In [12]:
# Demo frame for comparing the speed of len(df) and df.shape[0]:
# 10,000 rows x 100 columns filled with consecutive integers.
df = pd.DataFrame(np.arange(10_000 * 100).reshape(10_000, 100))

In [13]:
%timeit df.shape[0]

472 ns ± 6.28 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [14]:
%timeit len(df)

330 ns ± 2.97 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [16]:
len(df)

10000

In [17]:
df.shape

(10000, 100)

In [2]:
# Lowest and highest age used when iterating over single ages.
MIN_AGE, MAX_AGE = 50, 65

# Age bin edges spaced three years apart.
AGE_50, AGE_53, AGE_56, AGE_59, AGE_62 = 50, 53, 56, 59, 62

# Additional age bin edges spaced five years apart.
AGE_55, AGE_60, AGE_65 = 55, 60, 65

# Integer codes for the health states.
GOOD_HEALTH, MEDIUM_HEALTH, BAD_HEALTH = 0, 1, 2

In [3]:
# Each bin is a (lower, upper) tuple built by pairing consecutive edges.
# The helpers that consume these bins select lower < age <= upper.
AGE_BINS = list(zip((AGE_50, AGE_55, AGE_60), (AGE_55, AGE_60, AGE_65)))
AGE_BINS_FINE = list(
    zip(
        (AGE_50, AGE_53, AGE_56, AGE_59, AGE_62),
        (AGE_53, AGE_56, AGE_59, AGE_62, AGE_65),
    ),
)

In [36]:
# Choice codes run from 0 to 11. Each consecutive group of four codes shares
# one work status; within a group the offset selects the care arrangement
# (0: no care, 1: formal, 2: informal, 3: formal and informal).
NO_WORK = list(range(0, 4))
PART_TIME = list(range(4, 8))
FULL_TIME = list(range(8, 12))
WORK = [*PART_TIME, *FULL_TIME]


NO_CARE = list(range(0, 12, 4))
FORMAL_CARE = list(range(1, 12, 2))  # % 2 == 1
INFORMAL_CARE = [code for code in range(12) if code % 4 >= 2]
# Plain concatenation: the combination codes (3, 7, 11) appear twice.
ANY_CARE = [*FORMAL_CARE, *INFORMAL_CARE]
COMBINATION_CARE = list(range(3, 12, 4))

In [37]:
COMBINATION_CARE

[3, 7, 11]

In [46]:
# Care codes that involve no informal care, i.e. ANY_CARE minus INFORMAL_CARE.
# sorted() makes the element order explicit instead of relying on set
# iteration order.
# NOTE(review): the no-care codes (0, 4, 8) are not part of ANY_CARE and are
# therefore excluded here as well -- confirm that this is intended.
NO_INFORMAL_CARE = sorted(set(ANY_CARE) - set(INFORMAL_CARE))

In [47]:
NO_INFORMAL_CARE

[1, 5, 9]

In [5]:
def is_not_working(lagged_choice):
    """Return True if ``lagged_choice`` equals one of the codes in NO_WORK."""
    return any(lagged_choice == code for code in NO_WORK)


def is_part_time(lagged_choice):
    """Return True if ``lagged_choice`` equals one of the codes in PART_TIME."""
    return any(lagged_choice == code for code in PART_TIME)


def is_full_time(lagged_choice):
    """Return True if ``lagged_choice`` equals one of the codes in FULL_TIME."""
    return any(lagged_choice == code for code in FULL_TIME)


def is_formal_care(lagged_choice):
    """Return True if ``lagged_choice`` equals one of the codes in FORMAL_CARE."""
    return any(lagged_choice == code for code in FORMAL_CARE)


def is_informal_care(lagged_choice):
    """Return True if ``lagged_choice`` equals one of the codes in INFORMAL_CARE.

    NOTE(review): the original author's remark was "intensive only here";
    presumably INFORMAL_CARE stands for intensive informal care -- confirm.
    """
    return any(lagged_choice == code for code in INFORMAL_CARE)

In [19]:
def create_simulation_df(sim_dict):
    """Flatten a dictionary of simulation arrays into a long DataFrame.

    Args:
        sim_dict: Mapping from variable name to array. The entry
            "taste_shocks" must have shape (n_periods, n_agents, n_choices);
            it is used only to read the dimensions and is not included in
            the result. Every other entry is flattened with ``.ravel()``.
            Assumes those entries have shape (n_periods, n_agents) -- TODO
            confirm against the simulator output.

    Returns:
        DataFrame with one row per (period, agent) pair and one column per
        remaining key of ``sim_dict``. The index is a MultiIndex with the
        levels "period" and "agent".
    """
    n_periods, n_agents, _ = sim_dict["taste_shocks"].shape

    columns = {
        key: val.ravel() for key, val in sim_dict.items() if key != "taste_shocks"
    }

    # Row-major flattening of a (n_periods, n_agents) array iterates over
    # agents within each period, which is exactly the order produced by
    # from_product([periods, agents]). Indexing by agents alone would give an
    # index shorter than the flattened columns.
    index = pd.MultiIndex.from_product(
        [np.arange(n_periods), np.arange(n_agents)],
        names=["period", "agent"],
    )

    return pd.DataFrame(columns, index=index)

In [None]:
def compute_wealth_and_wage(df, params, wage_offer_func):
    """Add the columns "wealth" and "wage" to ``df`` in place.

    Args:
        df: DataFrame with the columns "savings", "consumption" and "age".
        params: Passed through unchanged as second argument of
            ``wage_offer_func``.
        wage_offer_func: Callable invoked as ``wage_offer_func(df["age"], params)``;
            its result is stored in the "wage" column.

    Returns:
        The same ``df`` object, now containing "wealth" (savings plus
        consumption) and "wage".
    """
    savings, consumption = df["savings"], df["consumption"]
    df["wealth"] = savings.add(consumption)

    ages = df["age"]
    df["wage"] = wage_offer_func(ages, params)

    return df

In [33]:
def simulate_moments(df):
    """Compute the simulated moments from the simulation DataFrame.

    Open questions carried over from the original notes:
      - Is a MultiIndex ["period", "agent"] necessary, or should "agent" and
        "period" be regular columns? "age" is a column in either case.
      - Is ``.loc`` needed for the boolean selections in the helpers?

    Args:
        df: Simulation data. It is handed unchanged to every helper; see the
            helpers for the columns they read.

    Returns:
        dict mapping the name of each moment to the value returned by the
        corresponding helper, in the order in which they are computed.
    """
    moments = {}

    # share not working by age bin
    moments["no_work_by_age_bin"] = get_share_by_age_bin(df, lagged_choice=NO_WORK)

    # share working by age
    moments["share_working_by_age"] = get_share_by_age(df, lagged_choice=WORK)
    moments["share_working_full_time_by_age"] = get_share_by_age(
        df,
        lagged_choice=FULL_TIME,
    )

    # Income by age; the working choices are passed explicitly.
    moments["net_income_by_age"] = get_income_by_age(df, lagged_choice=WORK)

    # share working by caregiving type (and age bin) --> to be checked
    work_sets = {
        "working": WORK,
        "working_full_time": FULL_TIME,
        "working_part_time": PART_TIME,
    }
    informal_care_sets = {
        "informal_care": INFORMAL_CARE,
        "no_informal_care": NO_INFORMAL_CARE,
    }
    for work_label, work_choices in work_sets.items():
        for care_label, care_choices in informal_care_sets.items():
            key = f"share_{work_label}_{care_label}_by_age_bin"
            moments[key] = get_share_by_informal_care_type_by_age_bin(
                df,
                lagged_choice=work_choices,
                care_type=care_choices,
            )

    # caregiving status by parental health, for mother and father
    # NOTE(review): FORMAL_CARE and INFORMAL_CARE both contain the combination
    # codes (3, 7, 11), so the "only_*" moments overlap with
    # "combination_care" -- confirm whether that is intended.
    care_sets = {
        "only_informal_care": INFORMAL_CARE,
        "only_formal_care": FORMAL_CARE,
        "combination_care": COMBINATION_CARE,
    }
    # "couple": the other parent is alive, "single": the other parent is not.
    other_parent_status = {"couple": True, "single": False}
    for parent in ("mother", "father"):
        for care_label, care_choices in care_sets.items():
            for status_label, other_alive in other_parent_status.items():
                key = f"{care_label}_by_{parent}_health_{status_label}"
                moments[key] = get_caregiving_status_by_parental_health(
                    df,
                    care_choice=care_choices,
                    parent=parent,
                    is_other_parent_alive=other_alive,
                )

    # work transitions
    work_states = {
        "no_work": NO_WORK,
        "part_time": PART_TIME,
        "full_time": FULL_TIME,
    }
    for from_label, from_choices in work_states.items():
        for to_label, to_choices in work_states.items():
            moments[f"{from_label}_to_{to_label}"] = get_work_transition(
                df,
                from_choices,
                to_choices,
            )

    # caregiving transitions
    care_states = {
        "no_care": NO_CARE,
        "informal_care": INFORMAL_CARE,
        "formal_care": FORMAL_CARE,
    }
    for from_label, from_choices in care_states.items():
        for to_label, to_choices in care_states.items():
            moments[f"{from_label}_to_{to_label}"] = get_work_transition(
                df,
                from_choices,
                to_choices,
            )

    return moments

In [None]:
def get_share_by_age(df, lagged_choice):
    """Share of rows whose lagged choice is in ``lagged_choice``, per age.

    Args:
        df: DataFrame with the columns "age" and "lagged_choice".
        lagged_choice: Collection of choice codes to count.

    Returns:
        List with one share for every age from MIN_AGE + 1 up to and
        including MAX_AGE. An age without any rows raises ZeroDivisionError.
    """
    shares = []
    for age in range(MIN_AGE + 1, MAX_AGE + 1):
        at_age = df["age"] == age
        chosen = df["lagged_choice"].isin(lagged_choice)
        n_chosen = int((at_age & chosen).sum())
        n_at_age = int((df["age"] == age).sum())
        shares.append(n_chosen / n_at_age)
    return shares

In [None]:
def get_share_by_age_bin(df, lagged_choice):
    """Share of rows whose lagged choice is in ``lagged_choice``, per age bin.

    Args:
        df: DataFrame with the columns "age" and "lagged_choice".
        lagged_choice: Collection of choice codes to count.

    Returns:
        List with one share per entry of AGE_BINS. A bin covers the ages with
        lower < age <= upper. An empty bin raises ZeroDivisionError.
    """
    shares = []
    for age_bin in AGE_BINS:
        lower, upper = age_bin[0], age_bin[1]
        in_bin = (df["age"] > lower) & (df["age"] <= upper)
        chosen = df["lagged_choice"].isin(lagged_choice)
        n_chosen = int((in_bin & chosen).sum())
        n_in_bin = int(((df["age"] > lower) & (df["age"] <= upper)).sum())
        shares.append(n_chosen / n_in_bin)
    return shares

In [None]:
def get_share_by_informal_care_type_by_age_bin(df, lagged_choice, care_type):
    """Share with a given lagged choice among rows of a care type, per age bin.

    Open question from the original author: really the lagged choice, or
    rather the current (end of period) choice?

    Args:
        df: DataFrame with the columns "age" and "lagged_choice".
        lagged_choice: Choice codes counted in the numerator.
        care_type: Choice codes that define the reference group.

    Returns:
        List with one share per entry of AGE_BINS (lower < age <= upper).
        A bin without any row of the care type raises ZeroDivisionError.
    """
    shares = []
    for age_bin in AGE_BINS:
        lower, upper = age_bin[0], age_bin[1]
        in_group = (
            (df["age"] > lower)
            & (df["age"] <= upper)
            & (df["lagged_choice"].isin(care_type))
        )
        chosen = df["lagged_choice"].isin(lagged_choice)
        n_chosen = int((in_group & chosen).sum())
        n_group = int(in_group.sum())
        shares.append(n_chosen / n_group)
    return shares

In [None]:
def _get_share_by_informal_care_type(df, lagged_choice, care_type):
    """Really lagged choice or rather current (end of period) choice?"""
    return [
        len(
            df[
                (df["lagged_choice"].isin(care_type))
                & (df["lagged_choice"].isin(lagged_choice))
            ],
        )
        / len(df[(df["lagged_choice"].isin(care_type))]),
    ]

In [None]:
def get_income_by_age(df, lagged_choice=None):
    """Mean net income by age, in absolute (non-log) terms.

    After taxes and transfers as reported in SHARE.

    Adds the columns "working_hours" and "income" to ``df`` in place.

    Args:
        df: DataFrame with the columns "age", "lagged_choice" and "wage".
        lagged_choice: Not used by the computation; kept so that existing
            calls that pass it keep working.

    Returns:
        List with the mean of "income" for every age from MIN_AGE + 1 up to
        and including MAX_AGE (NaN for an age without rows).
    """
    # Boolean masks keep the full row index, so the two terms line up row by
    # row. Filtering the frame first would yield frames with disjoint indices
    # that can neither be added meaningfully nor assigned to a single column.
    works_part_time = df["lagged_choice"].isin(PART_TIME)
    works_full_time = df["lagged_choice"].isin(FULL_TIME)

    # Rows with any other choice get zero hours.
    df["working_hours"] = (
        works_part_time * PART_TIME_HOURS + works_full_time * FULL_TIME_HOURS
    )
    df["income"] = df["working_hours"] * df["wage"]

    return [
        df.loc[df["age"] == age, "income"].mean()
        for age in range(MIN_AGE + 1, MAX_AGE + 1)
    ]

In [None]:
def get_wealth_beginning_of_period_by_age_bin(df):
    """Mean of the "wealth" column for every age bin.

    Notes from the original author:
      - savings_current_period = resources_beginning_of_period - consumption
      - beginning or end of period wealth? end of period wealth = savings

    Args:
        df: DataFrame with the columns "age" and "wealth".

    Returns:
        List with one mean per entry of AGE_BINS (lower < age <= upper).
    """
    means = []
    for age_bin in AGE_BINS:
        lower, upper = age_bin[0], age_bin[1]
        in_bin = (df["age"] > lower) & (df["age"] <= upper)
        wealth_in_bin = df.loc[in_bin, "wealth"]
        means.append(wealth_in_bin.mean())
    return means

In [22]:
def get_share_by_informal_care_type_by_age_bin(
    df,
    lagged_choice,
    is_informal_care=None,
    care_type=None,
    age_bins=None,
):
    """Share with a given lagged choice within a care group, per age bin.

    The care group is selected in one of two ways:
      - ``care_type``: rows whose lagged choice is in this collection.
      - ``is_informal_care``: True selects rows whose lagged choice is in
        INFORMAL_CARE, False selects all remaining rows.
    ``care_type`` takes precedence when both are given.

    Args:
        df: DataFrame with the columns "age" and "lagged_choice".
        lagged_choice: Choice codes counted in the numerator.
        is_informal_care: Boolean selector for the care group (see above).
        care_type: Explicit choice codes of the care group (see above).
        age_bins: Sequence of (lower, upper) tuples; defaults to AGE_BINS.

    Returns:
        List with one share per age bin (lower < age <= upper).

    Raises:
        ValueError: If neither ``is_informal_care`` nor ``care_type`` is given.
        ZeroDivisionError: If a bin contains no row of the care group.
    """
    if care_type is not None:
        care_mask = df["lagged_choice"].isin(care_type)
    elif is_informal_care is not None:
        # Compare instead of multiply: multiplying the mask by False yields an
        # all-False mask, i.e. an empty group rather than its complement.
        care_mask = df["lagged_choice"].isin(INFORMAL_CARE) == bool(is_informal_care)
    else:
        raise ValueError("Pass either `is_informal_care` or `care_type`.")

    bins = AGE_BINS if age_bins is None else age_bins
    choice_mask = df["lagged_choice"].isin(lagged_choice)

    shares = []
    for lower, upper in bins:
        in_group = care_mask & (df["age"] > lower) & (df["age"] <= upper)
        shares.append(len(df[in_group & choice_mask]) / len(df[in_group]))
    return shares

In [None]:
def get_caregiving_status_by_parental_health(
    df,
    care_choice,
    parent,
    is_other_parent_alive,
):
    """Count rows with a given care choice for each health state of a parent.

    Args:
        df: DataFrame with the columns "<parent>_health",
            "<other parent>_alive" and "choice".
        care_choice: Choice codes to count.
        parent: "mother" or "father"; the other parent is derived from it.
            Any other value results in an empty other-parent name.
        is_other_parent_alive: Value the other parent's "_alive" column must
            equal.

    Returns:
        List of three row counts, for GOOD_HEALTH, MEDIUM_HEALTH and
        BAD_HEALTH, in that order.
    """
    if parent == "mother":
        other_parent = "father"
    elif parent == "father":
        other_parent = "mother"
    else:
        other_parent = ""

    counts = []
    for health in (GOOD_HEALTH, MEDIUM_HEALTH, BAD_HEALTH):
        selected = (
            (df[f"{parent}_health"] == health)
            & (df[f"{other_parent}_alive"] == is_other_parent_alive)
            & (df["choice"].isin(care_choice))
        )
        counts.append(int(selected.sum()))
    return counts

In [23]:
# Scratch input for trying out the other-parent lookup in the next cell.
parent = "mother"

In [24]:
# "mother" maps to "father" and vice versa; anything else gives "".
other_parent = (
    "father" if parent == "mother" else "mother" if parent == "father" else ""
)

In [25]:
other_parent

'father'

In [32]:
"father" * 1 + "father" * 0 + "" + ""

'father'

In [20]:
def get_work_transition(df, lagged_choice, current_choice):
    """Share of rows moving from one set of choices to another.

    The data is passed in explicitly instead of being read from a
    module-level ``df``, matching how the function is called with ``df`` as
    first argument.

    Args:
        df: DataFrame with the columns "lagged_choice" and "choice".
        lagged_choice: Choice codes of the origin state.
        current_choice: Choice codes of the destination state.

    Returns:
        Number of rows with "lagged_choice" in ``lagged_choice`` and "choice"
        in ``current_choice``, divided by the number of rows with
        "lagged_choice" in ``lagged_choice``.

    Raises:
        ZeroDivisionError: If no row has its lagged choice in ``lagged_choice``.
    """
    came_from = df["lagged_choice"].isin(lagged_choice)
    moved_to = df["choice"].isin(current_choice)

    return len(df[came_from & moved_to]) / len(df[came_from])

In [None]:
def get_care_transition(df, lagged_choice, current_choice):
    """Share of rows moving from one set of care choices to another.

    The data is passed in explicitly instead of being read from a
    module-level ``df``.

    Args:
        df: DataFrame with the columns "lagged_choice" and "choice".
        lagged_choice: Choice codes of the origin state.
        current_choice: Choice codes of the destination state.

    Returns:
        Number of rows with "lagged_choice" in ``lagged_choice`` and "choice"
        in ``current_choice``, divided by the number of rows with
        "lagged_choice" in ``lagged_choice``.

    Raises:
        ZeroDivisionError: If no row has its lagged choice in ``lagged_choice``.
    """
    came_from = df["lagged_choice"].isin(lagged_choice)
    moved_to = df["choice"].isin(current_choice)

    return len(df[came_from & moved_to]) / len(df[came_from])

In [21]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
1,100,101,102,103,104,105,106,107,108,109,...,190,191,192,193,194,195,196,197,198,199
2,200,201,202,203,204,205,206,207,208,209,...,290,291,292,293,294,295,296,297,298,299
3,300,301,302,303,304,305,306,307,308,309,...,390,391,392,393,394,395,396,397,398,399
4,400,401,402,403,404,405,406,407,408,409,...,490,491,492,493,494,495,496,497,498,499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,999500,999501,999502,999503,999504,999505,999506,999507,999508,999509,...,999590,999591,999592,999593,999594,999595,999596,999597,999598,999599
9996,999600,999601,999602,999603,999604,999605,999606,999607,999608,999609,...,999690,999691,999692,999693,999694,999695,999696,999697,999698,999699
9997,999700,999701,999702,999703,999704,999705,999706,999707,999708,999709,...,999790,999791,999792,999793,999794,999795,999796,999797,999798,999799
9998,999800,999801,999802,999803,999804,999805,999806,999807,999808,999809,...,999890,999891,999892,999893,999894,999895,999896,999897,999898,999899
