In [1]:
import pandas as pd

In [2]:
# Load the processed 2016-19 dataset into a DataFrame. The path is relative to
# the notebook's working directory and the file has no extension; it is parsed
# as CSV.
rsp_df = pd.read_csv("../Data/processed/dataset_2016-19")

In [4]:
# Create target variable


def create_target(x, poverty_thresholds=None, per_additional_person=4160):
    """Label one survey row as a fully successful resettlement (1) or not (0).

    A row is labelled 1 only when every check below passes:

    * Employment: ``x["ui_emprate"]`` is neither "unemployed" nor
      "don't know and/or refused". Any other status (e.g. in education /
      training, or not eligible to work) passes this check.
    * Income: only when ``x["ui_emprate"]`` is "employed", the sum of
      ``x["ui_qn8a_annual"]`` and ``x["ui_qn10a_annual"]`` must not be below
      the poverty threshold for the household size in ``x["numppl"]``.
    * Residency: ``x["ui_lpr"]`` is "already adjusted lpr status" or
      "plans to adjust lpr status in future".

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row passed by ``DataFrame.apply``)
        Must support ``x[key]`` for the five keys named above.
    poverty_thresholds : sequence of numbers, optional
        Non-empty table where entry ``i`` is the annual poverty threshold for
        a household of ``i + 1`` people. Defaults to the 2016 values for
        households of one to eight people.
    per_additional_person : number, optional
        Amount added to the last table entry for each household member beyond
        the end of the table.

    Returns
    -------
    int
        1 if all checks pass, otherwise 0.
    """
    if poverty_thresholds is None:
        # 2016 values, household sizes 1 through 8.
        poverty_thresholds = (
            11880, 16020, 20160, 24300, 28440, 32580, 36730, 40890,
        )

    employment_status = x["ui_emprate"]

    # Unemployed subjects, or ones whose status is unknown, fail immediately.
    if employment_status in ("unemployed", "don't know and/or refused"):
        return 0

    # Employed subjects must earn at least the threshold for their household size.
    if employment_status == "employed":
        try:
            annual_income = float(x["ui_qn8a_annual"]) + float(x["ui_qn10a_annual"])
            # int() raises ValueError on NaN, so a missing household size is
            # rejected here together with unparseable income strings.
            household_size = int(float(x["numppl"]))
        except (TypeError, ValueError, OverflowError):
            return 0

        # A household of zero or fewer people is invalid data. Without this
        # guard the lookup below would wrap around to the end of the table.
        if household_size < 1:
            return 0

        # Households larger than the table take the last entry plus a fixed
        # increment per extra person, instead of raising IndexError.
        table_size = len(poverty_thresholds)
        extra_people = max(0, household_size - table_size)
        threshold = (
            poverty_thresholds[min(household_size, table_size) - 1]
            + extra_people * per_additional_person
        )

        # NOTE(review): a missing (NaN) income makes this comparison False, so
        # such a row is not rejected here. Confirm whether that is intended.
        if annual_income < threshold:
            return 0

    # Subject must be a permanent resident already or plan to become one.
    if x["ui_lpr"] not in (
        "already adjusted lpr status",
        "plans to adjust lpr status in future",
    ):
        return 0

    return 1


# Label every row: create_target is called once per row (axis=1) and its 0/1
# result is stored in the new "t_resettlement" column.
rsp_df["t_resettlement"] = rsp_df.apply(create_target, axis=1)

In [5]:
# Show breakdown of target variable counts: number of rows labelled
# 1 (all resettlement criteria met) versus 0 (at least one criterion failed).
rsp_df["t_resettlement"].value_counts()

t_resettlement
1    2141
0    1141
Name: count, dtype: int64

In [6]:
# Write output: save the dataset, including the new "t_resettlement" column,
# as CSV. index=False keeps the DataFrame index out of the file.

rsp_df.to_csv("../Data/processed/dataset_2016-19_target", index=False)