# Predict caregivers’ depression

## Initial Configuration

In [67]:
import pandas as pd
import numpy as np
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (e.g.
# SettingWithCopyWarning) — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [68]:
# Load the pickled dataset and recode selected columns for better model training.
# NOTE(review): unpickling can execute arbitrary code — only load a trusted file.
data = pd.read_pickle("../data/data.pkl").assign(
    # shift every BB1 code down by one
    BB1=lambda frame: frame["BB1"] - 1,
    # swap the 0 and 1 codes; any other value is left untouched
    G1eA=lambda frame: frame["G1eA"].replace({0: 1, 1: 0}),
    # fold the -1 code into category 4
    G1fA=lambda frame: frame["G1fA"].replace({-1: 4}),
    # same 0 <-> 1 swap for the remaining flag columns
    G1gA=lambda frame: frame["G1gA"].replace({0: 1, 1: 0}),
    G1hA=lambda frame: frame["G1hA"].replace({0: 1, 1: 0}),
    G1iA=lambda frame: frame["G1iA"].replace({0: 1, 1: 0}),
)
print(len(data))

25394


In [69]:
# Split the dataset on the "G1eA" value: rows equal to 2 go to `data_no_helper`,
# everything else to `data_has_helper`.
# .copy() gives each subset its own frame, so later in-place edits (such as a
# column rename) never operate on a slice of `data`.
data_no_helper = data[data["G1eA"] == 2].copy()
data_has_helper = data[data["G1eA"] != 2].copy()

# Discard observations with some missing criteria: a row is set aside when any
# of these columns holds -1.
null_cols = ["G1gA", "G1hA", "G1iA"]
has_missing = data_has_helper[null_cols].eq(-1).any(axis=1)

# Reuse the same boolean mask for both halves instead of filtering by index
# labels, which would drop extra rows if the index ever contained duplicates.
data_discard = data_has_helper[has_missing].copy()
data_has_helper = data_has_helper[~has_missing].copy()

# Report the sizes: kept rows with a helper, rows without a helper, discarded rows.
print(f"{len(data_has_helper)} {len(data_no_helper)} {len(data_discard)}")

24672 707 15


- `sex`  
    - `0` represents **male**
    - `1` represents **female**  
- `live_with_client`
    - `0` represents **No**
    - `1` represents **Yes**
    - `2` represents **No such helper**  
- `relationship`
    - `0` represents **Child or child-in-law or grandchild**
    - `1` represents **Spouse** 
    - `2` represents **Other relative**
    - `3` represents **Friend/neighbor**
    - `4` represents **Others (e.g. maid)**
- `advice`
    - `0` represents **No**
    - `1` represents **Yes**
- `iadl`
    - `0` represents **No**
    - `1` represents **Yes**
- `adl`
    - `0` represents **No**
    - `1` represents **Yes**

In [70]:
# Map the raw column codes to readable column names.
modify_column_names = {
    "BB1": "sex",
    "G1eA": "live_with_client",
    "G1fA": "relationship",
    "G1gA": "advice",
    "G1hA": "iadl",
    "G1iA": "adl",
}

# Rebind to the renamed frames instead of using inplace=True: both frames were
# produced by boolean indexing, and mutating such a result in place is the
# pattern pandas reports with SettingWithCopyWarning.
data_has_helper = data_has_helper.rename(columns=modify_column_names)
data_no_helper = data_no_helper.rename(columns=modify_column_names)