In [2]:
import pandas as pd
import numpy as np
import janitor

%load_ext nb_black

# ``display`` and ``HTML`` belong to the public ``IPython.display`` module;
# importing them from ``IPython.core.display`` has been deprecated since
# IPython 7.14.
from IPython.display import display, HTML

# Inject a CSS rule that sets the notebook's ``.container`` element to 90% width.
display(HTML("<style>.container {width:90% !important;}</style>"))

<IPython.core.display.Javascript object>

## Goal: Merge cleaned and standardized income projections from CBO with base CPS file

#### Last edited: 10/17/21

In [3]:
# Source files: the preliminary CPS demographic projections and the cleaned
# subset of CBO short-term economic projections.
cps_url = "https://raw.githubusercontent.com/ssloate/masters_project/main/data/demographics/prelim_demographic_projections.csv"
inc_url = "https://raw.githubusercontent.com/ssloate/masters_project/main/data/cbo/short_term_economic_projections_clean_subset.csv"

cps = pd.read_csv(cps_url)

# index_col=False: do not use the first column of the file as the index
inc = pd.read_csv(inc_url, index_col=False)

<IPython.core.display.Javascript object>

In [4]:
# replace 999999s with NaNs
#
# Missing income values are encoded as an all-nines sentinel whose width
# differs by column, so the columns are grouped by the sentinel they use.

to_replace_8 = [
    "incwage",
    "incbus",
    "incfarm",
    "incretir",
]  # 99999999 indicates missing
to_replace_7 = [
    "incint",
    "incvet",
    "incsurv",
    "incdisab",
    "incdivid",
    "incrent",
    "incasist",
    "incother",
]  # 9999999 indicates missing
to_replace_6 = [
    "incss",
    "incwelfr",
    "incssi",
    "incunemp",
    "incwkcom",
    "inceduc",
    "incchild",
]  # 999999 indicates missing

# Assign the result back to the column rather than calling
# ``cps[col].replace(..., inplace=True)``: an in-place method on a column
# selection is chained assignment, which pandas warns about and which does not
# update ``cps`` at all when Copy-on-Write is enabled.
for sentinel, columns in (
    (99999999, to_replace_8),
    (9999999, to_replace_7),
    (999999, to_replace_6),
):
    for column in columns:
        cps[column] = cps[column].replace(sentinel, np.nan)

<IPython.core.display.Javascript object>

In [5]:
# stretch out inc dataset
# ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is its
# replacement. Stacking ``inc`` once per cps row (and at least once, matching
# what the old ``append`` call produced for an empty or one-row cps) with
# ``ignore_index=True`` yields a fresh 0..n-1 index.
inc = pd.concat(
    [inc] * max(len(cps), 1), ignore_index=True
)  # makes the one-row dataset as long as the cps dataset

# add inc dataset to cps data set
# (``join`` aligns on the index, so row i of cps receives row i of the
# repeated projections)
cps_inc = cps.join(inc)

# make copy
cps_inc_copy = cps_inc.copy()

<IPython.core.display.Javascript object>

In [6]:
# rename base year income columns with '2019' prefix
taxable_incomes = [
    "incwage",
    "incbus",
    "incfarm",
    "incretir",
    "incint",
    "incunemp",
    "incdivid",
]

# Every taxable income column gets the same "2019_" prefix, so derive the
# old-name -> new-name mapping from the list instead of spelling it out.
base_year_names = {income: f"2019_{income}" for income in taxable_incomes}

cps_inc.rename(columns=base_year_names, inplace=True)

<IPython.core.display.Javascript object>

In [7]:
# multiply income columns

# Suffix of the growth-rate column ("<year>_<suffix>") applied to each
# taxable income component.
growth_rate_for = {
    "incwage": "wage",
    "incbus": "nonfarm_prop",
    "incfarm": "farm_prop",
    "incretir": "tot_personal",
    "incint": "int",
    "incunemp": "tot_personal",
    "incdivid": "div",
}

for year in range(2020, 2032):  # start in 2020, since base dataset is 2015-2019
    prior = year - 1
    for income in taxable_incomes:
        rate_suffix = growth_rate_for.get(income)
        if rate_suffix is None:
            # components without a mapped growth rate are left untouched
            continue

        previous_income = cps_inc[f"{prior}_{income}"]
        growth = cps_inc[f"{prior}_{rate_suffix}"]

        # prior-year income, plus prior-year income multiplied by that year's
        # growth rate. Kept as ``a + (a * g)`` rather than ``a * (1 + g)`` so
        # the floating-point results are exactly those of the explicit form.
        cps_inc[f"{year}_{income}"] = previous_income + (previous_income * growth)

####### ASSUMPTION: retirement income growing at total rate
####### Need to code in cap gains income and find cap gains growth rate!




<IPython.core.display.Javascript object>

In [8]:
# check on a few random years
# Each entry is (projected year, income component, growth-rate suffix); the
# projected column must equal the prior year's income grown by the prior
# year's rate.
spot_checks = [
    (2021, "incwage", "wage"),
    (2030, "incdivid", "div"),
]

for check_year, component, rate in spot_checks:
    base_col = f"{check_year - 1}_{component}"
    rate_col = f"{check_year - 1}_{rate}"
    expected = (cps_inc[base_col] * cps_inc[rate_col]) + cps_inc[base_col]
    assert cps_inc[f"{check_year}_{component}"].equals(expected)

<IPython.core.display.Javascript object>

In [9]:
# export

# NOTE(review): this is an absolute path on one specific machine; the cell
# will fail elsewhere unless the path is adjusted.
export_path = "/Users/samsloate/Desktop/807 MP/masters_project/data/cps/cps_w_incomes_clean.csv"

# index=False: write only the data columns, not the row index
cps_inc.to_csv(export_path, index=False)

<IPython.core.display.Javascript object>

In [10]:
# CPS Income columns are:

# 'inctot': total pre-tax personal income or losses from all sources
#'incwage': Wage and salary income. INCLUDES oincwage
#'incbus': net pre-income-tax non-farm business and/or professional practice income
#'incfarm': net pre-income-tax earnings as a tenant farmer, sharecropper, or operator of his or her own farm
#'incss': pre-tax SS income the respondent received from Social Security
#'incwelfr': pre-tax welfare income
#'incretir': pre-tax income from all retirement income sources
#'incssi': pre-tax SSI income
#'incint': pre-tax income (if any) the respondent received from interest on saving accounts, certificates of deposit, money market funds, bonds, treasury notes, IRAs, and/or other investments which paid interest."
#'incunemp': unemployment income
#'incwkcom': workers comp
#'incvet': VA income
#'incsurv': survivors benefits income
#'incdisab': disability income
#'incdivid': pre-tax income from stocks and mutual funds
#'incrent': income received from rent (after expenses) and from money paid by estates, trusts, and royalties
#'inceduc': income received from educational assistance
#'incchild': income from child support payments
#'incasist': regular financial assistance from friends or relatives
#'incother': pre-tax income for the previous calendar year that was not reported in other categories

# adjginc: individual's total gross (pre-tax) income from taxable sources
# minus certain items, such as individual retirement plan contributions
# (payments to a Keogh plan or a deductible Individual Retirement Account),
# alimony paid, medical savings accounts, and non-reimbursed employee business expenses.
# taxinc: adjusted gross income (ADJGINC) minus allowable itemized deductions
# (or a standard allowance amount) and exemptions for the taxpayer and his or her dependents.
# Taxable income is the amount used in the calculation of an individual's income tax liability.

<IPython.core.display.Javascript object>

In [11]:
#####
# Code Assumptions
#####

# 1. retirement income growing at total rate
# 2. 2020 income times the 2020 growth rate equals 2021 income (NOT that 2020 income times 2021 growth rate equals 2021 income)
# 3. NC follows national trend. All income components grow at same rate, regardless of income level 

#####
# Need to Do
#####

# 1. Code in cap gains income and find cap gains growth rate
# 2. Confirm that certain sources of income are not taxed and can therefore be omitted
# 3. Look into top-coding: INCWAGE has two topcoded components: OINCWAGE and INCLONGJ.

<IPython.core.display.Javascript object>

In [12]:
# Final look at the merged frame with the projected income columns; the
# notebook renders it as a truncated preview.
cps_inc

Unnamed: 0,set,agegroup,race,sex,basewt,asecwt,v1,serial,cpsid,asecwth,...,2030_incint,2030_incunemp,2030_incdivid,2031_incwage,2031_incbus,2031_incfarm,2031_incretir,2031_incint,2031_incunemp,2031_incdivid
0,0aian1f,0 to 2,aian,f,396.0,1981.0,826284.0,41674.0,2.018010e+13,1905.0,...,,,,,,,0.0,,,
1,0aian1f,0 to 2,aian,f,604.0,2616.0,646021.0,40648.0,0.000000e+00,2713.0,...,,,,,,,,,,
2,0aian1f,0 to 2,aian,f,426.0,2128.0,826360.0,41723.0,0.000000e+00,1978.0,...,,,,,,,0.0,,,
3,0aian1f,0 to 2,aian,f,603.0,2613.0,645866.0,40587.0,2.018010e+13,2962.0,...,,,,,,,,,,
4,0aian1f,0 to 2,aian,f,1207.0,1694.0,277109.0,41700.0,2.015030e+13,1811.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21202,85asian2m,,,,,,,,,,...,,,,,,,,,,
21203,85black2f,,,,,,,,,,...,,,,,,,,,,
21204,85black2m,,,,,,,,,,...,,,,,,,,,,
21205,85other2f,,,,,,,,,,...,,,,,,,,,,


<IPython.core.display.Javascript object>