In [22]:
# Install microdf in Google Colab: https://colab.research.google.com/notebooks/intro.ipynb
# !pip install git+https://github.com/PSLmodels/microdf.git

Collecting git+https://github.com/PSLmodels/microdf.git
  Cloning https://github.com/PSLmodels/microdf.git to /tmp/pip-req-build-xrqygvuu
  Running command git clone -q https://github.com/PSLmodels/microdf.git /tmp/pip-req-build-xrqygvuu


In [23]:
import pandas as pd
import numpy as np
import microdf as mdf
import plotly.express as px

In [24]:
# Load the compressed CSV extract into `person`. The serial and FIPS code
# columns are read as strings so they are treated as labels, not numbers.
person = pd.read_csv(
    'https://github.com/ngpsu22/Maryland/raw/main/usa_00012.csv.gz',
    dtype=dict.fromkeys(['SERIAL', 'STATEFIP', 'COUNTYFIP'], 'str'),
)
person.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3239553 entries, 0 to 3239552
Data columns (total 8 columns):
 #   Column     Dtype  
---  ------     -----  
 0   SERIAL     object 
 1   STATEFIP   object 
 2   COUNTYFIP  object 
 3   PERWT      float64
 4   AGE        int64  
 5   HIUNPERS   int64  
 6   FTOTINC    int64  
 7   POVERTY    int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 197.7+ MB


In [25]:
# Normalise every column label to lower case for attribute-style access below.
person.columns = [label.lower() for label in person.columns]

In [26]:
# Filter (not sort) the rows: keep only records whose state FIPS code is '24'.
person = person.loc[person['statefip'].eq('24')]

In [27]:
# Recode the 9999999 sentinel to 0 in the income column only, instead of
# rewriting that value in every column of the frame.
# NOTE(review): 9999999 is presumably the IPUMS "N/A" code for FTOTINC - confirm.
# Recoding it to 0 means these records count as below the poverty threshold.
# `assign` returns a new frame, so later column assignments do not write into
# a slice of the unfiltered data.
person = person.assign(ftotinc=person['ftotinc'].replace(9999999, 0))

In [28]:
# Poverty threshold for each record: a base amount for the first person in the
# unit plus a fixed increment for every additional person counted in `hiunpers`.
# NOTE(review): the figures look like the 2019 HHS poverty guidelines - confirm.
opm_base = 12_490
opm_inc = 4_420
person['pov_thresh'] = opm_base + opm_inc * person['hiunpers'].sub(1)

In [29]:
# Boolean age-band indicators: under 18, under 5, and exactly age 0.
person['child'] = person['age'].lt(18)
person['young_child'] = person['age'].lt(5)
person['baby'] = person['age'].eq(0)

In [30]:
# Count children, young children and babies within each `serial` group by
# summing the boolean flags, then rename the counts to the `spm_*` columns.
spmu = (
    person.groupby('serial')[['child', 'young_child', 'baby']]
    .sum()
    .rename(columns={
        'child': 'spm_children',
        'young_child': 'spm_young_children',
        'baby': 'spm_babies',
    })
)
# Attach the group-level counts to every person row sharing that `serial`.
person = person.merge(spmu, left_on='serial', right_index=True)

In [31]:
def pov(reform, countyfip):
    """Summarise the poverty impact of one child-allowance reform in one area.

    Reads the module-level ``person`` DataFrame, which must already carry the
    columns ``countyfip``, ``perwt``, ``ftotinc``, ``pov_thresh``, the boolean
    flags ``child`` / ``young_child`` / ``baby`` and the per-unit counts
    ``spm_children`` / ``spm_young_children`` / ``spm_babies``.

    Parameters
    ----------
    reform : str
        ``'All Children'``, ``'Young Children'`` or ``'Babies'``.
    countyfip : str
        A value of ``person.countyfip``, or the literal ``'Maryland'`` to use
        every row of ``person``.

    Returns
    -------
    pandas.Series
        Eight positional values, in this order: percent change in the weighted
        number of poor people for everyone, children, young children and
        babies (rounded to one decimal; NaN when the baseline count is zero),
        followed by the weighted population of those same four groups.

    Raises
    ------
    ValueError
        If ``reform`` is not one of the three supported names.
    """
    # Count column and annual amount paid per counted person, by reform name.
    # NOTE(review): 'Babies' pays a flat 1_000 rather than 100 * 12 - confirm
    # that the difference is intentional.
    allowance_by_reform = {
        'All Children': ('spm_children', 100 * 12),
        'Young Children': ('spm_young_children', 100 * 12),
        'Babies': ('spm_babies', 1_000),
    }
    # Fail early with a clear message; previously an unknown name surfaced
    # later as a missing `total_ca` column.
    if reform not in allowance_by_reform:
        raise ValueError(
            f"Unknown reform {reform!r}; expected one of "
            f"{list(allowance_by_reform)}"
        )
    count_col, amount = allowance_by_reform[reform]

    # Work on a private copy so the shared `person` frame is never modified.
    if countyfip == 'Maryland':
        tp = person.copy(deep=True)
    else:
        tp = person[person.countyfip == countyfip].copy(deep=True)

    tp['total_ca'] = tp[count_col] * amount
    tp['new_income'] = tp.total_ca + tp.ftotinc
    tp['poor'] = tp.ftotinc < tp.pov_thresh
    tp['still_poor'] = tp.new_income < tp.pov_thresh

    group_flags = ('child', 'young_child', 'baby')

    def weighted_count(mask):
        # Sum of person weights over the rows where `mask` is true.
        return (mask * tp.perwt).sum()

    # Each list is ordered: everyone, children, young children, babies.
    populations = [tp.perwt.sum()] + [
        weighted_count(tp[flag]) for flag in group_flags
    ]
    poor_before = [weighted_count(tp.poor)] + [
        weighted_count(tp[flag] * tp.poor) for flag in group_flags
    ]
    poor_after = [weighted_count(tp.still_poor)] + [
        weighted_count(tp[flag] * tp.still_poor) for flag in group_flags
    ]

    # Percent change in the weighted poor count; 0 / 0 yields NaN.
    pct_changes = [
        ((after - before) / before * 100).round(1)
        for before, after in zip(poor_before, poor_after)
    ]

    return pd.Series(pct_changes + populations)

In [32]:
# Build one row per (reform, area) pair; 'Maryland' is the extra pseudo-area
# that `pov` treats as "use all rows".
counties = list(person['countyfip'].unique())
summary = mdf.cartesian_product(
    {
        'reform': ['All Children', 'Young Children', 'Babies'],
        'countyfip': ['Maryland'] + counties,
    }
)

In [33]:
def pov_row(row):
    """Row adapter for ``DataFrame.apply``: call ``pov`` with the row's
    ``reform`` and ``countyfip`` values and return its result."""
    reform_name = row['reform']
    area_code = row['countyfip']
    return pov(reform_name, area_code)

In [34]:
# Evaluate every (reform, area) row and store the eight results as new columns.
# `pov` returns an unlabelled Series, so this list must stay in the same order
# as the values it returns.
result_columns = [
    'total_pov_change',
    'child_pov_change',
    'young_child_pov_change',
    'baby_pov_change',
    'population',
    'child_population',
    'young_child_population',
    'baby_population',
]
summary[result_columns] = summary.apply(pov_row, axis=1)



In [35]:
# Display the results table: one row per (reform, area) combination.
summary

Unnamed: 0,reform,countyfip,total_pov_change,child_pov_change,young_child_pov_change,baby_pov_change,population,child_population,young_child_population,baby_population
0,All Children,Maryland,-5.8,-11.7,-10.9,-10.6,6045680.0,1331209.0,356194.0,64250.0
1,All Children,31,-14.7,-26.0,-28.1,-18.6,1051117.0,241996.0,64039.0,12238.0
2,All Children,0,-4.1,-11.3,-14.2,0.0,659813.0,136105.0,32263.0,6448.0
3,All Children,510,-2.6,-5.6,-3.4,0.0,592700.0,119518.0,36541.0,5615.0
4,All Children,3,-6.8,-17.0,-10.6,,579812.0,128337.0,35305.0,6250.0
5,All Children,17,-7.6,-19.5,-51.7,,163136.0,38862.0,8883.0,1076.0
6,All Children,5,-5.8,-10.2,-1.3,0.0,827171.0,178159.0,48009.0,8079.0
7,All Children,33,-3.6,-6.8,-1.1,0.0,909328.0,201632.0,59103.0,10640.0
8,All Children,27,-0.8,0.0,0.0,0.0,325382.0,78910.0,19487.0,4762.0
9,All Children,15,-17.3,-28.5,-23.5,0.0,102627.0,22219.0,5656.0,714.0
