Run the following to install microdf in Colab:

In [None]:
# Install microdf
# !pip install git+https://github.com/PSLmodels/microdf.git

Collecting git+https://github.com/PSLmodels/microdf.git
  Cloning https://github.com/PSLmodels/microdf.git to /tmp/pip-req-build-_zt74wzk
  Running command git clone -q https://github.com/PSLmodels/microdf.git /tmp/pip-req-build-_zt74wzk


In [None]:
import pandas as pd
import numpy as np
import microdf as mdf
import plotly.express as px

In [None]:
# Census Bureau SPM 2019 Stata file, read directly from its URL.
spm_2019_url = "https://www2.census.gov/programs-surveys/supplemental-poverty-measure/datasets/spm/spm_2019_pu.dta"

# Only these columns are loaded; everything else in the file is skipped.
spm_columns = [
    "serialno",
    "sporder",
    "wt",
    "age",
    "spm_id",
    "spm_povthreshold",
    "spm_resources",
    "st",
    "puma",
]

person = pd.read_stata(spm_2019_url, columns=spm_columns)

In [None]:
# Cleanup: lowercase every column label, then rename
# serialno -> serial and sporder -> pernum.
person = (
    person
    .rename(columns=str.lower)
    .rename(columns={'serialno': 'serial', 'sporder': 'pernum'})
)

In [None]:
# Cast the identifier, weight, age and dollar columns to integers.
integer_columns = [
    "serial",
    "pernum",
    "wt",
    "age",
    "spm_id",
    "spm_povthreshold",
    "spm_resources",
]
person = person.astype(dict.fromkeys(integer_columns, 'int'))

In [None]:
# Filter to just Maryland (st == 24).
# Take an explicit copy: the boolean-mask selection is otherwise treated as a
# slice of the full frame, and adding columns to it later raises
# SettingWithCopyWarning.
person = person[person['st'] == 24].copy()

In [None]:
# Assign each person a random district number in 1..47 (the upper bound 48 is
# exclusive). A seeded generator keeps the assignment reproducible under
# Restart & Run All, and .assign() returns a new frame instead of writing into
# what pandas may consider a slice (which raised SettingWithCopyWarning).
district_rng = np.random.default_rng(24)
person = person.assign(
    district=district_rng.integers(1, 48, size=person.shape[0])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# Assign each person a random county number in 1..24 (the upper bound 25 is
# exclusive). A seeded generator keeps the assignment reproducible under
# Restart & Run All, and .assign() returns a new frame instead of writing into
# what pandas may consider a slice (which raised SettingWithCopyWarning).
county_rng = np.random.default_rng(25)
person = person.assign(
    county=county_rng.integers(1, 25, size=person.shape[0])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# Replace NIUs: every cell equal to the 9999999 sentinel becomes 0,
# in whichever column it appears.
niu_sentinel = 9999999
person = person.replace(to_replace=niu_sentinel, value=0)

In [None]:
# Define age groups as boolean flag columns:
# child (under 18), young_child (under 5), baby (age 0).
person = person.assign(
    child=person['age'].lt(18),
    young_child=person['age'].lt(5),
    baby=person['age'].eq(0),
)

In [None]:
# Count babies, young children, and children in each SPM unit by summing the
# boolean flags within every spm_id.
spmu = (
    person
    .groupby(['spm_id'])[['child', 'young_child', 'baby']]
    .sum()
    .rename(columns={
        'child': 'spm_children',
        'young_child': 'spm_young_children',
        'baby': 'spm_babies',
    })
)
# Attach the unit-level counts to every person row of that unit.
person = person.merge(spmu, left_on=['spm_id'], right_index=True)

In [None]:
# Consider three reforms
#1 a $100 universal child allowance (0-17)
#2 a $100 universal young child allowance (0-4)
#3 a $1,000 baby bonus given upon the birth of a child

def pov(reform, district):
    """Compute weighted poverty statistics before and after a child-allowance reform.

    Reads the module-level ``person`` frame, which must provide the columns
    ``district``, ``wt``, ``child``, ``young_child``, ``baby``,
    ``spm_children``, ``spm_young_children``, ``spm_babies``,
    ``spm_resources`` and ``spm_povthreshold``.

    Parameters
    ----------
    reform : str
        One of ``'All Children'`` (100 * 12 per child in the SPM unit),
        ``'Young Children'`` (100 * 12 per young child in the SPM unit) or
        ``'Babies'`` (1,000 per baby in the SPM unit).
    district : str or int
        ``'Maryland'`` uses every row of ``person``; any other value keeps
        only the rows whose ``district`` column equals it.

    Returns
    -------
    pandas.Series
        Sixteen unlabelled values, each group of four ordered as
        (everyone, children, young children, babies):
        percent change in the weighted poor count (rounded to 1 decimal),
        weighted population, baseline poverty rate, post-reform poverty rate.
        A group with no baseline poor yields NaN/inf for its percent change.

    Raises
    ------
    ValueError
        If ``reform`` is not one of the three supported names.
    """
    # reform name -> (per-unit count column, allowance per counted person per year)
    allowance_rules = {
        'All Children': ('spm_children', 100 * 12),
        'Young Children': ('spm_young_children', 100 * 12),
        'Babies': ('spm_babies', 1_000),
    }
    # Fail early with a clear message; previously an unknown reform left
    # total_ca undefined and surfaced later as an AttributeError.
    if reform not in allowance_rules:
        raise ValueError(
            f"Unknown reform {reform!r}; expected one of {list(allowance_rules)}"
        )
    count_column, annual_amount = allowance_rules[reform]

    # The selection is only read below, never modified, so no copy is needed.
    if district == 'Maryland':
        tp = person
    else:
        tp = person[person.district == district]

    weight = tp.wt
    total_ca = tp[count_column] * annual_amount
    # Poverty status: resources strictly below the threshold.
    poor = tp.spm_resources < tp.spm_povthreshold
    still_poor = (total_ca + tp.spm_resources) < tp.spm_povthreshold

    # None stands for "everyone"; the others are boolean flag columns.
    groups = [None, tp.child, tp.young_child, tp.baby]

    populations, baseline_poor, reform_poor = [], [], []
    for flag in groups:
        members = () if flag is None else (flag,)
        populations.append(_weighted_total(weight, *members))
        baseline_poor.append(_weighted_total(weight, *members, poor))
        reform_poor.append(_weighted_total(weight, *members, still_poor))

    changes = [
        _percent_change(new, old)
        for new, old in zip(reform_poor, baseline_poor)
    ]
    baseline_rates = [
        count / population
        for count, population in zip(baseline_poor, populations)
    ]
    reform_rates = [
        count / population
        for count, population in zip(reform_poor, populations)
    ]

    return pd.Series(changes + populations + baseline_rates + reform_rates)


def _weighted_total(weights, *flags):
    """Sum ``weights`` over the rows where every flag is true (all rows if no flags)."""
    if not flags:
        return weights.sum()
    mask = flags[0]
    for flag in flags[1:]:
        mask = mask * flag
    return (mask * weights).sum()


def _percent_change(new, old):
    """Percent change from ``old`` to ``new``, rounded to one decimal place."""
    return ((new - old) / old * 100).round(1)

In [None]:
# Distinct district values present in the data, as plain Python values.
districts = person['district'].unique().tolist()

# One entry per (reform, district) pair, with a 'Maryland' entry listed
# ahead of the individual districts.
reforms = ['All Children', 'Young Children', 'Babies']
summary = mdf.cartesian_product({
    'reform': reforms,
    'district': ['Maryland'] + districts,
})

In [None]:
def pov_row(row):
    """Call ``pov`` with the ``reform`` and ``district`` attributes of ``row``.

    Intended as a row-wise adapter (e.g. for ``DataFrame.apply(axis=1)``).
    """
    reform, district = row.reform, row.district
    return pov(reform, district)

In [None]:
# Labels for the 16 values returned by pov(), in the order it returns them.
result_columns = [
    'total_pov_change',
    'child_pov_change',
    'young_child_pov_change',
    'baby_pov_change',
    'population',
    'child_population',
    'young_child_population',
    'baby_population',
    'total_pov_rate',
    'child_pov_rate',
    'young_child_pov_rate',
    'baby_pov_rate',
    'new_total_pov_rate',
    'new_child_pov_rate',
    'new_young_child_pov_rate',
    'new_baby_pov_rate',
]

# Evaluate every reform/district row and store the results as new columns.
summary[result_columns] = summary.apply(pov_row, axis=1)



In [None]:
# Lift pandas' display limits so frames are shown without truncation.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
# Write the summary table (including its index) to a CSV in the working directory.
summary.to_csv(path_or_buf='skeleton_district_data.csv')

In [None]:
# Show the summary table as this cell's output.
summary

Unnamed: 0,reform,district,total_pov_change,child_pov_change,young_child_pov_change,baby_pov_change,population,child_population,young_child_population,baby_population,total_pov_rate,child_pov_rate,young_child_pov_rate,baby_pov_rate,new_total_pov_rate,new_child_pov_rate,new_young_child_pov_rate,new_baby_pov_rate
0,All Children,Maryland,-9.6,-20.4,-20.3,-19.6,5905181.0,1328582.0,356194.0,64250.0,0.129266,0.148904,0.159096,0.179984,0.116842,0.118525,0.126726,0.144638
1,All Children,9,-7.9,-15.5,-3.4,0.0,128170.0,30547.0,6229.0,1295.0,0.143076,0.212263,0.298764,0.2,0.131817,0.179363,0.28865,0.2
2,All Children,36,-8.8,-5.9,-18.5,,118111.0,25978.0,7076.0,916.0,0.147522,0.1723,0.20195,0.0,0.134509,0.162099,0.1645,0.0
3,All Children,31,-8.7,-26.5,-21.3,0.0,123836.0,29648.0,6155.0,434.0,0.138441,0.165745,0.173517,0.633641,0.126425,0.121897,0.136637,0.633641
4,All Children,45,-13.5,-26.6,-43.0,0.0,129847.0,29560.0,8558.0,998.0,0.124031,0.156563,0.162889,0.05511,0.107288,0.114953,0.092779,0.05511
5,All Children,43,-10.3,-18.6,-31.5,0.0,128282.0,29760.0,8281.0,1548.0,0.124834,0.18172,0.323391,0.365633,0.111917,0.147849,0.221592,0.365633
6,All Children,32,-5.2,-8.9,-13.3,0.0,127150.0,26958.0,9632.0,1567.0,0.146834,0.141776,0.215947,0.239311,0.139158,0.129127,0.187189,0.239311
7,All Children,16,-10.4,-27.9,-13.8,,122772.0,27415.0,7270.0,538.0,0.127912,0.142148,0.16575,0.0,0.114611,0.102426,0.142916,0.0
8,All Children,28,-8.7,-13.3,0.0,0.0,125571.0,28702.0,5695.0,535.0,0.127131,0.171451,0.166813,0.185047,0.116126,0.1487,0.166813,0.185047
9,All Children,15,-10.8,-17.4,-12.5,0.0,124990.0,28565.0,7709.0,1691.0,0.130954,0.155365,0.211441,0.263749,0.116857,0.128339,0.185108,0.263749
