# Notebook to replicate 2018 ALF study table

In [447]:
import numpy as np
import numpy.random as npr
import pandas as pd

import json
import requests

import pickle

import time

import matplotlib
import matplotlib.pyplot as plt

import glob
import os

## Importing

In [448]:
# Facility-level national dataset; later cells read its 'County FIPS' and
# 'Capacity' columns.
df = pd.read_csv(
    filepath_or_buffer='../alf-datasets/national/national-dataset-48states-with-coords-and-county-8-28-21.csv'
)

In [449]:
# Directory holding the per-county CSV files that are combined into county_df.
path = '../alf-datasets/national/county-data/'

In [450]:
# Collect every file in the county-data folder. glob.glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them: otherwise both the file
# that supplies the identifying columns and the order of the data columns can
# change from one machine or run to the next.
county_files = sorted(glob.glob(os.path.join(path, '*')))
if not county_files:
    raise FileNotFoundError('No county data files found in {!r}'.format(path))

# Read each file exactly once and reuse the frames below.
county_frames = [pd.read_csv(loc) for loc in county_files]

# gets county names, state, fips code, etc... (first four columns of the first file)
county_basic_info_df = county_frames[0].iloc[:, :4]

# gets info for each dataset in county-data folder (fifth column of every file)
county_dfs = pd.concat([frame.iloc[:, 4] for frame in county_frames], axis=1)

# Columns are joined side by side on the row index, so this assumes every file
# lists the counties in the same row order -- TODO confirm.
county_df = pd.concat([county_basic_info_df, county_dfs], axis=1)

# NOTE(review): if 'FIPS Code' is parsed as float (e.g. because some rows are
# missing), astype(str) yields values like '1001.0' that zfill(5) leaves
# untouched, rather than the 5-digit '01001' form -- confirm the dtype.
county_df['Formatted FIPS'] = county_df['FIPS Code'].astype(str).str.zfill(5)

In [451]:
# county_df

## Calculating ALF penetration and separating zero and non-zero ALF penetration counties

In [452]:
# dictionary of 65+ population per county FIPS
county_pop_dict = dict(zip(county_df['Formatted FIPS'], county_df['Estimated number of people 65 or older, between 2015-2019.']))

# Total ALF capacity per county FIPS, computed in a single pass over the
# facility table instead of re-filtering the whole table once per county.
capacity_by_fips = df.groupby(pd.to_numeric(df['County FIPS'], errors='coerce'))['Capacity'].sum()

formatted_fips = county_df['Formatted FIPS']
# '00nan' is what a missing FIPS code turns into after astype(str).zfill(5);
# those rows get a penetration of 0.
has_fips = formatted_fips != '00nan'
fips_as_number = pd.to_numeric(formatted_fips.where(has_fips), errors='coerce')

# Counties with no matching facility have zero capacity.
county_capacity = fips_as_number.map(capacity_by_fips).fillna(0)
county_population = formatted_fips.map(county_pop_dict)

# calculating ALF penetration (capacity per 1000 people aged 65+) and adding to county_df
# A missing population yields NaN here; later cells drop NaN rows explicitly.
county_df['ALF Penetration'] = (1000 * county_capacity / county_population).where(has_fips, 0)

In [453]:
# Report how many county rows were assembled.
n_counties = len(county_df)
print('There are {} counties total'.format(n_counties))

There are 3245 counties total


In [454]:
# Split the counties by ALF penetration: rows with a defined, non-zero value
# versus rows whose value is exactly zero. Rows with NaN land in neither frame.
penetration = county_df['ALF Penetration']

county_df_non_zero = county_df[penetration.notna() & penetration.ne(0)]

county_df_zero = county_df[penetration.eq(0)]

In [455]:
# Print the size of each group produced by the zero / non-zero split.
for message, subset in (
    ('There are {} counties with non-zero ALF penetration', county_df_non_zero),
    ('There are {} counties 0 ALF penetration', county_df_zero),
):
    print(message.format(len(subset)))

There are 1846 counties with non-zero ALF penetration
There are 1385 counties 0 ALF penetration


## Calculating quartiles

In [456]:
# Lower edges of the four quartile bins among counties with non-zero ALF
# penetration: the minimum, then the 25th, 50th and 75th percentiles.
nonzero_penetration = county_df_non_zero['ALF Penetration'].to_numpy()
quartiles = np.quantile(nonzero_penetration, [0.0, 0.25, 0.5, 0.75])
quartiles



array([ 0.06235969, 12.06632066, 21.08905164, 31.74016622])

## Calculating averages and std per quartile

In [469]:
# county_df_zero.shape
#county_df_non_zero[county_df_non_zero['ALF Penetration'].between(quartiles[0], quartiles[1])].shape
# county_df_non_zero[county_df_non_zero['ALF Penetration'].between(quartiles[1], quartiles[2])].shape
# county_df_non_zero[county_df_non_zero['ALF Penetration'].between(quartiles[2], quartiles[3])].shape
# county_df_non_zero[county_df_non_zero['ALF Penetration'] > quartiles[3]].shape

In [458]:
# Row labels for the summary table: the names of the numeric columns that
# survive a column-wise mean. numeric_only=True is passed explicitly because
# pandas >= 2.0 no longer silently drops non-numeric columns (a bare .mean()
# raises TypeError on the string columns of this frame).
top_quartile_counties = county_df_non_zero[county_df_non_zero['ALF Penetration'] > quartiles[3]]
names = top_quartile_counties.mean(numeric_only=True).index.values

In [459]:
def _mean_and_std(frame):
    """Return ``(mean, std)`` Series over the numeric columns of ``frame``.

    ``numeric_only=True`` is explicit because pandas >= 2.0 raises on
    non-numeric columns instead of silently dropping them.
    """
    return frame.mean(numeric_only=True), frame.std(numeric_only=True)


nonzero_pen = county_df_non_zero['ALF Penetration']

# Quartile bins are half-open on the left -- [q0, q1], (q1, q2], (q2, q3],
# (q3, inf) -- so a county lying exactly on a quartile edge is counted in one
# bin only. Series.between is inclusive on both ends and would count it twice.
in_q1 = nonzero_pen.between(quartiles[0], quartiles[1])
in_q2 = (nonzero_pen > quartiles[1]) & (nonzero_pen <= quartiles[2])
in_q3 = (nonzero_pen > quartiles[2]) & (nonzero_pen <= quartiles[3])
in_q4 = nonzero_pen > quartiles[3]

# a: zero-penetration counties; b-e: quartiles 1-4 of the non-zero counties.
a1, a2 = _mean_and_std(county_df_zero)
b1, b2 = _mean_and_std(county_df_non_zero[in_q1])
c1, c2 = _mean_and_std(county_df_non_zero[in_q2])
d1, d2 = _mean_and_std(county_df_non_zero[in_q3])
e1, e2 = _mean_and_std(county_df_non_zero[in_q4])


## Putting data into table and storing it

In [460]:
# (mean, std) pairs in table order: zero-penetration counties first, then
# quartiles 1 through 4 of the non-zero counties.
all_quartile_columns = [
    a1, a2,
    b1, b2,
    c1, c2,
    d1, d2,
    e1, e2,
]

# Turn every Series into a 1 x n row vector so the rows can be stacked.
all_quartile_columns_cleaned = [np.atleast_2d(stat.values) for stat in all_quartile_columns]

In [461]:
# Stack the row vectors, then transpose so each statistic becomes a column and
# each entry of `names` labels a row.
stacked_stats = np.vstack(all_quartile_columns_cleaned)
table = pd.DataFrame(stacked_stats.T, index=names)

In [462]:
# Write the unformatted summary table to disk.
table.to_csv(path_or_buf='../alf-datasets/national/replicated-table-raw.csv')