In [1]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta

# Function to convert space values to -1
def space_to_int(value):
    """Convert a raw field to an integer, mapping blank fields to -1.

    Intended for use as a ``pandas.read_csv`` converter, where a blank field
    stands for a missing code.

    Parameters
    ----------
    value : str or int-like
        The raw field. Strings that are empty or contain only whitespace are
        treated as blank; anything else is passed to ``int``.

    Returns
    -------
    int
        ``-1`` for a blank field, otherwise ``int(value)``.

    Raises
    ------
    ValueError
        If ``value`` is non-blank but is not a valid integer literal.
    """
    # Treat '' and any run of whitespace as blank, not only a single space,
    # so one oddly padded field cannot abort the whole file load.
    if isinstance(value, str) and value.strip() == '':
        return -1
    return int(value)

# Path to data file and information to process data file
data_file = 'C:/Users/Lucas/Documents/NDACAN SRI/Data Request/fc_all.csv'
# Zero-based positions of the columns to read from the CSV
columns_to_use = (0,3,6,7,42,94)
# Use the builtin `bool` instead of `np.bool`: that alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
data_types = {'RecNumbr':str, 'InAtEnd':bool}
# Route CURPLSET through space_to_int so blank fields are read as -1
convert_dict = {i:space_to_int for i in ['CURPLSET'] }

# Load data with reasonable chunksize iterator
chunksize_param = 500000

data_load = pd.read_csv(
    data_file,
    usecols=columns_to_use,
    dtype=data_types,
    converters=convert_dict,
    chunksize=chunksize_param)

data_list = []

for i, data in enumerate(data_load):
    # Progress marker: number of rows consumed before this chunk
    print(i * chunksize_param)
    data_list.append(data)

print('Concatenating...')
data_df = pd.concat(data_list)
print('Complete!')

# If the file's byte-order mark ended up glued to the first column name,
# strip it; the rename is a no-op when the name is already clean.
data_df = data_df.rename(columns={'\ufeffDataYear':'DataYear'})

# Test that it all worked
data_df.head()

0
500000
1000000
1500000
2000000
2500000
3000000
3500000
4000000
4500000
5000000
5500000
6000000
6500000
7000000
7500000
8000000
8500000
9000000
9500000
10000000
10500000
11000000
Concatenating...
Complete!


Unnamed: 0,DataYear,St,FIPSCODE,RecNumbr,CURPLSET,InAtEnd
0,2000,AL,8,1907,5,True
1,2000,AL,8,1997,3,False
2,2000,AL,1073,2014,5,False
3,2000,AL,8,2074,6,False
4,2000,AL,8,3214,5,True


In [34]:
# Number of children in care at end of FFY by Year / State&FIPS with kin and all

# Row-level flags; summing them per group below yields counts.
data_df['Kin'] = (data_df['CURPLSET'] == 2) & (data_df['InAtEnd'])
data_df['All'] = (data_df['CURPLSET'].isin([1,2,3,4,5])) & (data_df['InAtEnd'])
dataset_name = 'inatend_kin_by_year_fips'

# Select the columns with a list: the tuple form ['Kin','All'] was deprecated
# and is rejected by pandas 2.0 and later.
result_df = data_df.groupby(['DataYear','St','FIPSCODE'])[['Kin','All']].sum()

# Percent of children in care at end of FFY by Year / State&FIPS with kin
# (stored as a 0-1 ratio; groups where All == 0 give NaN and become '--' below)
result_df['PercentKin'] = result_df['Kin'] / result_df['All']
# Move DataYear from the row index into a second column level
result_df = result_df.unstack(0)

# Flatten the two-level columns to plain (measure, year) tuples before merging.
# pandas 2.x refuses to merge frames whose columns have different numbers of
# levels; older releases performed this same flattening implicitly.
result_df.columns = pd.Index(list(result_df.columns), tupleize_cols=False)

# Merge on FIPS code labels
fips_file = 'C:/Users/Lucas/Documents/NDACAN SRI/Data Request/FIPS_final.csv'
fips_df = pd.read_csv(fips_file)

result_df = result_df.merge(fips_df[['St','FIPSCODE','County_Name']], how='left', right_on=['St','FIPSCODE'], left_index=True)
# Mark every missing cell (no rows for that year, 0/0 ratio, unmatched county)
result_df = result_df.fillna('--')
result_df = result_df.set_index(['St','FIPSCODE','County_Name'])

# Rebuild the (measure, year) column levels, then move year back into the rows
result_df.columns = pd.MultiIndex.from_tuples(list(result_df.columns))
result_df = result_df.stack()

# Save to csv file
output_name = 'C:/Users/Lucas/Documents/NDACAN SRI/Analysis/' + dataset_name + '.csv'
result_df.to_csv(output_name)