In [288]:
import warnings
import agate
from pathlib import Path
from shutil import copyfile
import os
import csv
from decimal import *

warnings.filterwarnings(action='ignore')

g_table_geos = agate.Table.from_csv('../data/county-geos.csv')

# If we already have a geo column
def join_tables_geo(table_one, table_two, output_fp):
    table_join = table_one.join(table_two, 'geo', 'geo',
                            inner=True)
    
    table_join.exclude(['county2']).to_csv(output_fp)
    
# If we already have a geo column in one spreadsheet
# table_one is the table with the geo column
def join_tables_geo_one(table_one, table_two, classify_column, output_fp):
    table_two_geos = table_two.join(g_table_geos, classify_column, 'county', inner=True)
    table_join = table_one.join(table_two_geos, 'geo', 'geo', inner=True)
    
    table_join.exclude(['county2']).to_csv(output_fp)
    
# Join two tables without the geo column
def join_tables(fp, classify_column, classify_column_two, select_columns, rename_columns):            
    try:
        # Take our global table
        # And our table getting passed through the function
        # give them both geo ids
        # then join to make one big, beautiful table
        table_one = agate.Table.from_csv(fp, encoding='utf-8-sig')
        table_two = agate.Table.from_csv(g_table_merge_fp, encoding='utf-8-sig')
        
        table_one.to_csv('../tmp/table-one.csv')
        table_two.to_csv('../tmp/table-two.csv')
        
        table_geos = g_table_geos.join(table_one, 'county', classify_column,
                                inner=True)
        table_geos_two =  g_table_geos.join(table_two, 'county', classify_column_two,
                                inner=True)
        
        table_geos.to_csv('../tmp/table-geos.csv')
        table_geos_two.to_csv('../tmp/table-geos-two.csv')
        
        table_join = table_geos.join(table_geos_two, 'geo', 'geo')

        # If something didn't join right, print it out
        if table_join:
            table_join_null = table_join.where(lambda row: row[classify_column] == None)

            if table_join_null:
                print("Not everything joined. Table saved to tmp dir.")
                table_join.to_csv('../tmp/table-not-joined.csv')


        print("Saving joined table")
        table_final = table_join.exclude(
            ['geo2', 'county2', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'geo_two', 'geo_two2', 'GISJOIN2']
        )
        
        # If we've include columns to only include
        if select_columns:
            table_final = table_final.select(select_columns)
            
        # If we're renaming columns
        if rename_columns:
            table_final = table_final.rename(column_names = rename_columns)

        table_final.to_csv(g_table_output)
        
    except ValueError:
        return False
    
def dict_check_add(key, cat, add, categories_dict, classification, row):
    try:
        if classification == 'ob-hospitals':
            if cat == 'NAV' or cat == '' or int(cat) == 0:
                cat = 'No OB hospital(s)'
            else:
                cat = 'OB hospital(s)'
        
        dicts = [ categories_dict[cat], categories_dict['Total'] ]
        
        # if key == 'prenatal_eligible':
            # print(categories_dict['Total'])
            
        for c_dict in dicts:
            # Ignore if we don't have full results for county
            if add == '*' or add == '' or add == '-':
                add_up = False
            else:
                add_up = True
                
            if classification == 'ob-hospitals' and key == 'prenatal_eligible':
                add = round( float(row[3]) / (1 - ( (100 -  float(add) ) / 100) ) )
                    
            if add_up:
                if key not in c_dict:
                    c_dict[key] = float(Decimal(add))
                else:
                    c_dict[key] += float(Decimal(add))
    except ValueError:
        return False
          
# Calculate maternal mortality, prenatal rates
# For rural/urban
# Border/non-border
def csv_calcuations(input_f, output_f, categories_dict, classification):
    try:
        with open(input_f, 'r') as inf, open(output_f, 'w') as outf:
            outcsv = csv.writer(outf, delimiter=',')
            
            # Skip header
            has_header = csv.Sniffer().has_header(inf.read(20048))
            inf.seek(0)
            incsv = csv.reader(inf)
            if has_header:
                next(incsv)
            
            for row in incsv:
                if '10-14' in input_f:
                    categories = ['count', 'births', 'deaths', 'prenatal_eligible', 'prenatal', 'race_female_total', 'race_female_white', 'race_female_black', 'race_female_hispanic', 'race_female_other']
                else:
                    categories = ['count', 'prenatal', 'prenatal_eligible', 'births', 'deaths']
                
                cat_loc = 3
                classify_row = row[2]
                
                if row[cat_loc]:
                    for category in categories:
                        if category is not 'count':
                            dict_check_add(category, classify_row, row[cat_loc], categories_dict, classification, row)
                            cat_loc += 1
                        else:
                            dict_check_add(category, classify_row, 1, categories_dict, classification, row)           

            # Create csv
            if '10-14' in input_f:
                csv_header = ['category', 'count', 'births', 'deaths', 'prenatal_eligible', 'prenatal', 'race_female_white', 'race_female_black', 'race_female_hispanic', 'race_female_other', 'race_female_white_perc', 'race_female_black_perc', 'race_female_hispanic_perc', 'race_female_other_perc', 'mortality_rate', 'prenatal_rate']
            else: 
                csv_header = ['category', 'count', 'births', 'deaths', 'prenatal_eligible', 'prenatal', 'mortality_rate', 'prenatal_rate']
            outcsv.writerow(csv_header)

            for key, value in categories_dict.items():
                mortality_rate = round((value['deaths'] / value['births']) * 100000, 2)
                prenatal_rate = round((value['prenatal'] / value['prenatal_eligible']) * 100, 2)
                
                if '10-14' in input_f:
                    races = ['white', 'black', 'hispanic', 'other']
                    races_perc = {}
                
                    for race in races:
                        percent = round(value['race_female_' + race] / value['race_female_total'] * 100, 2)
                        
                        races_perc[race] = percent
                
                print(key)
                print(value)
                
                print("Row created")
                if '10-14' in input_f:
                    outcsv.writerow([ key, value['count'], value['births'], value['deaths'], value['race_female_white'], value['race_female_black'], value['race_female_hispanic'], value['race_female_other'], value['prenatal_eligible'], value['prenatal'], races_perc['white'], races_perc['black'], races_perc['hispanic'], races_perc['other'], mortality_rate, prenatal_rate ])
                else:
                    outcsv.writerow([ key, value['count'], value['births'], value['deaths'], value['prenatal_eligible'], value['prenatal'], mortality_rate, prenatal_rate ])
    except ValueError:
        return False

In [82]:
categories = ['urbanization', 'border']

for category in categories:
    g_table_merge_fp = '../data/county-geos.csv'
    table_one = agate.Table.from_csv('../edits/mortality-rate/03-rates-population-10-14.csv')
    table_two = agate.Table.from_csv('../edits/' + category + '-status/01-' + category + '-status.csv')
    g_table_output = '../edits/mortality-rate/04-rates-population-10-14-' + category + '.csv'
    classify_column = 'county'

    join_tables_geo_one(table_one, table_two, classify_column, g_table_output)

    g_table_merge_fp = g_table_output
    fp = '../edits/prenatal/02-sum-eligible.csv'
    g_table_output = '../edits/02-mortality-prenatal-' + category + '-10-14-population.csv'
    select_columns = ['geo', 'county', category.capitalize() + ' Status', 'births_2010_2014', 'deaths_2010_2014', 'prenatal_eligible_2010_2014', 'prenatal_sum_2010_2014', 'race_female_total_2010_2014', 'race_female_white_2010_2014', 'race_female_black_2010_2014', 'race_female_hispanic_2010_2014', 'race_female_other_2010_2014']

    join_tables(fp, classify_column, classify_column_two, select_columns, False)

Saving joined table
Saving joined table


In [22]:
year = '2014'

g_table_merge_fp = '../edits/hospitals/01-county-totals-14.csv'
fp = '../edits/mortality-rate/04-rates-population-10-14-urbanization.csv'
classify_column = 'county'
classify_column_two = 'county'
select_columns = ['geo', 'county', 'Obstetrics Beds', 'births_' + year, 'deaths_' + year, 'rate_' + year, 'Urbanization Status']
g_table_output = '../edits/02-mortality-ob-hospitals-14-population.csv'

join_tables(fp, classify_column, classify_column_two, select_columns, False)

Saving joined table


In [68]:
year = '2014'

fp = '../edits/02-mortality-ob-hospitals-14-population.csv'
g_table_merge_fp = '../data/prenatal/raw/' + year + '-county.csv'
classify_column = 'county'
classify_column_two = 'Geographic Area'
select_columns = ['geo', 'county', 'Obstetrics Beds', 'Total Number', 'Total Percent', 'births_' + year, 'deaths_' + year, 'rate_' + year, 'Urbanization Status']
g_table_output = '../edits/02-mortality-prenatal-ob-hospitals-14-population.csv'
rename_columns = {'Geographic Area': 'county', 'Total Number': 'prenatal_' + year, 'Total Percent': 'prenatal_percent_' + year}

join_tables(fp, classify_column, classify_column_two, select_columns, rename_columns)

Saving joined table


In [289]:
# all_categories = ['urbanization', 'border', 'ob-hospitals']
# all_categories = ['urbanization', 'border']
# all_categories = ['urbanization', 'ob-hospitals']
all_categories = ['ob-hospitals']

for a_category in all_categories:
    if a_category == 'ob-hospitals':
        years = '14'
    else:
        years = '10-14'
        
    g_table_input = '../edits/02-mortality-prenatal-' + a_category + '-' + years + '-population.csv'
    g_table_output = '../output/mortality-prenatal-' + a_category + '-' + years + '-population-split.csv'
    
    # Categories we want to divide by
    # These are the values in the spreadsheets
    categories_dict = {}
    
    if a_category is 'urbanization':
        categories_dict['Urban'] = {}
        categories_dict['Rural'] = {}
    elif a_category is 'border':
        categories_dict['Border'] = {}
        categories_dict['Non-Border'] = {}
    elif a_category == 'ob-hospitals':
        categories_dict['OB hospital(s)'] = {}
        categories_dict['No OB hospital(s)'] = {}
    categories_dict['Total'] = {}
    
    csv_calcuations(g_table_input, g_table_output, categories_dict, a_category)

OB hospital(s)
{'count': 111.0, 'prenatal': 217086.0, 'prenatal_eligible': 351868.0, 'births': 371639.0, 'deaths': 124.0}
Row created
No OB hospital(s)
{'count': 143.0, 'prenatal': 15912.0, 'prenatal_eligible': 26087.0, 'births': 27542.0, 'deaths': 15.0}
Row created
Total
{'count': 254.0, 'prenatal': 232998.0, 'prenatal_eligible': 14134.0, 'births': 399181.0, 'deaths': 139.0}
Row created
