In [1]:
import warnings
import agate
from pathlib import Path
from shutil import copyfile
import os

warnings.filterwarnings(action='ignore')
    
def join_tables(fp, classify_column, i, columns_i):
    """Fold one CSV file into the running merge file.

    Relies on two module-level names that must be set before calling:
    ``g_table_merge_fp`` (path of the merge file that is read and rewritten)
    and ``g_table_geos`` (table joined against on its ``county`` column).

    * When the merge file does not exist yet, ``fp`` is copied to it as-is.
    * Otherwise the merge file and ``fp`` are each inner-joined to
      ``g_table_geos`` (``classify_column`` against ``county``), joined to
      each other on ``geo``, and the result overwrites the merge file.

    Args:
        fp: Path of the CSV file to merge in.
        classify_column: Column of the CSV tables matched against ``county``
            in ``g_table_geos``.
        i: Not used by this function; kept so existing callers keep working.
        columns_i: Forwarded to ``Table.join`` as its ``columns`` argument
            for both joins against ``g_table_geos``.

    Returns:
        ``False`` when a ``ValueError`` was raised while merging, otherwise
        ``None``.
    """
    try:
        table = agate.Table.from_csv(fp, encoding='utf-8-sig')

        # Not our first time through: join onto the existing merge file.
        if Path(g_table_merge_fp).is_file():
            print("file exists")
            # Give both the accumulated table and the incoming table a geo
            # id, then join them on it to make one big table.
            table_one = agate.Table.from_csv(g_table_merge_fp, encoding='utf-8-sig')
            table_geos = table_one.join(g_table_geos, classify_column, 'county',
                                        inner=True,
                                        columns=columns_i)

            table_geos_two = table.join(g_table_geos, classify_column, 'county',
                                        inner=True,
                                        columns=columns_i)

            table_join = table_geos.join(table_geos_two, 'geo', 'geo')

            # If something didn't join right, keep a copy for inspection.
            if table_join:
                table_join_null = table_join.where(
                    lambda row: row[classify_column] is None
                )

                if table_join_null:
                    print("Not everything joined. Table saved to tmp dir.")
                    table_join.to_csv('../tmp/table-not-joined.csv')

            print("saving joined table")
            # Drop the duplicate key columns the joins leave behind before
            # overwriting the merge file.
            table_join.exclude(
                ['geo2', 'geo2_2', 'geo2_3', 'county2', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l']
            ).to_csv(g_table_merge_fp)
        # First time through: nothing to join against yet, so the incoming
        # file becomes the merge file.
        else:
            print("file doesn't exist")
            copyfile(fp, g_table_merge_fp)

    except ValueError as error:
        # Still best-effort (the caller's loop keeps going), but say which
        # file failed instead of swallowing the error silently.
        print('Could not merge ' + str(fp) + ': ' + str(error))
        return False

# Geo table that join_tables matches every county file against.
g_table_geos = agate.Table.from_csv('../data/county-geos.csv')

# Years with a per-county file, and the dataset categories to merge.
ss_array = ['2010', '2011', '2012', '2013', '2014']
all_categories = ['mortality-rate', 'population']

# Column of the yearly files that is matched against the geo table.
classify_column = 'county'

for category in all_categories:
    print('---')
    print('Category: ' + category)

    # join_tables reads this module-level path to find the merge file.
    g_table_merge_fp = '../edits/' + category + '/01-merge.csv'

    # Start every category from scratch: drop any merge file left over
    # from an earlier run.
    if Path(g_table_merge_fp).is_file():
        os.remove(g_table_merge_fp)

    for i in ss_array:
        print(i)

        fp = '../data/' + category + '/' + i + '-county.csv'

        # Year-suffixed column names handed to join_tables for this file.
        if category == 'mortality-rate':
            columns = ['geo'] + [
                prefix + i for prefix in ('births_', 'deaths_', 'rate_')
            ]
        elif category == 'population':
            columns = ['geo'] + [
                prefix + i
                for prefix in (
                    'race_female_total_',
                    'race_female_white_',
                    'race_female_black_',
                    'race_female_hispanic_',
                    'race_female_other_',
                )
            ]

        join_tables(fp, classify_column, i, columns)

---
Category: mortality-rate
2010
file doesn't exist
2011
file exists
saving joined table
2012
file exists
saving joined table
2013
file exists
saving joined table
2014
file exists
saving joined table
---
Category: population
2010
file doesn't exist
2011
file exists
saving joined table
2012
file exists
saving joined table
2013
file exists
saving joined table
2014
file exists
saving joined table


In [112]:
def join_tables_geo(table_one, table_two, output_fp):
    """Inner-join two tables on ``geo`` and save the result as CSV.

    Args:
        table_one: Left table; must provide ``join``.
        table_two: Right table, matched on its ``geo`` column.
        output_fp: Destination path passed to ``to_csv``.

    The ``county2`` column is excluded from the joined table before it is
    written. Nothing is returned.
    """
    merged = table_one.join(table_two, 'geo', 'geo', inner=True)
    trimmed = merged.exclude(['county2'])
    trimmed.to_csv(output_fp)
    
# Per-category merge files written by the join_tables loop.
table_one = agate.Table.from_csv('../edits/mortality-rate/01-merge.csv')
table_two = agate.Table.from_csv('../edits/population/01-merge.csv')

# Combined mortality + population table, one row per shared geo id.
output_fp = '../edits/mortality-rate/02-merge-population.csv'

join_tables_geo(
    table_one=table_one,
    table_two=table_two,
    output_fp=output_fp,
)

In [110]:
# Input: the merged mortality + population table that the aggregate columns
# are computed from.
g_table = agate.Table.from_csv('../edits/mortality-rate/02-merge-population.csv')
# Output: where the table with the added *_2010_2014 columns is written.
g_table_fp = '../edits/mortality-rate/03-rates-population-10-14.csv'

class Rate(agate.Computation):
    """Compute one 2010-2014 aggregate value per row of a table.

    The computation is driven by two module-level names that must be set
    before ``run`` executes:

    * ``columns_add`` -- every aggregate name; the entries that are neither
      ``'rate'`` nor contain ``'perc'`` are summed over the per-year columns
      ``<name>_<year>``.
    * ``column_add`` -- the single aggregate this run produces.
    """

    def get_computed_data_type(self, table):
        """Return the data type of the computed column (always a number)."""
        return agate.Number()

    def run(self, table):
        """Return the list of values for the ``column_add`` aggregate.

        One value is produced per row of ``table``, in row order. Per-year
        cells that are ``None`` or ``'-'`` count as zero.

        Raises:
            ValueError: if ``column_add`` is not an aggregate this class
                knows how to compute.
        """
        years = ['2010', '2011', '2012', '2013', '2014']
        # Only these names have per-year source columns; 'rate' and the
        # '*_perc' entries are derived from the sums.
        summable = [
            name for name in columns_add
            if name != 'rate' and 'perc' not in name
        ]

        new_column = []

        for row in table.rows:
            sums = dict()

            for year in years:
                for column in summable:
                    c_row = row[column + '_' + year]

                    # Missing values contribute nothing to the total.
                    if c_row is None or c_row == '-':
                        c_row = 0

                    sums[column] = sums.get(column, 0) + c_row

            new_column.append(self._aggregate(sums))

        print('Adding column: ' + column_add + '_2010_2014')
        return new_column

    def _aggregate(self, sums):
        """Turn one row's five-year sums into the ``column_add`` value."""
        if column_add == 'births' or 'race_female' in column_add:
            if 'perc' not in column_add:
                return sums[column_add]

            # Percentage of the female total for one race.
            column_trim = column_add.replace('_perc', '')
            total = sums['race_female_total']
            if total == 0:
                # A share of nothing is undefined; leave the cell empty
                # rather than dividing by zero.
                return None
            return round(sums[column_trim] / total * 100, 2)

        if column_add == 'deaths' or column_add == 'rate':
            # No deaths: both the count and the rate are zero.
            if sums['deaths'] == 0:
                return 0
            if column_add == 'deaths':
                return sums['deaths']
            if sums['births'] == 0:
                # Deaths recorded without births: the rate is undefined.
                return None
            # Deaths per 100,000 births.
            return round(sums['deaths'] / sums['births'] * 100000, 2)

        # Appending nothing here would leave the column shorter than the
        # table, so fail loudly instead.
        raise ValueError('Unsupported column: ' + column_add)

# Aggregates to add, in output order. Rate.run also reads this list to find
# out which per-year columns it has to sum.
columns_add = ['births', 'deaths', 'rate', 'race_female_total', 'race_female_white', 'race_female_white_perc', 'race_female_black', 'race_female_black_perc', 'race_female_hispanic', 'race_female_hispanic_perc', 'race_female_other', 'race_female_other_perc']

# Chain one compute() per aggregate. ``column_add`` is deliberately the
# module-level loop variable: Rate.run reads it to know which aggregate to
# produce for the current pass.
table_add = g_table
for column_add in columns_add:
    table_add = table_add.compute([
        (column_add + '_2010_2014', Rate())
    ])

# Write once, after every column has been added.
table_add.to_csv(g_table_fp)

Adding column: births_2010_2014
Adding column: deaths_2010_2014
Adding column: rate_2010_2014
Adding column: race_female_total_2010_2014
Adding column: race_female_white_2010_2014
Adding column: race_female_white_perc_2010_2014
Adding column: race_female_black_2010_2014
Adding column: race_female_black_perc_2010_2014
Adding column: race_female_hispanic_2010_2014
Adding column: race_female_hispanic_perc_2010_2014
Adding column: race_female_other_2010_2014
Adding column: race_female_other_perc_2010_2014


In [15]:
# NOTE(review): this cell defines none of `category`, `i` or `columns`; it
# only works with whatever values the category/year loops of the first cell
# left behind, and join_tables additionally reads the module-level
# `g_table_merge_fp` and `g_table_geos`. It fails on a fresh kernel unless
# that cell has run first.
# NOTE(review): looks like a leftover manual re-run of a single join --
# confirm whether it is still needed before keeping it.
fp = '../data/' + category + '/' + i + '-county.csv'
classify_column = 'county'
    
join_tables(fp, classify_column, i, columns)