In [1]:
import agate
import warnings
from decimal import *

warnings.filterwarnings(action='ignore')

g_table_geos = agate.Table.from_csv('../data/county-geos.csv')

def is_float(string):
    try:
        # True if string is a number contains a dot
        return float(string) and '.' in string
    # String is not a number
    except ValueError:
        return False

# Get percent of filters in the table
# The filters and table are parameters
# f is short for filter
def perc_calc(table, f_col, fs):
  try:
    f_sum = 0
    f_tables = []
    f_string = ""
    
    for i in fs:
        f_table = table.aggregate( agate.Count(f_col, i) )
        f_tables.append(f_table)
        f_sum += f_table
    
    for index,i in enumerate(fs):
        f_perc = round(f_tables[index] / f_sum * 100, 2)
        f_string += i + ': ' + str(f_perc) + ', '
    
    return f_string

  except ValueError:
    return False

def perc_global(table_one, table_two, classify_column, classify_column_two, perc_column, filter_column, filters, limit):
    try:
        # Join with geos
        table_geos = table_one.join(g_table_geos, classify_column, 'county', inner=True)
        table_defs_geos = table_two.join(g_table_geos, classify_column_two, 'county', inner=True)
        table_join = table_geos.join(table_defs_geos, 'geo', 'geo', inner=True)
    
        # If something didn't join right, print it out
        if table_join:
            table_join_null = table_join.where(lambda row: row[filter_column] == None)
            
            if table_join_null:
                return "not everything joined"
        
        # Select only a few rows, get only integer or floats
        # And order
        table_filter = table_join.select((classify_column, perc_column, filter_column))
        table_filter_digit = table_filter.where(lambda row: row[perc_column].isdigit() or is_float(row[perc_column]) )
        table_order = table_filter_digit.order_by(lambda row: row[perc_column])
        table_top = table_order.limit(limit)
        
        print('Entire table')
        print( perc_calc(table_order, filter_column, filters) )
        print('-')
        print( 'Top ' + str(limit) )
        print( perc_calc(table_top, filter_column, filters) )
    except ValueError:
        return False
    
class AddBuckets(agate.Computation):
    def get_computed_data_type(self, table):
        return agate.Number()
    
    def run(self, table):
        new_column = []
        
        for index, row in enumerate(table.rows):
            match = False
            
            for i in filters_all:
                split = i.split('-')
                row_data = int(row[filter_column_og])
                
                if row_data >= int(split[0]) and row_data < ( int(split[1]) + 1):
                    new_column.append(i)
                    match = True
            
            if not match:
                print(row[filter_column_og])
                
        return new_column
    
class AddPercent(agate.Computation):
    def get_computed_data_type(self, table):
        return agate.Number()
    
    def run(self, table):
        new_column = []
        
        for index, row in enumerate(table.rows):
            perc = round(row[table_two_perc] / row[table_two_perc_total] * 100, 2)
            
            new_column.append(perc)
                
        return new_column

In [2]:
table_one = agate.Table.from_csv('../data/prenatal/prenatal-county-historical.csv')
table_two = agate.Table.from_csv('../data/prenatal/rural-urban-defs.csv')
classify_column = 'county'
classify_column_two = 'county'
perc_column = 'percent_2014'
filter_column =  'Urbanization Status'
filters = ['Rural', 'Urban']
limit = 20

perc_global(table_one, table_two, classify_column, classify_column_two, perc_column, filter_column, filters, limit)

Entire table
Rural: 65.67, Urban: 34.33, 
-
Top 20
Rural: 80.0, Urban: 20.0, 


In [3]:
table_one = agate.Table.from_csv('../data/prenatal/prenatal-county-historical.csv')
table_two = agate.Table.from_csv('../data/prenatal/rural-urban-defs.csv')
classify_column = 'county'
classify_column_two = 'county'
perc_column = 'percent_2014'
filter_column =  'Border Status'
filters =  ['Border', 'Non-Border']
limit = 20

perc_global(table_one, table_two, classify_column, classify_column_two, perc_column, filter_column, filters, limit)

Entire table
Border: 12.02, Non-Border: 87.98, 
-
Top 20
Border: 30.0, Non-Border: 70.0, 


In [4]:
table_one = agate.Table.from_csv('../data/prenatal/prenatal-county-historical.csv')
table_two = agate.Table.from_csv('../data/uninsured/ACS_15_5YR_S2701.csv')
classify_column = 'county'
classify_column_two = 'Geography'
filter_column_og =  'Percent Insured; Estimate; Civilian noninstitutionalized population'
perc_column = 'percent_2014'
filter_column =  'bucket'
filters_all =  ['0-70', '71-100']
filters_shown =  ['0-70', '71-100']
limit = 20

table_two_add = table_two.compute([
  ( filter_column, AddBuckets() )
])

perc_global(table_one, table_two_add, classify_column, classify_column_two, perc_column, filter_column, filters_shown, limit)

Entire table
0-70: 5.98, 71-100: 94.02, 
-
Top 20
0-70: 20.0, 71-100: 80.0, 


In [5]:
table_one = agate.Table.from_csv('../data/prenatal/prenatal-county-historical.csv')
table_two = agate.Table.from_csv('../data/poverty/nhgis0015_ds215_20155_2015_county.csv')
table_two_perc = 'ADNHE002'
table_two_perc_total = 'ADNHE001'
classify_column = 'county'
classify_column_two = 'COUNTY'
filter_column_og =  'percent'
perc_column = 'percent_2014'
filter_column =  'bucket'
filters_all =  ['0-19', '20-100']
filters_shown =  ['0-19', '20-100']
limit = 20

table_two_add = table_two.compute([
    ( filter_column_og, AddPercent() ),
])

table_two_add_two = table_two_add.compute([
    ( filter_column, AddBuckets() )
])

perc_global(table_one, table_two_add_two, classify_column, classify_column_two, perc_column, filter_column, filters_shown, limit)

Entire table
0-19: 79.91, 20-100: 20.09, 
-
Top 20
0-19: 60.0, 20-100: 40.0, 
