In [72]:
from bokeh.io import (
    output_notebook,
    show,
    )

from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    LogColorMapper
    )

from bokeh.palettes import Greys256 as palette

from bokeh.plotting import figure

import json

import numpy as np
import pandas as pd
import sqlite3


def db_connect(db_path='output/sales_new_seed.db'):
    """Open a SQLite connection to the sales database.

    Args:
        db_path: Path of the SQLite database file to open. Defaults to the
            seed database used throughout this notebook, so existing
            ``db_connect()`` calls behave exactly as before.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for
        closing it.
    """
    return sqlite3.connect(db_path)


In [73]:
# Ad-hoc queries kept around for exploration. Only `sum_for_month_by_county`
# is executed by this cell.

# every sale row from January 2013
year_query = "SELECT * FROM sales AS s WHERE strftime('%Y-%m', s.sale_date) = '2013-01'"

# first 100 sale rows, unfiltered
base_query = "SELECT * FROM sales LIMIT 100"

# per-day sale totals for January 2014
batch_month_totals = '''
                        SELECT sale_date,
                        SUM(sale_value) AS sale_value
                        FROM sales
                        WHERE strftime('%Y-%m', sale_date) = '2014-01'
                        GROUP BY sale_date;'''

# per-store sale totals for January 2016
# NOTE(review): `sale_date` is selected but not grouped on, so the date
# returned for each store is whichever row SQLite happens to pick.
sum_for_month_by_store = '''
                        SELECT sale_date,
                        sales.store_id,
                        SUM(sale_value) AS sale_value
                        FROM sales
                        INNER JOIN stores ON stores.store_id = sales.store_id
                        WHERE strftime('%Y-%m', sale_date) = '2016-01'
                        GROUP BY sales.store_id;'''

# per-county sale totals; despite the name, the filter covers the whole of 2012
sum_for_month_by_county = '''
                        SELECT stores.county_id AS county_id,
                        counties.county_name AS county_name,
                        SUM(sale_value) as sale_value 
                        FROM sales 
                        INNER JOIN stores ON stores.store_id = sales.store_id
                        INNER JOIN counties ON counties.county_id = stores.county_id
                        WHERE strftime('%Y', sale_date) = '2012'
                        GROUP BY stores.county_id ORDER BY stores.county_id;'''

# Close the connection as soon as the frame is loaded so that re-running the
# cell does not leave open handles on the database file.
conn = db_connect()
try:
    sql_frame = pd.read_sql_query(sum_for_month_by_county, conn)
finally:
    conn.close()

# convert penny values to dollars; assigning a new column (rather than an
# in-place `.loc[...] /= 100`) lets the column change from integer to float
sql_frame['sale_value'] = sql_frame['sale_value'] / 100

print('sql_loaded')

sql_loaded


In [74]:
# Pull each column of the query result out as a plain Python list so the
# ColumnDataSource receives simple sequences.
values = sql_frame['sale_value'].tolist()
county_ids = list(map(int, sql_frame['county_id']))
county_names = list(sql_frame['county_name'])

# column name -> values, as consumed by the bar chart glyphs and hover tool
data = {
    'sale_sum': values,
    'county_ids': county_ids,
    'county_names': county_names,
}
data_source = ColumnDataSource(data)

# preview the underlying frame as the cell output
sql_frame.head()


Unnamed: 0,county_id,county_name,sale_value
0,1,Adair,413691.48
1,2,Adams,96674.89
2,3,Allamakee,785647.44
3,4,Appanoose,687274.18
4,5,Audubon,163577.03


In [75]:
# render Bokeh output inline in the notebook
output_notebook()

# The plotted frame comes from `sum_for_month_by_county`, which filters on
# the year 2012, so the tooltip label has to say 2012 as well.
hover = HoverTool(tooltips=[
    ('county', '@county_names'),
    ('2012 Sales Sum', '@sale_sum')
])

# `width` / `height` match the map figure later in this notebook; the older
# `plot_width` / `plot_height` spellings are not accepted by Bokeh 3.
bar_county_sales_plot = figure(width=600, height=600, tools=[hover])

# one bar per county id, bar height = total sales for that county
bar_county_sales_plot.vbar(x='county_ids', width=0.5, bottom=0, top='sale_sum', source=data_source)

show(bar_county_sales_plot)

In [76]:
def counties_by_state_number(raw_data, state_fips='19'):
    """Return the features of ``raw_data`` that belong to a single state.

    Args:
        raw_data: Iterable of GeoJSON feature dicts, each exposing a state
            code under ``feature['properties']['STATE']``.
        state_fips: State code to keep, compared as a string. Defaults to
            ``'19'``, the code this notebook uses for Iowa.

    Returns:
        A new list holding the matching features in their original order.
        The input is not modified.
    """
    # Filter the argument itself rather than a module-level variable, so the
    # function works on whatever feature list the caller passes in.
    return [
        feature for feature in raw_data
        if feature['properties']['STATE'] == state_fips
    ]

def extract_single_county_patch(raw_geojson):
    """Split the first coordinate sequence of a geometry into x and y lists.

    Args:
        raw_geojson: A nested coordinates array; only its first element is
            read. Each entry of that element must be indexable with at
            least two items (presumably ``[longitude, latitude]`` pairs of
            a polygon's outer ring; verify against the input file).

    Returns:
        A ``(xs, ys)`` tuple of two lists: item 0 and item 1 of every
        point, in order.
    """
    first_ring = raw_geojson[0]
    xs = [point[0] for point in first_ring]
    ys = [point[1] for point in first_ring]
    return xs, ys
    

def manually_build_patches(raw_geojson):
    """Build parallel coordinate and name lists for every feature.

    Args:
        raw_geojson: Dict with a ``'features'`` list. Each feature must
            provide ``properties['NAME']`` and ``geometry['coordinates']``.

    Returns:
        A ``(xs, ys, names)`` tuple of three lists of equal length. Entry
        ``i`` of ``xs`` / ``ys`` holds the x / y coordinate list produced by
        ``extract_single_county_patch`` for feature ``i``, and entry ``i``
        of ``names`` is that feature's ``NAME``.
    """
    manual_xs = []
    manual_ys = []
    names = []

    for feature in raw_geojson['features']:
        names.append(feature['properties']['NAME'])
        patch_x, patch_y = extract_single_county_patch(feature['geometry']['coordinates'])
        manual_xs.append(patch_x)
        manual_ys.append(patch_y)

    return (manual_xs, manual_ys, names)

def build_sale_val_array(df, target_map_data):
    """Return sale values ordered to match a list of county names.

    Args:
        df: DataFrame with a ``county_name`` column and a ``sale_value``
            column.
        target_map_data: Iterable of county names giving the desired output
            order.

    Returns:
        A list with one ``sale_value`` per name in ``target_map_data``, in
        that order.

    Raises:
        KeyError: If a name in ``target_map_data`` is absent from
            ``df['county_name']``.
    """
    # Look the value up by column label instead of by position, so the
    # result does not depend on the order of the frame's columns.
    sale_by_county = df.set_index('county_name')['sale_value']
    return [sale_by_county.loc[county] for county in target_map_data]

# Forward slashes are accepted on Windows as well as POSIX systems, and avoid
# the invalid `\g` / `\c` escape sequences of a backslash-separated literal.
with open('input/geojson/counties_high_res_gz_2010_us_050_00_500k.json', 'r') as raw_state_json:
    # parse the JSON document straight from the file handle
    raw_geo_data = json.load(raw_state_json)

# replace raw geo data with only Iowa counties
raw_geo_data['features'] = counties_by_state_number(raw_geo_data['features'])

# build the lists of data in the correct orders for our ColumnData
manual_x, manual_y, county_names = manually_build_patches(raw_geo_data)

# builds a sale_value array aligned in the correct order to match the geo data counties
sale_values = build_sale_val_array(sql_frame, county_names)

# source dictionary to pass to ColumnDataSource
manual_data_for_map = {'xs': manual_x, 'ys': manual_y, 'county_name': county_names, 'sale_value': sale_values}

# map data
manual_col_map_data = ColumnDataSource(manual_data_for_map)

hover = HoverTool(tooltips=[
    ("name", "@county_name"),
    ("annual sales", "@sale_value")
])

# maps each county's sale_value onto the grey-scale palette
color_mapper = LinearColorMapper(palette=palette)

iowa_counties_map = figure(title="Iowa Counties Manual",
                           tools=[hover], x_axis_location=None,
                           y_axis_location=None,
                           width=500,
                           height=300
                          )

# hide the grid lines behind the map
iowa_counties_map.grid.grid_line_color = None

# one patch per county, filled according to its sale_value
iowa_counties_map.patches('xs', 'ys',
                          fill_color={
                              'field': 'sale_value',
                              'transform': color_mapper
                          },
                          fill_alpha=0.7, line_color='white',
                          line_width=0.5,
                          source=manual_col_map_data
                         )

output_notebook()
show(iowa_counties_map)
