In [54]:
# import bokeh

from bokeh.models import (
    ColumnDataSource,
    DatetimeTickFormatter,
    HoverTool,
)
from bokeh.io import output_notebook, show, save
from bokeh.plotting import figure
import numpy as np
import pandas as pd
import sqlite3


def db_connect(db_path='output/sales_new_seed.db'):
    """Open a SQLite connection to the sales database.

    Args:
        db_path: Path of the SQLite database file to open. Defaults to
            'output/sales_new_seed.db', the path this notebook uses, so
            existing zero-argument calls behave exactly as before.

    Returns:
        sqlite3.Connection: A newly opened connection. The caller owns it
        and is responsible for closing it.
    """
    return sqlite3.connect(db_path)


In [68]:
# All columns of every sales row whose sale_date falls in January 2013.
# NOTE(review): despite the name, the filter is on a single year-month
# ('%Y-%m' = '2013-01'), not on a whole year.
year_query = "SELECT * FROM sales AS s WHERE strftime('%Y-%m', s.sale_date) = '2013-01'"

# Up to 100 rows of the sales table. There is no ORDER BY, so which rows come
# back is up to the database.
base_query = "SELECT * FROM sales LIMIT 100"

# Total sale_value per sale_date, restricted to January 2014.
batch_month_totals = '''
                        SELECT sale_date,
                        SUM(sale_value) AS sale_value
                        FROM sales
                        WHERE strftime('%Y-%m', sale_date) = '2014-01'
                        GROUP BY sale_date;'''

# Total sale_value per store, restricted to January 2016.
# NOTE(review): sale_date is selected but is neither grouped nor aggregated, so
# SQLite returns it from an arbitrary row of each store's group -- confirm
# whether that column is actually wanted here.
# NOTE(review): no column of `stores` is selected; presumably the join only
# drops sales without a matching store -- verify.
sum_for_month_by_store = '''
                        SELECT sale_date,
                        sales.store_id,
                        SUM(sale_value) AS sale_value
                        FROM sales
                        INNER JOIN stores ON stores.store_id = sales.store_id
                        WHERE strftime('%Y-%m', sale_date) = '2016-01'
                        GROUP BY sales.store_id;'''

# Total sale_value per county (id and name), ordered by county_id.
# NOTE(review): the name says "month", but the filter is on the whole calendar
# year ('%Y' = '2016').
sum_for_month_by_county = '''
                        SELECT stores.county_id AS county_id,
                        counties.county_name AS county_name,
                        SUM(sale_value) as sale_value 
                        FROM sales 
                        INNER JOIN stores ON stores.store_id = sales.store_id
                        INNER JOIN counties ON counties.county_id = stores.county_id
                        WHERE strftime('%Y', sale_date) = '2016'
                        GROUP BY stores.county_id ORDER BY stores.county_id;'''

# Run the per-county aggregate query. The connection is held in a variable and
# closed in `finally` so it is released even if the query raises; passing
# db_connect() inline would leave the connection open.
conn = db_connect()
try:
    sql_frame = pd.read_sql_query(sum_for_month_by_county, conn)
finally:
    conn.close()

# Scale the raw totals by 1/100 and then 1/1000 before plotting.
# NOTE(review): presumably converts cents to thousands of currency units --
# confirm the unit of sales.sale_value.
values = (sql_frame['sale_value'] / 100 / 1000).tolist()

# Plain Python ints / strings for the Bokeh column data source.
county_ids = sql_frame['county_id'].astype(int).tolist()
county_names = sql_frame['county_name'].tolist()

# Column names here are referenced by the plot and the hover tooltips.
data = {
    'sale_sum': values,
    'county_ids': county_ids,
    'county_names': county_names,
}

data_source = ColumnDataSource(data)

# Preview the raw query result as the cell output.
sql_frame.head()


99


Unnamed: 0,county_id,county_name,sale_value
0,1,Adair,39038291
1,2,Adams,7507097
2,3,Allamakee,77719846
3,4,Appanoose,73808681
4,5,Audubon,12380714


In [69]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Tooltip shown when hovering a bar: county name and its scaled sales sum.
hover = HoverTool(tooltips=[
    ('county', '@county_names'),
    ('2016 Sales Sum', '@sale_sum')
])

# `width`/`height` replace `plot_width`/`plot_height`, which were removed from
# figure() in Bokeh 3.0. Title and axis labels let the chart stand alone.
plot = figure(
    width=600,
    height=600,
    title='2016 Sales Sum by County',
    x_axis_label='County ID',
    y_axis_label='2016 Sales Sum',
    tools=[hover],
)

# One vertical bar per county, positioned by county id, rising from zero.
plot.vbar(x='county_ids', width=0.5, bottom=0, top='sale_sum', source=data_source)

show(plot)