# Brand-to-brand visit flows between census block groups (chord diagram)

In [21]:
# Bokeh setup: notebook output is configured with the INLINE resources object.
from bokeh.resources import INLINE
import bokeh.io
# NOTE(review): BOKEH_RESOURCES is not read by any cell visible in this
# notebook -- confirm it is still needed.
BOKEH_RESOURCES=INLINE
bokeh.io.output_notebook(INLINE) 

# Plotting (HoloViews), data handling (pandas / numpy) and literal parsing (ast).
import holoviews as hv
from holoviews import opts, dim
import pandas as pd
import numpy as np
import ast

In [2]:
# Load the visit-pattern table, one row per census block group (CBG).
df = pd.read_csv('cbg_patterns.csv')

# Drop rows that have no CBG id. `.copy()` makes the result an independent
# frame so the column assignments below do not write into a slice of the raw
# table (which triggers SettingWithCopyWarning).
df = df[df['census_block_group'].notnull()].copy()

# Explicit 64-bit integers rather than the platform-dependent `int`.
df['census_block_group'] = df['census_block_group'].astype('int64')

# Both columns are stored as Python-literal strings in the CSV; parse them
# into real objects. `top_brands` becomes a set so later cells can intersect it.
df['visitor_home_cbgs'] = df['visitor_home_cbgs'].apply(ast.literal_eval)
df['top_brands'] = df['top_brands'].apply(lambda raw: set(ast.literal_eval(raw)))

# Index rows by CBG id so later cells can look a row up with df.loc[cbg].
df.index = df['census_block_group']

In [3]:
# How many of the most frequently listed brands to keep. The result keeps the
# name `top_10_brands` because later cells refer to it by that name.
N_TOP_BRANDS = 20

# Count, for every brand, how many rows list it in their `top_brands` set.
top_brands = df.top_brands
brand_dict = {}
for brand_list in top_brands:
    for brand in brand_list:
        brand_dict[brand] = brand_dict.get(brand, 0) + 1

# Rank by descending count and break ties by brand name, so the selection is
# the same on every run instead of depending on set/dict iteration order.
ranked_brands = sorted(brand_dict, key=lambda brand: (-brand_dict[brand], brand))
top_10_brands = set(ranked_brands[:N_TOP_BRANDS])

In [4]:
# Reduce each row's brand set to the selected top brands. `assign` returns a
# new frame, so no value is ever written into a slice of an earlier frame.
df = df.assign(
    top_brands=df['top_brands'].apply(lambda brands: brands.intersection(top_10_brands))
)

# Keep only the rows that still list at least one selected brand.
df = df[df['top_brands'].map(len) > 0]

# Index values of the rows that survived the filter.
places_of_interest = set(df.index)

# Drop visitor-home entries whose key (converted with int) is not one of the
# surviving index values, so every remaining key can be looked up in `df`.
df = df.assign(
    visitor_home_cbgs=df['visitor_home_cbgs'].apply(
        lambda visits: {
            home: count
            for home, count in visits.items()
            if int(home) in places_of_interest
        }
    )
)

In [5]:
# Square brand-by-brand accumulator: rows are the origin brand (a brand listed
# for the visitor's home row), columns are the destination brand (a brand
# listed for the visited row).
# A sorted list is used for the labels: newer pandas versions reject a set as
# index/columns, and set iteration order is not stable between runs.
brand_order = sorted(top_10_brands)
brand_matrix = pd.DataFrame(0, index=brand_order, columns=brand_order)

# Series used to look up the brand set of a visitor's home row by index value.
home_brands = df['top_brands']

# Walk the two needed columns directly instead of rebuilding a full row with
# df.iloc[i] on every inner iteration.
for dest_brands, visits in zip(df['top_brands'], df['visitor_home_cbgs']):
    for home_cbg, visit_frequency in visits.items():
        for orig_brand in home_brands.loc[int(home_cbg)]:
            for dest_brand in dest_brands:
                # Single-step label access. The chained form
                # brand_matrix[col][row] += v may update a temporary copy
                # and is silently lost under pandas Copy-on-Write.
                brand_matrix.at[orig_brand, dest_brand] += visit_frequency

In [6]:
# Flatten the brand matrix into an edge list with one row per (row, column)
# cell: integer source index, integer target index, and the cell value.
brand_names = list(brand_matrix.index)
brand_array = brand_matrix.to_numpy()

# Row-major grids of row / column positions, flattened to one entry per cell.
source_ids, target_ids = (grid.ravel() for grid in np.indices(brand_array.shape))

# Diagonal cells (same source and target) carry a value of 0.
link_values = np.where(source_ids == target_ids, 0, brand_array.ravel())

links_array = np.column_stack([source_ids, target_ids, link_values]).astype(int)

column_names = ["source", "target", "value"]
links = pd.DataFrame(links_array, columns=column_names)

In [22]:
# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Set the HoloViews output `size` option to 200 for subsequent displays.
hv.output(size=200)
# Handle to the Bokeh renderer.
# NOTE(review): the export cell further down calls hv.renderer('bokeh') again
# rather than using this name -- confirm whether `renderer` is still needed.
renderer = hv.renderer('bokeh')

In [23]:
# Node table: one record per brand, labelled with the capitalised brand name.
brands = list(map(lambda label: {'name': label.capitalize()}, brand_names))
node_table = pd.DataFrame(brands)
nodes = hv.Dataset(node_table, 'index')

# Build the chord element from the links and nodes, keeping only links whose
# value is at least 1.
all_links = hv.Chord((links, nodes))
chord = all_links.select(value=(1, None))

# Export the diagram with the Bokeh renderer under the base name 'chords'.
bokeh_renderer = hv.renderer('bokeh')
bokeh_renderer.save(chord, 'chords')

In [24]:
# Style the diagram: colour nodes by their index and edges by their source
# node, and label each node with its brand name.
styled_chord = chord.opts(
    opts.Chord(
        cmap='Category20',
        edge_cmap='Category20',
        edge_color=dim('source').str(),
        labels='name',
        node_color=dim('index').str(),
    )
)

# The export in the previous cell runs before these options are applied, so
# that file has no labels or colours. Export again so the file on disk matches
# what is displayed here.
hv.renderer('bokeh').save(styled_chord, 'chords')

# Last expression of the cell: display the styled diagram.
styled_chord