# Choropleth Test

In [3]:
import datetime
import pandas as pd
import numpy as np


import plotly
import plotly.express as px
from plotly import graph_objects as go


# Import Data

In [4]:
df = pd.read_csv("sanzo_dist_test.csv")
df["id"]  = df["retailer_name"] + " " + df["address"]

In [5]:
import pgeocode
nomi = pgeocode.Nominatim('us')

def fourtofive (x):
    if len(x)<5: 
        return "0" + x
    else:
        return x

df['zip_code5'] = df['zip_code'].astype(str)
df['zip_code5'] = df['zip_code5'].map(lambda x : fourtofive(x))



df['lat'] = (nomi.query_postal_code(df['zip_code5'].astype(str).tolist()).latitude)
df['long'] = (nomi.query_postal_code(df['zip_code5'].astype(str).tolist()).longitude)
df = df.sort_values(by="fct_date")



# Clean Data

In [6]:
#get long/lat from zip code
retailer_li = df["id"].unique().tolist()
dates = df["fct_date"].unique().tolist()

df_retailers = df.drop("fct_date", axis=1).drop_duplicates()

df_retailers
    



Unnamed: 0.1,Unnamed: 0,normalized_url,brand_source_id,retailer_name,store_source_id,address,city,state,zip_code,country,latitude,longitude,store_info,id,zip_code5,lat,long
712,712,drinksanzo.com,,Foodtown,1952,1291 Broadway,Brooklyn,NY,11221,USA,40.690968,-73.92630,<b>Foodtown</b><br><br>1291 Broadway<br>Brookl...,Foodtown 1291 Broadway,11221,40.6907,-73.9274
727,727,drinksanzo.com,,Foodtown,53,1420 Fulton St,Brooklyn,NY,11216,USA,40.679730,-73.94439,<b>Foodtown</b><br><br>1420 Fulton St<br>Brook...,Foodtown 1420 Fulton St,11216,40.6794,-73.9496
697,697,drinksanzo.com,,Foodtown,1952,1291 Broadway,Brooklyn,NY,11221,USA,40.690968,-73.92630,<b>Foodtown</b><br><br>1291 Broadway<br>Brookl...,Foodtown 1291 Broadway,11221,40.6907,-73.9274
1,1,drinksanzo.com,,Food Cellar,3019,4-85 47th Rd,Long Island City,NY,11101,USA,40.745300,-73.95593,<b>Food Cellar</b><br><br>4-85 47th Rd<br>Long...,Food Cellar 4-85 47th Rd,11101,40.7446,-73.9345
724,724,drinksanzo.com,,Foodtown,53,1420 Fulton St,Brooklyn,NY,11216,USA,40.679730,-73.94439,<b>Foodtown</b><br><br>1420 Fulton St<br>Brook...,Foodtown 1420 Fulton St,11216,40.6794,-73.9496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278,278,drinksanzo.com,,Whole Foods Market,WHB292935,200 Harker Pl Ste 100,Annapolis,MD,21401,US,38.981700,-76.53990,<b>Whole Foods Market</b><br><br>200 Harker Pl...,Whole Foods Market 200 Harker Pl Ste 100,21401,38.9898,-76.5501
277,277,drinksanzo.com,,Whole Foods Market,WHB292929,1425 Central Ave,Albany,NY,12205,US,42.707600,-73.82110,<b>Whole Foods Market</b><br><br>1425 Central ...,Whole Foods Market 1425 Central Ave,12205,42.7198,-73.8207
549,549,drinksanzo.com,,Whole Foods Market,WHB293353,110 Bloomingdale Rd,White Plains,NY,10605,US,41.027300,-73.75820,<b>Whole Foods Market</b><br><br>110 Bloomingd...,Whole Foods Market 110 Bloomingdale Rd,10605,41.0141,-73.7552
288,288,drinksanzo.com,,Whole Foods Market,WHB168857,2024 W Broad St,Richmond,VA,23220,US,37.557600,-77.46150,<b>Whole Foods Market</b><br><br>2024 W Broad ...,Whole Foods Market 2024 W Broad St,23220,37.5498,-77.4588


# Visualize

In [7]:
#scatter geo
df_s = df[df['fct_date']=="2021-03"]


fig = px.scatter_geo(
        data_frame = df_s,
        lat = 'lat',
        lon = 'long',
        color = 'retailer_name',
        animation_frame="fct_date", 
        animation_group="id"
        
)

fig.update_layout(
        geo_scope='usa'
)
fig.show()


In [75]:
import random
random.seed(10)

def gen_colors (x):
    buffer =[]
    for i in range(x):
        buffer.append( "%06x" % random.randint(0, 0xFFFFFF))
    return buffer

retailers = df['retailer_name'].unique().tolist()
retailer_colors = gen_colors(len(retailers))

retailer_color_di = dict(zip(retailers,retailer_colors))

color_di_df = pd.DataFrame.from_dict(retailer_color_di.items())
color_di_df[1] = '#' +color_di_df[1]
color_di_df['x'] = None

df['color'] = '#' + df['retailer_name'].map(retailer_color_di)

data_slider = []

for date in df['fct_date'].unique():
    df_segmented =  df[(df['fct_date']== date)]

    for col in df_segmented.columns:
        df_segmented[col] = df_segmented[col].astype(str)
    data_each_yr = dict(
                        type='scattergeo',
                        lat = df_segmented['lat'],
                        lon = df_segmented['long'],
                        text = df_segmented['id'],
                        marker = dict(
                            autocolorscale = True,
                            color = df_segmented['color']
                        ),
                        showlegend = False,
                        mode = 'markers',
                            

        )
    data_slider.append(data_each_yr)
    
    


steps = []
for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label="Period: {}".format(df['fct_date'].unique()[i])
    )
    step['args'][1][i] = True
    steps.append(step)

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

layout = dict(title ='Sanzo Distribution Map',
            sliders=sliders,
            geo = dict(
                scope='usa',
                projection=dict( type='albers usa' )
                ),
            #paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)'
            
)


legend = px.scatter(x=color_di_df[0],y=color_di_df['x'],color= color_di_df[0], color_discrete_sequence=color_di_df[1])

legend.update_xaxes(visible=False)
legend.update_yaxes(visible=False)
legend.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})

legend.write_image("bg.png")

fig = go.Figure(data=data_slider)
fig.layout = layout

import base64

def _get_image(path):
    with open(path, 'rb') as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
    encoded_img = f'data:image/png;base64,{encoded_string}'
    return encoded_img



pic=_get_image('bg.png')

fig.add_layout_image(
        dict(
            source=pic,
            xref='paper', yref='paper',
            x=0.65, y=-0.55,
            sizex=0.6, sizey=1.6,
            xanchor='center', yanchor='bottom'
        )
    )

fig.show()
#fig = dict(data=data_slider, layout=layout)

#plotly.offline.iplot(fig)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

