In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly

## Choropleth map

Making choropleth maps requires two main types of input:

1. Geometry information:
    1. This can either be a supplied GeoJSON file (or the URL to a GeoJSON file) where each feature has either an id field or some identifying value in properties; or
    1. one of the built-in geometries within plotly: US states and world countries (see below)
    
    
    
    
2. A list of values indexed by feature identifier.


The GeoJSON data is passed to the `geojson` argument, and the values are passed to the `color` argument of `px.choropleth` (`z` if using `graph_objects`), in the same order as the IDs passed to the `locations` argument.

In [17]:
# The raw export is parsed with a decimal comma (decimal=",").
df = pd.read_csv("../data/sports_providers.csv", decimal=",")

# Make sure the coordinate columns x and y end up as floats.
df = df.astype({"x": "float", "y": "float"})

In [7]:
###############
# Geo geojson
###############
import json

# GeoJSON is UTF-8 by specification, so do not depend on the platform default.
with open("../geojson.json", encoding="utf-8") as f:
    districts = json.load(f)

# Look-up table: district name ("Stadsdeel") -> district code, taken from the
# properties of every GeoJSON feature.
district_code = {
    feature["properties"]["Stadsdeel"]: feature["properties"]["Stadsdeel_code"]
    for feature in districts["features"]
}

# Rewrite "Zuid-Oost" as "Zuidoost" before the look-up (presumably the spelling
# used by the GeoJSON); regex=False makes it an unambiguous literal replacement.
df = df.assign(
    Stadsdeel=df["Stadsdeel"].str.replace("Zuid-Oost", "Zuidoost", regex=False)
)
df = df.assign(district_code=df["Stadsdeel"].map(district_code))

# Drop every provider whose district has no code in the GeoJSON, i.e. providers
# outside Amsterdam. The explicit .copy() makes the result an independent frame,
# so later in-place edits do not trigger SettingWithCopyWarning.
df = df[df["district_code"].notna()].copy()

# Save the cleaned frame to CSV
# df.to_csv("../data/sports_provider_Amsterdam.csv", index=False)

In [21]:
# Distinct English sport labels; the cell displays how many there are.
sports_lst = df["Sport_en"].unique().tolist()
len(sports_lst)

86

In [23]:
# Tag every sport whose label contains "dance" with the "Dance" category.
category_dict = {
    label: "Dance"
    for label in sports_lst
    if "dance" in label
}

In [40]:
## Correct some translations
# Labels that are replaced only when the whole value matches.
exact_fixes = {
    "the box": "boxing",
    "street dance": "dance",
    "diving": "underwater sports",
    "mountain sports": "climbing",
    "klimsport": "climbing",
}
# Any label containing the key is collapsed into the value
# (every "...skat..." label becomes "skating", every "...fit..." label "fitness").
substring_fixes = {
    "skat": "skating",
    "fit": "fitness",
}

sport_en = df["Sport_en"].replace(exact_fixes)
for fragment, canonical in substring_fixes.items():
    # na=False: rows without a label are left untouched instead of raising.
    matches = sport_en.str.contains(fragment, regex=False, na=False)
    sport_en = sport_en.mask(matches, canonical)

# Build a new frame instead of writing into `df` row by row; the cell can be
# re-run safely and does not warn when `df` is a filtered slice.
df = df.assign(Sport_en=sport_en)


# Save the cleaned frame to CSV
# df.to_csv("../data/sports_provider_Amsterdam.csv", index=False)

In [89]:
# Reload the saved Amsterdam-only file and count the distinct sport labels again.
df = pd.read_csv("../data/sports_provider_Amsterdam.csv")
sports_lst = df["Sport_en"].unique().tolist()
len(sports_lst)

80

### `px.choropleth` parameters

- locations 
- color
- color_continuous_scale
- geojson
- featureidkey
- projection
- scope
- labels

In [2]:
# Load the Amsterdam-only provider file and preview the first five rows.
df = pd.read_csv("../data/sports_provider_Amsterdam.csv")
df.head(5)

Unnamed: 0,Sport,Naam,Website,Naam accommodatie,Adres accommodatie,Postcode accommodatie,Plaats accommodatie,Stadsdeel,Aangepast sporten,KvK,Stadspas,x,y,Sport_en,district_code
0,aangepast sporten,Reade,www.reade.nl,Jan van Breemen instituut,Admiraal Helfrichstraat 1,1056AA,Amsterdam,Nieuw-West,ja,,,4.844408,52.37074,adapted sports,F
1,aangepast sporten,Friendship Sports Centre,www.friendshipsportscentre.nl,Friendship Sports Centre,Beemsterstraat 652,1027ED,Amsterdam,Noord,ja,34152839.0,,4.959847,52.400252,adapted sports,N
2,aangepast sporten,Only Friends,www.onlyfriends.nl,Sportpark De Weeren,Beemsterstraat 652,1027ED,Amsterdam,Noord,ja,50538063.0,ja,4.959847,52.400252,adapted sports,N
3,aangepast sporten,PWA Manege | Vereniging Paardrijden Gehandicapten,www.pwamanege.nl,Prins Willem-Alexander manege,Loosdrechtdreef 9,1108AZ,Amsterdam,Zuidoost,ja,40533448.0,,4.993231,52.314257,adapted sports,T
4,aangepast sporten,Terminators,www.amsterdamterminators.nl,Reade sporthal,Overtoom 283,1054HW,Amsterdam,West,ja,40539513.0,,4.866849,52.360379,adapted sports,E


In [5]:
# Number of providers (non-null "Naam" entries) per district, as a two-column
# frame with the headers "District" and "Count".
district_count = (
    df.groupby("Stadsdeel")["Naam"]
    .count()
    .reset_index()
    .rename(columns={"Stadsdeel": "District", "Naam": "Count"})
)
district_count

Unnamed: 0,District,Count
0,Centrum,61
1,Nieuw-West,178
2,Noord,119
3,Oost,207
4,West,127
5,Westpoort,1
6,Zuid,196
7,Zuidoost,78


In [12]:
import plotly.express as px
import json

with open("../geojson.json") as geojson_file:
    districts = json.load(geojson_file)

# First, minimal version of the map: only the geometry, the location key and
# the hover name are set. color, color_continuous_scale, range_color, labels,
# hover_data and title are not passed here.
fig = px.choropleth(
    district_count,
    geojson=districts,
    locations="District",
    hover_name="District",
    featureidkey="properties.Stadsdeel",
    projection="mercator",
)

# Hide the base map and frame, and zoom automatically to the plotted locations.
fig.update_geos(
    visible=False,
    fitbounds="locations",
)
fig.show()

In [46]:
import plotly.express as px
import plotly.graph_objects as go

import json
import pandas as pd

# Inputs for the plotting helpers: the provider table and the district geometry.
df = pd.read_csv("../data/sports_provider_Amsterdam.csv")

with open("../geojson.json") as geojson_file:
    geojson = json.load(geojson_file)

# Shared figure template: font, white paper background, hover-label styling,
# title anchoring and a colour bar without outline.
sport_template = {
    "layout": go.Layout(
        font={"family": "Old Standard TT"},
        paper_bgcolor="white",
        hoverlabel={
            "bordercolor": "black",
            "bgcolor": "white",
            "font_size": 15,
            "font_family": "Rockwell",
        },
        title={
            "xanchor": "center",
            "yanchor": "top",
            "yref": "paper",
        },
        coloraxis_colorbar={"outlinewidth": 0},
    )
}

def plot_district_choropleth(pallete="magenta",
                             df=df,
                             geojson=geojson,
                             template=sport_template):
    '''
    Draw a choropleth of the number of sports providers per city district.

    Parameters
    ----------
    pallete :
        Continuous colour scale handed to ``px.choropleth``. Examples:
        blues, magenta, burg, purpor, teal, inferno, purp, tealgrn.
        An empty value (``""`` or ``None``) falls back to "magenta".
    df :
        Provider table; the "Stadsdeel" and "Naam" columns are used.
    geojson :
        District geometry; features are matched on ``properties.Stadsdeel``.
    template :
        Layout template applied to the finished figure.

    Returns
    -------
    The plotly figure.
    '''
    # Providers per district: count of "Naam" entries grouped by "Stadsdeel".
    district_count = (
        df.groupby("Stadsdeel")["Naam"]
        .count()
        .reset_index()
        .rename(columns={"Stadsdeel": "District", "Naam": "Count"})
    )
    # One hover text per district, in the same row order as the plotted locations.
    district_count["text"] = (
        district_count["District"]
        + '<br>' + '<br>'
        + district_count["Count"].astype(str)
        + " Sports Providers in the District."
    )

    fig = px.choropleth(district_count,
                        geojson=geojson,
                        locations="District",
                        color="Count",
                        # Use the `pallete` parameter (the body previously referenced an
                        # undefined name, so every call raised NameError).
                        color_continuous_scale=pallete or "magenta",
                        # NOTE(review): hard-coded colour range; 207 looks like the largest
                        # district count in the current data - confirm if the data changes.
                        range_color=(1, 207),
                        labels={"Count": "Number of Sports Providers"},
                        hover_name="District",
                        hover_data=["Count"],
                        featureidkey="properties.Stadsdeel",
                        projection="mercator",
                        title="Amsterdam Sports Provider Concentration<br>(Hover for break down)")

    # Hide the base map and frame, and zoom automatically to the plotted locations.
    fig.update_geos(visible=False,
                    fitbounds="locations")

    # Replace the default hover box with the per-district text and draw white borders.
    fig.update_traces(go.Choropleth(
        hovertemplate=district_count["text"],
        marker_line_color='white',
    ))

    fig.update_layout(margin={"r": 0, "t": 100, "l": 0, "b": 0},
                      template=template)

    return fig

In [48]:
# Pass a real colour-scale name; an empty string is not a valid scale.
fig = plot_district_choropleth("magenta")
fig.show()

In [76]:
import numpy as np
def plot_treemap_all(pallete="curl", df=df, template=sport_template):
    """Treemap of all providers, one tile per sport (path "All" -> "Sport_en").

    Tiles are coloured by the "Sport_count" column using the `pallete` colour
    scale, with the colour midpoint set to the mean of that column. The given
    `template` is applied to the layout. Returns the plotly figure.
    """
    midpoint = np.average(df["Sport_count"])

    fig = px.treemap(
        df,
        path=["All", "Sport_en"],
        color="Sport_count",
        color_continuous_scale=pallete,
        range_color=[1, 75],
        hover_name="Sport_en",
        color_continuous_midpoint=midpoint,
        maxdepth=3,
    )

    # Tile text shows label, share of the parent and count; hover shows the label only.
    tile_patch = go.Treemap(
        textinfo="label",
        texttemplate="%{label}<br><br>Percentage: %{percentParent:.1%} <br>Count: %{value}<br>",
        hovertemplate="  %{label}  ",
        outsidetextfont={"size": 20},
    )
    fig.update_traces(tile_patch)

    fig.update_layout(
        title={"text": "Sports Providers in Amsterdam <br> (Click Area to Expand)"},
        coloraxis_colorbar={"title": "Counts", "tickvals": [10, 30, 50, 70]},
        template=template,
    )
    return fig

In [77]:
# Render the all-sports treemap with the default colour scale, data and template.
plot_treemap_all()

In [82]:
def plot_treemap_district(pallete="brwnyl", df=df, template=sport_template):
    """Treemap of providers nested as "All" -> "Stadsdeel" -> "Sport_en".

    Tiles are coloured by the "sport_count_in_district" column using the
    `pallete` colour scale, with the colour midpoint set to the mean of that
    column. The given `template` is applied to the layout. Returns the figure.
    """
    midpoint = np.average(df["sport_count_in_district"])

    fig = px.treemap(
        df,
        path=["All", 'Stadsdeel', "Sport_en"],
        color="sport_count_in_district",
        color_continuous_scale=pallete,
        range_color=[1, 22],
        hover_name="Sport_en",
        color_continuous_midpoint=midpoint,
        maxdepth=3,
    )

    # Hover shows the label only; tile text adds share of the parent and count.
    tile_patch = go.Treemap(
        hovertemplate="%{label}",
        texttemplate="%{label}<br><br>Percentage: %{percentParent:.1%} <br>Count: %{value}<br>",
        outsidetextfont={"size": 20},
    )
    fig.update_traces(tile_patch)

    fig.update_layout(
        title={"text": "Sports Providers in different City Districts<br>(Click Area to Expand)"},
        coloraxis_colorbar={"title": "Counts"},
        margin={"l": 20, "t": 100},
        template=template,
    )

    return fig

In [84]:
# Render the per-district treemap with the "curl" colour scale instead of the default "brwnyl".
plot_treemap_district("curl")

### folium

In [13]:
# Per-district provider counts; this frame is the data source of the folium choropleth below.
district_count

Unnamed: 0,District,Count
0,Centrum,61
1,Nieuw-West,178
2,Noord,119
3,Oost,207
4,West,127
5,Westpoort,1
6,Zuid,196
7,Zuidoost,78


In [15]:
# Import libraries
import pandas as pd
import numpy as np
from folium.features import DivIcon
import folium
  
# Initialize the map: 52.3676° N, 4.9041° E
m = folium.Map(location=[52.3676, 4.9041], zoom_start=11)

# Add the colour for the choropleth.
# `Map.choropleth` was deprecated and later removed from folium; the
# `folium.Choropleth` class accepts the same arguments and is added like a layer.
folium.Choropleth(
    geo_data=districts,
    name='choropleth',
    data=district_count,
    columns=['District', 'Count'],
    key_on='feature.properties.Stadsdeel',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_color="white",
    legend_name='Number of Sports Providers',
).add_to(m)
folium.LayerControl().add_to(m)

# Put a text label with the district name on every district.
for district in districts["features"]:
    # Label position = mean of the boundary vertices (not a true area centroid).
    # NOTE(review): assumes every geometry is a single-ring Polygon, so that the
    # coordinates form a regular (1, n, 2) array - confirm for this GeoJSON.
    lon, lat = np.array(district["geometry"]["coordinates"]).mean(axis=1).flatten()
    name = district["properties"]["Stadsdeel"]
    folium.map.Marker(
        [lat, lon],
        icon=DivIcon(
            icon_size=(8, 8),
            icon_anchor=(15, 10),
            # `text-align: center` replaces the invalid `text_algnment=center` declaration.
            html=f'<div style="font-size: 10pt; text-align: center;">{name}</div>',
        ),
    ).add_to(m)

# Save to html
# m.save('../plots/choropleth_folium.html')


In [16]:
# Display the folium map built in the previous cell.
m

## scatter geo

In [19]:
import plotly.express as px
# Read the Mapbox token through a context manager so the file handle is closed,
# and strip surrounding whitespace (such as a trailing newline) from the token.
with open("../mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# One marker per provider, coloured by its English sport label.
fig = px.scatter_mapbox(df, lat="y", lon="x",
                        color="Sport_en",
                        hover_name="Naam",
                        animation_group="Sport_en",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10)
fig.show()

**Reflection**:
>It's quite messy.


**To Do**:
- [ ] aggregate some categories.
- [ ] add filter function to display one category of sports at a time.

### Add animation_frame

In [107]:
import plotly.express as px
# Read the Mapbox token through a context manager so the file handle is closed,
# and strip surrounding whitespace (such as a trailing newline) from the token.
with open("../mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Same scatter map, but with one animation frame (slider position) per sport label.
fig = px.scatter_mapbox(df, lat="y", lon="x",
                        color="Sport_en",
                        hover_name="Naam",
                        animation_frame="Sport_en",
                        title="sports in amsterdam",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10)
fig.show()

**Reflection**:
- The slider mechanism is not consistent with the meaning of the categories: a slider suggests an ordered sequence, whereas the sports are unordered categories.

**To do**:
- [ ] add a drop down button to choose only certain categories.

### set up one trace and a button option for each individual trace

**Buttons(a list of dict):**
> - args
> - method
> - label
> - visible

**Updatemenu(a list of dict):**
> - buttons
> - direction
> - showactive


In [30]:
# Reload the provider file and preview the first five rows.
df = pd.read_csv("../data/sports_provider_Amsterdam.csv")
df.head(5)

Unnamed: 0,Sport,Naam,Website,Naam accommodatie,Adres accommodatie,Postcode accommodatie,Plaats accommodatie,Stadsdeel,Aangepast sporten,KvK,Stadspas,x,y,Sport_en,district_code,Count,Meta_Cat,Sub_Cat,All
0,aangepast sporten,Reade,www.reade.nl,Jan van Breemen instituut,Admiraal Helfrichstraat 1,1056AA,Amsterdam,Nieuw-West,ja,,,4.844408,52.37074,adapted sports,F,1,physical sports,others,All
1,aangepast sporten,Friendship Sports Centre,www.friendshipsportscentre.nl,Friendship Sports Centre,Beemsterstraat 652,1027ED,Amsterdam,Noord,ja,34152839.0,,4.959847,52.400252,adapted sports,N,1,physical sports,others,All
2,aangepast sporten,Only Friends,www.onlyfriends.nl,Sportpark De Weeren,Beemsterstraat 652,1027ED,Amsterdam,Noord,ja,50538063.0,ja,4.959847,52.400252,adapted sports,N,1,physical sports,others,All
3,aangepast sporten,PWA Manege | Vereniging Paardrijden Gehandicapten,www.pwamanege.nl,Prins Willem-Alexander manege,Loosdrechtdreef 9,1108AZ,Amsterdam,Zuidoost,ja,40533448.0,,4.993231,52.314257,adapted sports,T,1,physical sports,others,All
4,aangepast sporten,Terminators,www.amsterdamterminators.nl,Reade sporthal,Overtoom 283,1054HW,Amsterdam,West,ja,40539513.0,,4.866849,52.360379,adapted sports,E,1,physical sports,others,All


In [40]:
import plotly.graph_objects as go

# Read the Mapbox token through a context manager so the file handle is closed,
# and strip surrounding whitespace (such as a trailing newline) from the token.
with open("../mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

fig = px.scatter_mapbox(df,
                        lat="y",
                        lon="x",
                        color="Sport_en",
                        hover_name="Naam",
                        hover_data=["Sport", "Naam accommodatie", "Postcode accommodatie", "Website"],
                        color_continuous_scale=px.colors.cyclical.IceFire,
                        size_max=15,
                        zoom=10)

fig.update_layout(title_text="The Distribution of Sport Providers across Amsterdam",
                  showlegend=False)

# px draws one trace per "Sport_en" value and names the trace after it. Each
# dropdown entry therefore only switches trace visibility: every trace keeps its
# own coordinates and hover data, so hover text always matches the marker.
# (Overwriting lat/lon on all traces would leave the hover data out of sync.)
sports_by_category = df.groupby("Sub_Cat", sort=False)["Sport_en"].unique()

# The first entry restores the full map and matches the initial state of the figure.
buttons = [dict(method="restyle",
                label="All",
                visible=True,
                args=[{"visible": [True] * len(fig.data)}])]

for cat, sports in sports_by_category.items():
    members = {str(sport) for sport in sports}
    buttons.append(dict(method="restyle",
                        label=str(cat),
                        visible=True,
                        # One boolean per trace, in trace order.
                        args=[{"visible": [trace.name in members for trace in fig.data]}]))

# Dropdown menu holding one button per sub-category.
updatemenu = [dict(buttons=buttons,
                   direction="down",
                   showactive=True)]

# Add the dropdown menu to the figure.
fig.update_layout(updatemenus=updatemenu)
fig.show()

### Turn Legend into Buttons
[doc](https://plotly.com/python/legend/)

In [36]:
import plotly.express as px
# Read the Mapbox token through a context manager so the file handle is closed,
# and strip surrounding whitespace (such as a trailing newline) from the token.
with open("../mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# One trace per sub-category, so every legend entry toggles one category.
fig = px.scatter_mapbox(df, lat="y", lon="x",
                        color="Sub_Cat",
                        hover_name="Naam",
                        animation_group="Sport_en",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10)

# Start with every trace hidden but listed in the legend; clicking an entry shows it.
fig.update_traces(visible='legendonly')

# Horizontal legend placed just above the plot area, anchored at its right edge.
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
    bgcolor="LightSteelBlue",
    bordercolor="Black",
    borderwidth=2
))
fig.show()