### Choropleth maps with plotly
Plotly comes with two built-in geometries (world countries and US states) which do not require an external GeoJSON file.
Let's use the country geometry for the COVID data. We will also see how to use animation.
Dataset: COVID 19 cases

In [2]:
import pandas as pd
import plotly.express as px
import numpy as np

# Load the OWID COVID-19 dataset and keep only the rows dated after 2020-04-01.
# The comparison is made on the 'date' strings exactly as they are read from the CSV.
covid = pd.read_csv('owid-covid-data.csv')
mask = covid['date'].gt('2020-04-01')
covid = covid[mask]


In [3]:
# Preview the first five rows of the filtered dataset.
covid.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
38,AFG,Asia,Afghanistan,2020-04-02,240.0,43.0,22.857,4.0,0.0,0.143,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
39,AFG,Asia,Afghanistan,2020-04-03,275.0,35.0,26.286,8.0,4.0,0.714,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
40,AFG,Asia,Afghanistan,2020-04-04,300.0,25.0,27.571,10.0,2.0,0.857,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
41,AFG,Asia,Afghanistan,2020-04-05,338.0,38.0,31.429,12.0,2.0,1.143,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
42,AFG,Asia,Afghanistan,2020-04-06,368.0,30.0,31.714,15.0,3.0,1.571,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511


In [4]:
# Animated world choropleth: one animation frame per value of 'date',
# with each country coloured by its 'new_cases_smoothed' value.
fig = px.choropleth(
    covid,
    locations="iso_code",
    color="new_cases_smoothed",
    hover_name="location",
    animation_frame="date",
    title="Covid Cases plotted using Plotly",
)

# Set every margin around the map to zero.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

# The result can also be saved as an HTML page.
fig.write_html("myplot.html")



Exercises:
1) What is wrong in the visualization? Try to fix it!
2) What's wrong with the colors in the animation? Try to fix it.

documentation: https://plotly.github.io/plotly.py-docs/generated/plotly.express.choropleth.html

Different geographical plots are available: scatter_geo uses dots (markers) to represent countries.

In [5]:
# Snapshot of a single day: keep only the rows dated 2021-03-28.
mask = covid['date'] == '2021-03-28'
covids = covid.loc[mask]

# Drop rows that have no bubble size or no continent.
# Missing values are NaN, and NaN compares unequal to both 0 and "", so the two
# comparisons below cannot remove them on their own; dropna handles that case.
# The comparisons are kept for data where a placeholder 0 / "" is actually stored.
covids = covids.dropna(subset=['total_deaths_per_million', 'continent'])
covids = covids[covids['continent'] != 0]
covids = covids[covids['continent'] != ""]

# One marker per country, coloured by continent and sized by total deaths per million.
fig = px.scatter_geo(covids, locations="iso_code", animation_frame="date",
                     color="continent",
                     title="Covid Cases plotted using Plotly",
                     hover_name="location",
                     size="total_deaths_per_million")

# Show the latitude/longitude grid lines on the map.
fig.update_geos(lataxis_showgrid=True, lonaxis_showgrid=True)

fig.show()

In [6]:
# Load the gapminder sample dataset bundled with Plotly Express and preview it.
df = px.data.gapminder()
df.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


In [7]:
# Combine a choropleth and a scatter_geo in one animated figure,
# using the gapminder data shipped with Plotly Express.

df = px.data.gapminder()

# Base figure: countries coloured by life expectancy, one frame per year.
fig = px.choropleth(
    df,
    locations="iso_alpha",
    color="lifeExp",  # lifeExp is a column of gapminder
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    animation_frame='year',
)

# Overlay figure: markers sized by GDP per capita, one frame per year.
fig2 = px.scatter_geo(
    df,
    locations="iso_alpha",
    size="gdpPercap",  # gdpPercap is a column of gapminder
    hover_name="country",  # column to add to hover information
    animation_frame='year',
)

# Add the scatter trace to the base figure, then append the scatter trace of
# each frame of fig2 to the frame at the same position in fig.
fig.add_trace(fig2.data[0])
for i in range(len(fig.frames)):
    fig.frames[i].data = fig.frames[i].data + (fig2.frames[i].data[0],)
fig.show()

px.line_geo: rows are points, which are then split into groups and connected by lines

In [12]:
import plotly.express as px
import geopandas as gpd
import shapely.geometry
import numpy as np

# Read the river / lake centerline geometries from the zipped dataset.
geo_df = gpd.read_file("zip://ne_50m_rivers_lake_centerlines.zip")

# Coordinates and names are accumulated in plain Python lists.
# np.append copies the whole array on every call, which makes a loop that
# appends point by point quadratic; list.extend / list.append avoid that.
lats = []
lons = []
names = []

for feature, name in zip(geo_df.geometry, geo_df.name):
    # Normalise every supported geometry to an iterable of line strings;
    # any other geometry type is skipped.
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        # A None entry after each line string keeps consecutive line strings
        # from being joined to each other in the single plotted trace.
        lats.append(None)
        lons.append(None)
        names.append(None)

fig = px.line_geo(lat=lats, lon=lons, hover_name=names)
fig.show()

ModuleNotFoundError: No module named 'geopandas'

In [10]:
import pandas as pd
import requests
import plotly.express as px

import json

regions = ['Piemonte', 'Trentino-Alto Adige', 'Lombardia', 'Puglia', 'Basilicata', 
           'Friuli Venezia Giulia', 'Liguria', "Valle d'Aosta", 'Emilia-Romagna',
           'Molise', 'Lazio', 'Veneto', 'Sardegna', 'Sicilia', 'Abruzzo',
           'Calabria', 'Toscana', 'Umbria', 'Campania', 'Marche']

# Create a dataframe with the region names
df = pd.DataFrame(regions, columns=['NOME_REG'])
# For demonstration, create a column with the length of the region's name
df['name_length'] = df['NOME_REG'].str.len()

# Read the GeoJSON data with Italy's regional borders from the local file.
# px.choropleth documents `geojson` as a GeoJSON-formatted dict, so the file is
# parsed here instead of passing its file name as a string.
with open('regioni.geojson', encoding='utf-8') as geojson_file:
    italy_regions_geo = json.load(geojson_file)

# Choropleth representing the length of region names
fig = px.choropleth(data_frame=df, 
                    geojson=italy_regions_geo, 
                    locations='NOME_REG', # name of dataframe column
                    featureidkey='properties.NOME_REG',  # path to field in GeoJSON feature object with which to match the values passed in to locations
                    color='name_length',
                    color_continuous_scale="Magma",
                    scope="europe",
                   )
# Hide the base map layers and zoom the view to the plotted locations.
fig.update_geos(showcountries=False, showcoastlines=False, showland=False, fitbounds="locations")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


In [14]:
# Load the vaccination summary table from the local CSV file.
vaccini = pd.read_csv(filepath_or_buffer='vaccini-summary-latest.csv')

In [15]:
# Choropleth of the 'percentuale_somministrazione' column of the vaccination table.
# Rows are matched to GeoJSON features by comparing 'nome_area' with each
# feature's properties.NOME_REG field.
fig = px.choropleth(
    data_frame=vaccini,
    geojson=italy_regions_geo,
    locations='nome_area',  # name of dataframe column
    featureidkey='properties.NOME_REG',  # path to the GeoJSON feature field matched against `locations`
    color='percentuale_somministrazione',
    color_continuous_scale="Magma",
    scope="europe",
)
# Hide the base map layers and zoom the view to the plotted locations.
fig.update_geos(
    showcountries=False,
    showcoastlines=False,
    showland=False,
    fitbounds="locations",
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()


What's wrong in the previous visualization?