#### Spatial Data

In [1]:
# import libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

#### Worldwide Choropleth Map

In [2]:
# read the internet usage dataset from a CSV file (path is relative to the working directory)
df = pd.read_csv('internet_usage.csv')

##### Sanity Checks

In [3]:
# (rows, columns) of the loaded frame
df.shape

(6056, 4)

In [4]:
# preview the first five rows
df.head()

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.0
1,Afghanistan,AFG,2001,0.004723
2,Afghanistan,AFG,2002,0.004561
3,Afghanistan,AFG,2003,0.087891
4,Afghanistan,AFG,2004,0.105809


In [5]:
# preview the last five rows
df.tail()

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
6051,Zimbabwe,ZWE,2012,12.0
6052,Zimbabwe,ZWE,2013,15.5
6053,Zimbabwe,ZWE,2014,16.36474
6054,Zimbabwe,ZWE,2015,22.742818
6055,Zimbabwe,ZWE,2016,23.119989


In [6]:
# column dtypes and non-null counts (shows which columns contain missing values)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6056 entries, 0 to 6055
Data columns (total 4 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Country                                           6056 non-null   object 
 1   Code                                              4912 non-null   object 
 2   Year                                              6056 non-null   int64  
 3   Individuals using the Internet (% of population)  6056 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 189.4+ KB


In [7]:
# give the usage column a short, code-friendly name
df = df.rename(
    columns={'Individuals using the Internet (% of population)': 'Internet_Usage'}
)

In [8]:
# confirm the rename: the usage column should now be listed as Internet_Usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6056 entries, 0 to 6055
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country         6056 non-null   object 
 1   Code            4912 non-null   object 
 2   Year            6056 non-null   int64  
 3   Internet_Usage  6056 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 189.4+ KB


In [9]:
# keep only the rows recorded for the year 2016
df_2016 = df.loc[df['Year'] == 2016]

In [10]:
# world choropleth of the 2016 snapshot, shaded by internet usage
fig = px.choropleth(
    df_2016,
    locations='Code',
    color='Internet_Usage',
    hover_name='Country',  # country name is shown as the hover tooltip title
    color_continuous_scale=px.colors.sequential.Jet,
    width=1000,
    height=1000,
)
fig.show()

In [11]:
# add features, focus on a specific continent
fig = px.choropleth(data_frame = df_2016,
                    locations = 'Code',
                    color = 'Internet_Usage',
                    hover_name = 'Country',
                    color_continuous_scale = px.colors.sequential.Jet
)

# geo_scope limits the rendered map to Asia, so the title names Asia
# rather than claiming worldwide coverage
fig.update_layout(title_text = 'Internet Usage Across Asia in 2016',
                  width = 1000,
                  height = 1000,
                  geo_scope = 'asia')

fig.show()

In [12]:
# same 2016 world map, drawn with an explicit map projection
fig = px.choropleth(
    df_2016,
    locations='Code',
    color='Internet_Usage',
    hover_name='Country',
    color_continuous_scale=px.colors.sequential.Jet,
)

# 'geo_projection_type' is the underscore shorthand for geo -> projection -> type
fig.update_layout(
    title_text='Internet Usage Across the World in 2016',
    width=1000,
    height=1000,
    geo_projection_type='hammer',
)

fig.show()

In [13]:
# add animation to our plot: one frame per value of 'Year'
# NOTE(review): df has not been sorted by Year at this point, so the frame order
# presumably follows the order in which years occur in df - confirm in the output
fig = px.choropleth(data_frame = df,
                    locations = 'Code',
                    color = 'Internet_Usage',
                    hover_name = 'Country',
                    animation_frame = 'Year',
                    color_continuous_scale = px.colors.sequential.Jet,
                    # the metric is a percentage of population; pin the colour scale
                    # to 0-100 so a colour means the same value in every frame
                    range_color = [0, 100]
)

# the figure spans every year in df, so the title must not claim a single year
fig.update_layout(title_text = 'Internet Usage Across the World by Year',
                  width = 1000,
                  height = 600,
                  geo = dict(projection = {'type': 'hammer'}))

fig.show()

In [14]:
# order the rows chronologically
df = df.sort_values(by='Year')

In [15]:
# animated world map, one frame per value of 'Year'
fig = px.choropleth(data_frame = df,
                    locations = 'Code',
                    color = 'Internet_Usage',
                    hover_name = 'Country',
                    animation_frame = 'Year',
                    color_continuous_scale = px.colors.sequential.Jet,
                    # the metric is a percentage of population; pin the colour scale
                    # to 0-100 so a colour means the same value in every frame
                    range_color = [0, 100]
)

# the figure spans every year in df, so the title must not claim a single year
fig.update_layout(title_text = 'Internet Usage Across the World by Year',
                  width = 1000,
                  height = 600,
                  geo = dict(projection = {'type': 'hammer'}))

fig.show()

In [16]:
# read country code data from a tab-separated file
# NOTE(review): df_code is not referenced by any later cell visible here - confirm it is still needed
df_code = pd.read_csv('country_codes.tsv', sep = "\t")

In [17]:
# read the US state population data (tab-separated) used for the US state choropleth map
df_state = pd.read_csv('us_state_population.tsv', sep = '\t')

In [18]:
# reshape to long format: 'State' and 'Code' stay as identifiers, every other
# column becomes a ('Year', 'Population') pair
# NOTE: this rebinds df_state from itself, so the cell should be run only once
# after the load cell
df_state = df_state.melt(
    id_vars=['State', 'Code'],
    var_name='Year',
    value_name='Population',
)

In [19]:
# preview the reshaped frame (columns: State, Code, Year, Population)
df_state.head()

Unnamed: 0,State,Code,Year,Population
0,Alabama,AL,2010,4785448
1,Alaska,AK,2010,713906
2,Arizona,AZ,2010,6407774
3,Arkansas,AR,2010,2921978
4,California,CA,2010,37320903


In [25]:
# initialize the figure
# df_state is in long format (one row per state and year). Passing it whole would
# hand the trace repeated location codes if the file holds several years, so plot
# a single year. max() picks the latest one - assumes the Year labels sort
# chronologically, e.g. four-digit years; verify against the file.
latest_year = df_state['Year'].max()
df_state_latest = df_state[df_state['Year'] == latest_year]

fig = go.Figure(data=go.Choropleth(
        locations = df_state_latest['Code'],
        z = df_state_latest['Population'].astype(int),
        locationmode = 'USA-states',  # interpret 'Code' as US state codes
        colorscale = "Blues",
        colorbar_title = 'Population'
    )
)
    

In [26]:
# title the figure and restrict the map to the USA, then render it
fig.update_layout(
    title={'text': 'US Population across States'},
    geo={'scope': 'usa'},
)

fig.show()