# Exercise 2 - Interactive charting with plotly

## 4. Internet map

###  a) Recreate this visualization using plotly, make it as close as possible to the image.

In [2]:
import pandas as pd

df = pd.read_csv("data/landline-internet-subscriptions.csv")

# Drop entries without a country code (ISO 3)
df_clean = df.dropna(subset=['Code']).copy()

# Ensure types
df_clean['Year'] = df_clean['Year'].astype(int)
df_clean['Fixed broadband subscriptions'] = df_clean['Fixed broadband subscriptions'].astype(float)

# Get full list of countries and years
countries = df_clean[['Entity', 'Code']].drop_duplicates()
years = list(range(1998, 2024))  # inclusive

# Create full grid of countries x years
full_index = pd.MultiIndex.from_product(
    [countries['Entity'], years],
    names=['Entity', 'Year']
).to_frame(index=False)

# Merge country codes back into the grid
full_index = full_index.merge(countries, on='Entity', how='left')

# Merge your actual data into the full grid
df_full = full_index.merge(df_clean, on=['Entity', 'Year', 'Code'], how='left')


In [3]:
df_full.head(10)

Unnamed: 0,Entity,Year,Code,Fixed broadband subscriptions
0,Afghanistan,1998,AFG,
1,Afghanistan,1999,AFG,
2,Afghanistan,2000,AFG,
3,Afghanistan,2001,AFG,
4,Afghanistan,2002,AFG,
5,Afghanistan,2003,AFG,
6,Afghanistan,2004,AFG,200.0
7,Afghanistan,2005,AFG,220.0
8,Afghanistan,2006,AFG,500.0
9,Afghanistan,2007,AFG,500.0


In [4]:
import plotly.express as px

fig = px.choropleth(
    df_full,
    locations='Code',
    color='Fixed broadband subscriptions',
    hover_name='Entity',
    animation_frame='Year',
    color_continuous_scale=px.colors.sequential.YlOrRd,
    projection='natural earth',
    range_color=(0, df_full['Fixed broadband subscriptions'].max())
)

fig.update_traces(marker=dict(line=dict(color='gray', width=0.5)))
fig.update_layout(transition={'duration': 300})
fig.show()

# 2nd alternative

In [1]:
import pandas as pd

# Load and clean as before
df = pd.read_csv("data/landline-internet-subscriptions.csv")
df_clean = df.dropna(subset=['Code']).copy()
df_clean['Year'] = df_clean['Year'].astype(int)
df_clean['Fixed broadband subscriptions'] = df_clean['Fixed broadband subscriptions'].astype(float)

# Build full grid
countries = df_clean[['Entity', 'Code']].drop_duplicates()
years = list(range(1998, 2024))

full_index = pd.MultiIndex.from_product(
    [countries['Entity'], years],
    names=['Entity', 'Year']
).to_frame(index=False)

full_index = full_index.merge(countries, on='Entity', how='left')
df_full = full_index.merge(df_clean, on=['Entity', 'Year', 'Code'], how='left')

# Sort so forward-fill works correctly
df_full = df_full.sort_values(['Entity', 'Year'])

# Add forward-filled column and flag
df_full['FF_broadband'] = df_full.groupby('Entity')['Fixed broadband subscriptions'].ffill()
df_full['is_forward_filled'] = df_full['Fixed broadband subscriptions'].isna() & df_full['FF_broadband'].notna()


In [6]:
df_full.head()

# to do:
# - snap to closes data, backwards up to 5 years!
# - fast forward
# change flat to is_filled meaning closest data point
# change the color scheme
# add title and subtitle
# add color legend

Unnamed: 0,Entity,Year,Code,Fixed broadband subscriptions,FF_broadband,is_forward_filled,hover_text
0,Afghanistan,1998,AFG,,,False,Afghanistan<br>Year: 1998<br>Subscriptions: nan
1,Afghanistan,1999,AFG,,,False,Afghanistan<br>Year: 1999<br>Subscriptions: nan
2,Afghanistan,2000,AFG,,,False,Afghanistan<br>Year: 2000<br>Subscriptions: nan
3,Afghanistan,2001,AFG,,,False,Afghanistan<br>Year: 2001<br>Subscriptions: nan
4,Afghanistan,2002,AFG,,,False,Afghanistan<br>Year: 2002<br>Subscriptions: nan


In [4]:
import plotly.express as px

color_dict = {
    "No data": "#cacaca",
    "0 to 300,000": "#fff5eb",
    "300,000 to 1 million": "#fee6ce",
    "1 million to 3 million": "#fdd0a2",
    "3 million to 10 million": "#fdae6b",
    "10 million to 30 million": "#fd8d3c",
    "30 million to 100 million": "#f16913",
    "100 million to 300 million": "#d94801",
    "300 million to 1 billion": "#8c2d04",
}

df_full['hover_text'] = df_full.apply(
    lambda row: f"{row['Entity']}<br>Year: {row['Year']}<br>Subscriptions: {row['FF_broadband']:.1f}" +
                (" (carried forward)" if row['is_forward_filled'] else ""),
    axis=1
)

fig = px.choropleth(
    df_full,
    locations='Code',
    color='Fixed broadband subscriptions',
    hover_name='Entity',
    hover_data={'hover_text': True, 'Code': False, 'FF_broadband': False},
    animation_frame='Year',
    color_continuous_scale=px.colors.sequential.YlOrRd,
    projection='natural earth',
    range_color=(0, df_full['Fixed broadband subscriptions'].max())
)

fig.update_traces(marker=dict(line=dict(color='gray', width=0.5)))
fig.update_layout(transition={'duration': 300})
fig.show()


# 3rd alternative

In [11]:
import pandas as pd
import plotly.express as px

# Load data
df = pd.read_csv("data/landline-internet-subscriptions.csv")

# Drop entries without a country code (ISO 3)
df_clean = df.dropna(subset=['Code']).copy()

# Ensure correct data types
df_clean['Year'] = df_clean['Year'].astype(int)
df_clean['Fixed broadband subscriptions'] = df_clean['Fixed broadband subscriptions'].astype(float)

# Get list of all countries and years
countries = df_clean[['Entity', 'Code']].drop_duplicates()
years = list(range(1998, 2024))

# Create full grid (Entity, Code, Year)
full_grid = pd.MultiIndex.from_product(
    [countries['Entity'], years],
    names=['Entity', 'Year']
).to_frame(index=False)
full_grid = full_grid.merge(countries, on='Entity', how='left')

# Merge data into grid
df_full = full_grid.merge(df_clean, on=['Entity', 'Year', 'Code'], how='left')

# Sort by country and year
df_full = df_full.sort_values(['Code', 'Year'])

# Forward-fill only countries that have at least one non-NaN value
df_full['Fixed broadband subscriptions'] = (
    df_full.groupby('Code')['Fixed broadband subscriptions']
    .transform(lambda x: x.ffill() if x.notna().any() else x)
)

# Color dictionary
color_dict = {
    "No data": "#cacaca",
    "0 to 300,000": "#fff5eb",
    "300,000 to 1 million": "#fee6ce",
    "1 million to 3 million": "#fdd0a2",
    "3 million to 10 million": "#fdae6b",
    "10 million to 30 million": "#fd8d3c",
    "30 million to 100 million": "#f16913",
    "100 million to 300 million": "#d94801",
    "300 million to 1 billion": "#8c2d04",
}

# Categorization function
def categorize_subscriptions(val):
    if pd.isna(val):
        return "No data"
    elif val <= 300_000:
        return "0 to 300,000"
    elif val <= 1_000_000:
        return "300,000 to 1 million"
    elif val <= 3_000_000:
        return "1 million to 3 million"
    elif val <= 10_000_000:
        return "3 million to 10 million"
    elif val <= 30_000_000:
        return "10 million to 30 million"
    elif val <= 100_000_000:
        return "30 million to 100 million"
    elif val <= 300_000_000:
        return "100 million to 300 million"
    else:
        return "300 million to 1 billion"

# Assign categories
df_full['FF_broadband_category'] = df_full['Fixed broadband subscriptions'].apply(categorize_subscriptions)

# Add forward-fill flag with clearer label in hover text
df_full['is_forward_filled'] = df_full.groupby('Code')['Fixed broadband subscriptions'].transform(
    lambda x: x.isna().cumsum().shift(fill_value=0).gt(0) & x.notna()
)

# Simplified hover text logic
df_full['hover_text'] = df_full.apply(
    lambda row: (
        f"{row['Entity']}<br>Year: {row['Year']}<br>Subscriptions: {row['Fixed broadband subscriptions']:.1f} "
        + ("(carried forward)" if row['is_forward_filled'] else "")
    ) if pd.notna(row['Fixed broadband subscriptions']) else f"{row['Entity']}<br>Year: {row['Year']}<br>No data",
    axis=1
)

# Plot
fig = px.choropleth(
    df_full,
    locations='Code',
    color='FF_broadband_category',
    hover_name='Entity',
    hover_data={'hover_text': True, 'Code': False, 'Fixed broadband subscriptions': False},
    animation_frame='Year',
    color_discrete_map=color_dict,
    projection='natural earth'
)

fig.update_traces(marker=dict(line=dict(color='gray', width=0.5)))
fig.update_layout(transition={'duration': 300})
fig.show()
