# Data Cleanup - Broadband v Mobile
---

### Notes:

 
 

### Dataframe Setup

In [89]:
# Dependencies and Setup
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px

# Path to the Internet Inclusivity Index data table (CSV), relative to this notebook
internet_data_csv = "../Resources/Internet_Inclusivity_Index_ Data_table.csv"

# Load the raw Internet Inclusivity Index table into a DataFrame
internet_data = pd.read_csv(internet_data_csv)

# Display the data table for preview
internet_data.head()

Unnamed: 0,Country,Edition,1.1.1) Internet users,1.1.2) Fixed-line broadband subscribers,1.1.3) Mobile subscribers,1.1.4) Gender gap in internet access,1.1.5) Gender gap in mobile phone access,1.2.1) Average fixed broadband upload speed,1.2.2) Average fixed broadband download speed,1.2.3) Average fixed broadband latency,...,BG15) Internet users (population),BG16) Offline population,BG17) Internet access gender gap,BG18) Mobile phone access gender gap,BG19) Internet users (percent of population),BG20) Male internet users,BG21) Female internet users,BG22) Male mobile phone subscribers,BG23) Female mobile phone subscribers,BG24) Total fixed line broadband subscribers
0,Algeria,E1,31.9,5.71,108.8,,,1.69,3.39,74.0,...,15.23,24.64,,,38.2,,,95.5,89.9,2269348
1,Algeria,E2,47.3,7.05,116.0,22.0,7.8,6.12,7.85,76.92,...,17.44,23.18,13.0,7.0,42.9,59.0,46.0,90.0,83.0,2859567
2,Algeria,E3,54.2,7.76,110.8,11.7,8.9,2.59,3.77,64.71,...,19.71,21.62,7.0,8.0,47.7,60.0,53.0,90.0,82.0,3210267
3,Algeria,E4,74.4,7.26,111.7,21.7,7.3,1.97,3.87,64.99,...,20.64,21.45,13.0,6.0,49.0,60.0,47.0,82.0,76.0,3067022
4,Algeria,E5,74.4,8.32,109.4,10.9,8.0,1.41,3.84,54.09,...,20.64,21.45,7.0,7.0,49.0,64.0,57.0,87.0,80.0,3582739


### Clean Initial Data

In [90]:
# Map each index edition code to the calendar year it stands for
edition_mapping = {'E1': 2017, 'E2': 2018, 'E3': 2019, 'E4': 2020, 'E5': 2021}

# Columns used in this analysis, keyed by their name after 'Edition' has been
# renamed to 'Year', and mapped to the readable name (numeric prefix dropped).
# Pairing old and new names explicitly avoids mislabelling a column, which a
# positional `.columns = [...]` assignment would do silently if the two lists
# ever got out of step.
column_renames = {
    'Country': 'Country',
    'Year': 'Year',
    'BG2) Population': 'Population (millions)',
    '1.1.1) Internet users': 'Internet users (% of households)',
    '1.1.2) Fixed-line broadband subscribers': 'Fixed-line broadband subscribers (per 100 inhabitants)',
    '1.1.3) Mobile subscribers': 'Mobile subscribers (per 100 inhabitants)',
    '1.4.2) Rural electricity access': 'Rural electricity access (%)',
}

# Kept as separate lists because they are notebook-level names
columns_to_keep = list(column_renames)
new_column_names = list(column_renames.values())

# All editions are retained; no year filter is applied here.
internet_data = (
    internet_data
    # Replace edition codes with years (codes missing from the mapping become NaN)
    .assign(Edition=lambda frame: frame['Edition'].map(edition_mapping))
    .rename(columns={'Edition': 'Year'})
    # Drop every column that is not part of this analysis
    .loc[:, columns_to_keep]
    .rename(columns=column_renames)
    # Explicit copy so that adding columns later does not raise
    # SettingWithCopyWarning on a column-subset selection
    .copy()
)

# Test Display
internet_data.head()

Unnamed: 0,Country,Year,Population (millions),Internet users (% of households),Fixed-line broadband subscribers (per 100 inhabitants),Mobile subscribers (per 100 inhabitants),Rural electricity access (%)
0,Algeria,2017,40.6,31.9,5.71,108.8,96.7
1,Algeria,2018,41.3,47.3,7.05,116.0,96.87
2,Algeria,2019,42.1,54.2,7.76,110.8,96.87
3,Algeria,2020,42.9,74.4,7.26,111.7,100.0
4,Algeria,2021,43.7,74.4,8.32,109.4,96.7


### Calculate Rates of Change 

In [91]:
# Rate of change (%) = (new value - old value) / (old value) * 100
# Each row is compared with the previous row for the same country.

# Derived column name -> subscriber column it is computed from
rate_sources = {
    "Rate of Change (%) - Broadband": "Fixed-line broadband subscribers (per 100 inhabitants)",
    "Rate of Change (%) - Mobile": "Mobile subscribers (per 100 inhabitants)",
}

# pct_change compares consecutive rows, so order each country chronologically
# first. The result keeps the original index, so assigning it back below aligns
# on the index and leaves the row order of the data untouched.
by_country = internet_data.sort_values("Year", kind="stable").groupby("Country")

# Work on a copy so the cleaned `internet_data` frame is not mutated
internet_adoption_rate = internet_data.copy()

for rate_column, source_column in rate_sources.items():
    # fill_method=None: do not forward-fill missing subscriber values, which
    # would otherwise be reported as a fabricated 0% change.
    # Scale to percent BEFORE rounding so the exported values are clean
    # two-decimal numbers rather than e.g. 6.620000000000001.
    internet_adoption_rate[rate_column] = (
        by_country[source_column].pct_change(fill_method=None) * 100
    ).round(2)

# Drop unnecessary rows and columns.
# dropna removes each country's first year (no previous value to compare with)
# as well as any row with a missing value in the selected columns.
internet_adoption_rate = (
    internet_adoption_rate
    .loc[:, [
        'Country',
        'Year',
        'Population (millions)',
        'Rate of Change (%) - Broadband',
        'Rate of Change (%) - Mobile',
        'Rural electricity access (%)',
    ]]
    .dropna()
    .reset_index(drop=True)
)

# Export CSV of cleaned data with new columns for team analysis
csv_file_path = "../Resources/internet_adoption_rates_of_change.csv"
internet_adoption_rate.to_csv(csv_file_path, index=False)
print(f'DataFrame exported to {csv_file_path}')

internet_adoption_rate

DataFrame exported to ../Resources/internet_adoption_rates_of_change.csv


Unnamed: 0,Country,Year,Population (millions),Rate of Change (%) - Broadband,Rate of Change (%) - Mobile,Rural electricity access (%)
0,Algeria,2018,41.3,23.47,6.62,96.87
1,Algeria,2019,42.1,10.07,-4.48,96.87
2,Algeria,2020,42.9,-6.44,0.81,100.00
3,Algeria,2021,43.7,14.60,-2.06,96.70
4,Angola,2018,29.8,-47.27,-9.44,5.55
...,...,...,...,...,...,...
473,Zambia,2021,18.4,19.05,8.07,6.30
474,Zimbabwe,2018,14.2,2.52,-0.54,11.00
475,Zimbabwe,2019,14.4,8.20,7.84,11.00
476,Zimbabwe,2020,14.6,6.82,-9.70,11.50


### Chart Setup

In [80]:
# Set up data (renaming to shorten code - this is a predefined dataframe)
df = internet_adoption_rate

# Column to plot. Must match the column name created in the rate-of-change
# step exactly, including the "(%)" part.
rate_column = 'Rate of Change (%) - Broadband'

# Symmetric limit of the color scale, in percent. Fixing zmin/zmax keeps the
# colors comparable from one year's frame to the next.
z_limit = 10

# Years in chronological order, shared by the slider steps and the frames so
# that each step label refers to a frame of the same name
years = sorted(df['Year'].unique())


def build_broadband_choropleth(year_df):
    """Return a choropleth trace of the broadband rate of change.

    Parameters
    ----------
    year_df : pandas.DataFrame
        Rows to plot; needs the 'Country' column and the `rate_column` column.

    Returns
    -------
    plotly.graph_objs.Choropleth
        Trace with one location per row of `year_df`.
    """
    return go.Choropleth(
        locations=year_df['Country'],
        locationmode='country names',
        z=year_df[rate_column],
        text=year_df['Country'],
        colorscale='RdYlGn',
        colorbar=dict(title='Rate of Change - Broadband (%)'),
        zmin=-z_limit,
        zmax=z_limit,
    )


# Create Figure
fig = go.Figure()

# The base trace shows the first year only, matching the slider's initial
# position ('active': 0). Plotting every year at once would stack several
# values on each country.
initial_rows = df[df['Year'] == years[0]] if years else df
choropleth_trace = build_broadband_choropleth(initial_rows)

fig.add_trace(choropleth_trace)

# Set layout properties
fig.update_layout(
    geo=dict(
        projection_type='natural earth',
        showocean=True,
        oceancolor='rgba(0,255,255,0.7)',
        showland=True,
        landcolor='rgba(212,212,212,0.7)',
        showcountries=True,
        showframe=False
    ),
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [{
            # Passing None as the frame list plays all frames in order
            'label': 'Play',
            'method': 'animate',
            'args': [None, {
                'frame': {'duration': 500, 'redraw': True},
                'fromcurrent': True,
            }],
        }, {
            # Animating to [None] in immediate mode interrupts the running animation
            'label': 'Pause',
            'method': 'animate',
            'args': [[None], {
                'frame': {'duration': 0, 'redraw': True},
                'mode': 'immediate',
                'transition': {'duration': 0},
            }],
        }],
    }],
    sliders=[{
        'active': 0,
        'yanchor': 'top',
        'xanchor': 'left',
        'currentvalue': {
            'font': {'size': 20},
            'prefix': 'Year:',
            'visible': True,
            'xanchor': 'right',
        },
        'transition': {'duration': 300, 'easing': 'cubic-in-out'},
        'steps': [{
            # A step's method defaults to 'restyle'; it must be 'animate' for
            # the args (frame name + animation options) to select a frame.
            'method': 'animate',
            'args': [
                [f'{year}'],
                {
                    'frame': {'duration': 300, 'redraw': True},
                    'mode': 'immediate',
                    'transition': {'duration': 300},
                },
            ],
            'label': f'{year}',
        } for year in years],
    }],
)

# Define frames for animation: one frame per year, named after the year so the
# slider steps above can address it
frames = [
    go.Frame(
        data=[build_broadband_choropleth(df[df['Year'] == year])],
        name=f'{year}',
    )
    for year in years
]

fig.frames = frames

# Show the figure
fig.show()

