In [1]:
import numpy as np
import wbdata
import pandas as pd
import datetime

# World Bank indicator code -> human-readable column label.  This mapping is
# the single source of truth for the column names of `df` below.
indicators = {
    "NY.GDP.PCAP.CD": "GDP per Capita (current US$)",
    "SP.DYN.IMRT.IN": "Infant Mortality Rate (per 1,000 live births)",
    "SE.ADT.LITR.FE.ZS": "Literacy rate, adult female (% of females ages 15 and above)"
}

# One request per indicator for 2021; each result is a frame with a single
# column named "value", stored under its indicator code.
# NOTE(review): no `country` filter is passed, so this presumably also returns
# World Bank regional/income aggregates alongside real countries -- confirm
# whether those rows should be excluded before correlating.
data = {
    code: wbdata.get_dataframe({code: "value"}, date="2021")
    for code in indicators
}

# Align the three "value" series on their shared index, then label the columns
# by looking each indicator code up in `indicators`.  Renaming by code (rather
# than overwriting `df.columns` with a positional list) means a reordered or
# extended `indicators` dict can never silently mislabel a column.
df = (
    pd.concat({code: frame["value"] for code, frame in data.items()}, axis=1)
    .rename(columns=indicators)
    .rename_axis("Country")   # index name becomes the first column's header
    .reset_index()
)

# Keep only rows where all three indicators are present.
df_cleaned = df.dropna()

In [2]:
from scipy.stats import pearsonr

# Pull the three indicator series out of df_cleaned once so each pairwise
# correlation below reads as a single short call.
gdp_per_capita = df_cleaned['GDP per Capita (current US$)']
infant_mortality = df_cleaned['Infant Mortality Rate (per 1,000 live births)']
female_literacy = df_cleaned['Literacy rate, adult female (% of females ages 15 and above)']

# Each pearsonr result is unpacked as (correlation coefficient, p-value).
corr_gdp_lit, p_value_gdp_lit = pearsonr(gdp_per_capita, female_literacy)
corr_gdp_infant, p_value_gdp_infant = pearsonr(gdp_per_capita, infant_mortality)
corr_infant_lit, p_value_infant_lit = pearsonr(infant_mortality, female_literacy)

# Report every pair at full float precision.
print(f"GDP & Literacy Rate: Correlation={corr_gdp_lit}, P-value={p_value_gdp_lit}")
print(f"GDP & Infant Mortality Rate: Correlation={corr_gdp_infant}, P-value={p_value_gdp_infant}")
print(f"Infant Mortality Rate & Literacy Rate: Correlation={corr_infant_lit}, P-value={p_value_infant_lit}")

GDP & Literacy Rate: Correlation=0.523531861152508, P-value=7.39436715328683e-07
GDP & Infant Mortality Rate: Correlation=-0.5673973881872082, P-value=4.9695365349187675e-08
Infant Mortality Rate & Literacy Rate: Correlation=-0.9015280073835649, P-value=9.750855190174557e-30


In [6]:
import plotly.express as px
import plotly.graph_objects as go

# Bubble chart of female literacy against GDP per capita, one marker per row
# of df_cleaned, with marker size driven by the infant mortality rate.
x_col = 'GDP per Capita (current US$)'
y_col = 'Literacy rate, adult female (% of females ages 15 and above)'
size_col = 'Infant Mortality Rate (per 1,000 live births)'

# Shorter display names for the axes and hover text.
display_labels = {
    x_col: 'GDP Per Capita',
    y_col: 'Female Literacy Rate (%)',
    size_col: 'Infant Mortality Rate',
}

fig = px.scatter(
    df_cleaned,
    x=x_col,
    y=y_col,
    size=size_col,
    hover_name='Country',
    title='Female Literacy vs. GDP Per Capita (2021)',
    labels=display_labels,
    size_max=60,  # upper bound on marker size
)

# Both axes share the same minimal look: a black axis line and no grid.
axis_style = dict(showline=True, linecolor='black', showgrid=False)

fig.update_layout(
    xaxis=axis_style,
    yaxis=axis_style,
    plot_bgcolor='white',
    showlegend=False,
)

fig.show()