In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Read / Cleaning the data
education = pd.read_csv("Data_cleaned/education.csv", header=1)
education

education = education.applymap(lambda x: x.strip() if isinstance(x, str) else x)

gdp = pd.read_csv("Data_cleaned/gdp.csv")
gdp['gdp'] = gdp['gdp'].replace('..', np.nan)
gdp['gdp'] = gdp['gdp'].str.replace(',', '')
gdp['gdp'] = gdp['gdp'].astype(float)
gdp[['Country', 'Year']] = gdp['Country_Year'].str.split('_', expand=True)
gdp_2001 = gdp[gdp['Year'] == '2021']

education_gdp = pd.merge(gdp_2001, education, on=['Country'])



education_gdp = education_gdp[['Country', 'Bachelor’s or equivalent level', 'Master’s or equivalent level', 'gdp']]
education_gdp


  education = education.applymap(lambda x: x.strip() if isinstance(x, str) else x)


Unnamed: 0,Country,Bachelor’s or equivalent level,Master’s or equivalent level,gdp
0,Australia,252506,122642,1646.96
1,Austria,33770,27988,567.99
2,Belgium,69889,44605,719.59
3,Canada,219192,71091,2133.08
4,Chile,131092,52980,568.19
5,Colombia,260366,112291,906.03
6,Costa Rica,41199,4837,122.15
7,Czechia,36622,28201,508.67
8,Denmark,46184,25480,409.26
9,Estonia,5520,3873,59.43


In [2]:
unique_countries = education_gdp['Country'].unique()
unique_countries

array(['Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia',
       'Costa Rica', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
       'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Israel',
       'Italy', 'Japan', 'Korea', 'Latvia', 'Lithuania', 'Luxembourg',
       'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland',
       'Portugal', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden',
       'Switzerland', 'Türkiye', 'United Kingdom', 'United States'],
      dtype=object)

In [3]:
country_to_region = {
    'USA': 'North America',
    'Canada': 'North America',
    'Germany': 'Europe',
    'France': 'Europe',
    'Australia': 'Oceania',
    'Austria': 'Europe',
    'Belgium': 'Europe',
    'Chile': 'South America',
    'Colombia': 'South America',
    'Costa Rica': 'North America',
    'Czechia': 'Europe',
    'Denmark': 'Europe',
    'Estonia': 'Europe',
    'Finland': 'Europe',
    'Greece': 'Europe',
    'Hungary': 'Europe',
    'Iceland': 'Europe',
    'Ireland': 'Europe',
    'Israel': 'Middle East',
    'Italy': 'Europe',
    'Japan': 'Asia',
    'Korea': 'Asia',
    'Latvia': 'Europe',
    'Lithuania': 'Europe',
    'Luxembourg': 'Europe',
    'Mexico': 'North America',
    'Netherlands': 'Europe',
    'New Zealand': 'Oceania',
    'Norway': 'Europe',
    'Poland': 'Europe',
    'Portugal': 'Europe',
    'Slovak Republic': 'Europe',
    'Slovenia': 'Europe',
    'Spain': 'Europe',
    'Sweden': 'Europe',
    'Switzerland': 'Europe',
    'Turkey': 'Europe',
    'United Kingdom': 'Europe',
    'United States': 'North America',
    'Türkiye': 'Middle East'
    
}


education_gdp['Region'] = education_gdp['Country'].map(country_to_region)
unique_regions = education_gdp['Region'].unique()
print(unique_regions)


['Oceania' 'Europe' 'North America' 'South America' 'Middle East' 'Asia']


In [4]:
#education_gdp['Region'] = education_gdp['Country'].map(country_to_region)
#education_gdp.drop(columns=['Country'], inplace=True)
education_gdp


Unnamed: 0,Country,Bachelor’s or equivalent level,Master’s or equivalent level,gdp,Region
0,Australia,252506,122642,1646.96,Oceania
1,Austria,33770,27988,567.99,Europe
2,Belgium,69889,44605,719.59,Europe
3,Canada,219192,71091,2133.08,North America
4,Chile,131092,52980,568.19,South America
5,Colombia,260366,112291,906.03,South America
6,Costa Rica,41199,4837,122.15,North America
7,Czechia,36622,28201,508.67,Europe
8,Denmark,46184,25480,409.26,Europe
9,Estonia,5520,3873,59.43,Europe


In [5]:
# plot a nested pie chart using plotly
import plotly.express as px
fig = px.sunburst(education_gdp, path=['Region','Country'], values='gdp', names = 'Country', title='GDP',color_discrete_sequence=px.colors.qualitative.Set3, hover_data=['Master’s or equivalent level', 'Bachelor’s or equivalent level'])
# change the plot size
fig.update_layout(width=800, height=800)
fig.update_layout(plot_bgcolor='#f7ebdb', font_color='black', 
                  font=dict(family="Montserrat, sans-serif"))
fig.show()

In [6]:
import pandas as pd
import plotly.express as px
average_gdp_by_region = education_gdp.groupby('Region')['gdp'].mean().reset_index()
sum_by_region = education_gdp.groupby('Region')[['Bachelor’s or equivalent level', 'Master’s or equivalent level']].sum()
sum_by_region
test  = pd.merge(average_gdp_by_region,sum_by_region, on='Region' )


In [25]:
import pandas as pd
import plotly.express as px

melted_test = test.melt(id_vars=['Region', 'gdp'], value_vars=['Bachelor’s or equivalent level', 'Master’s or equivalent level'],
                        var_name='Education Level', value_name='Total Count')

fig = px.bar(melted_test, x='Region', y='Total Count', color='Education Level',
             title='OECD Total Bachelor\'s and Master\'s Degrees by Region (2021)',
             hover_data={'Region': True, 'Total Count': True, 'gdp': True},
             labels={'Total Count': 'Total Count (Millions)', 'Education Level': 'Education Level'},
             barmode='group',
             base='gdp',
             color_discrete_map={'Bachelor’s or equivalent level': '#84cf63', 
                                 'Master’s or equivalent level': '#3d682b'}
                                 )

fig.update_traces(customdata=melted_test[['Education Level', 'gdp']], 
                  width=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                  hovertemplate='<br>'.join([
                      'Region: %{x}',
                      'Education Level: %{customdata[0]}',
                      'Total Count: %{y:,.0f}',
                      'Average GDP: $%{customdata[1]:,.2f} (Billions)'
                  ]))

fig.update_layout(plot_bgcolor='#faf0e6', font_color='black')

fig.add_annotation(
    x="North America",
    y=3023000,
    text="Highest Average GDP",
    xanchor="center",
    arrowhead=2
)


fig.add_annotation(
    x="South America",
    y=392195,
    text="Lowest Average GDP",
    xanchor="center",
    arrowhead=2
)

fig.update_layout(title_font_family='PT Sans Narrow')

fig.update_layout(
    xaxis=dict(
        title_font=dict(family='PT Sans Narrow'),
        tickfont=dict(family='PT Sans Narrow')
    ),
    yaxis=dict(
        title_font=dict(family='PT Sans Narrow'),
        tickfont=dict(family='PT Sans Narrow')
    )
)

fig.update_layout(
    legend=dict(
        font=dict(family='PT Sans Narrow')
    )
)


fig.show()

In [26]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Read / Cleaning the data
education = pd.read_csv("Data_cleaned/education.csv", header=1)
education

education = education.applymap(lambda x: x.strip() if isinstance(x, str) else x)

gdp = pd.read_csv("Data_cleaned/gdp.csv")
gdp['gdp'] = gdp['gdp'].replace('..', np.nan)
gdp['gdp'] = gdp['gdp'].str.replace(',', '')
gdp['gdp'] = gdp['gdp'].astype(float)
gdp[['Country', 'Year']] = gdp['Country_Year'].str.split('_', expand=True)
gdp_2001 = gdp[gdp['Year'] == '2021']

education_gdp = pd.merge(gdp_2001, education, on=['Country'])



education_gdp = education_gdp[['Country', 'Bachelor’s or equivalent level', 'Master’s or equivalent level', 'gdp']]
education_gdp

country_to_region = {
    'USA': 'North America',
    'Canada': 'North America',
    'Germany': 'Europe',
    'France': 'Europe',
    'Australia': 'Oceania',
    'Austria': 'Europe',
    'Belgium': 'Europe',
    'Chile': 'South America',
    'Colombia': 'South America',
    'Costa Rica': 'North America',
    'Czechia': 'Europe',
    'Denmark': 'Europe',
    'Estonia': 'Europe',
    'Finland': 'Europe',
    'Greece': 'Europe',
    'Hungary': 'Europe',
    'Iceland': 'Europe',
    'Ireland': 'Europe',
    'Israel': 'Middle East',
    'Italy': 'Europe',
    'Japan': 'Asia',
    'Korea': 'Asia',
    'Latvia': 'Europe',
    'Lithuania': 'Europe',
    'Luxembourg': 'Europe',
    'Mexico': 'North America',
    'Netherlands': 'Europe',
    'New Zealand': 'Oceania',
    'Norway': 'Europe',
    'Poland': 'Europe',
    'Portugal': 'Europe',
    'Slovak Republic': 'Europe',
    'Slovenia': 'Europe',
    'Spain': 'Europe',
    'Sweden': 'Europe',
    'Switzerland': 'Europe',
    'Turkey': 'Europe',
    'United Kingdom': 'Europe',
    'United States': 'North America',
    'Türkiye': 'Middle East'
    
}


education_gdp['Region'] = education_gdp['Country'].map(country_to_region)
unique_regions = education_gdp['Region'].unique()

import pandas as pd
import plotly.express as px
average_gdp_by_region = education_gdp.groupby('Region')['gdp'].mean().reset_index()
sum_by_region = education_gdp.groupby('Region')[['Bachelor’s or equivalent level', 'Master’s or equivalent level']].sum()
sum_by_region
test  = pd.merge(average_gdp_by_region,sum_by_region, on='Region' )

import pandas as pd
import plotly.express as px

melted_test = test.melt(id_vars=['Region', 'gdp'], value_vars=['Bachelor’s or equivalent level', 'Master’s or equivalent level'],
                        var_name='Education Level', value_name='Total Count')

fig = px.bar(melted_test, x='Region', y='Total Count', color='Education Level',
             title='OECD Total Bachelor\'s and Master\'s Degrees by Region (2021)',
             hover_data={'Region': True, 'Total Count': True, 'gdp': True},
             labels={'Total Count': 'Total Count (Millions)', 'Education Level': 'Education Level'},
             barmode='group',
             base='gdp',
             color_discrete_map={'Bachelor’s or equivalent level': '#84cf63', 
                                 'Master’s or equivalent level': '#3d682b'}
                                 )

fig.update_traces(customdata=melted_test[['Education Level', 'gdp']], 
                  width=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                  hovertemplate='<br>'.join([
                      'Region: %{x}',
                      'Education Level: %{customdata[0]}',
                      'Total Count: %{y:,.0f}',
                      'Average GDP: $%{customdata[1]:,.2f} (Billions)'
                  ]))

fig.update_layout(plot_bgcolor='#faf0e6', font_color='black')

fig.add_annotation(
    x="North America",
    y=3023000,
    text="Highest Average GDP",
    xanchor="center",
    arrowhead=2
)


fig.add_annotation(
    x="South America",
    y=392195,
    text="Lowest Average GDP",
    xanchor="center",
    arrowhead=2
)

fig.update_layout(title_font_family='PT Sans Narrow')

fig.update_layout(
    xaxis=dict(
        title_font=dict(family='PT Sans Narrow'),
        tickfont=dict(family='PT Sans Narrow')
    ),
    yaxis=dict(
        title_font=dict(family='PT Sans Narrow'),
        tickfont=dict(family='PT Sans Narrow')
    )
)

fig.update_layout(
    legend=dict(
        font=dict(family='PT Sans Narrow')
    )
)


fig.show()


DataFrame.applymap has been deprecated. Use DataFrame.map instead.

