In [56]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Load the Data

In [58]:
# Location of the education dataset; the file is read with the latin1 codec.
# NOTE(review): this is an absolute path on a lettered drive — confirm it
# resolves on the machine running the notebook.
csv_path = r"U:/dataset/Global_Education.csv"
edu_data = pd.read_csv(csv_path, encoding="latin1")

# Bare last expression so the notebook renders the frame.
edu_data

Unnamed: 0,Countries and areas,Latitude,Longitude,OOSR_Pre0Primary_Age_Male,OOSR_Pre0Primary_Age_Female,OOSR_Primary_Age_Male,OOSR_Primary_Age_Female,OOSR_Lower_Secondary_Age_Male,OOSR_Lower_Secondary_Age_Female,OOSR_Upper_Secondary_Age_Male,...,Primary_End_Proficiency_Reading,Primary_End_Proficiency_Math,Lower_Secondary_End_Proficiency_Reading,Lower_Secondary_End_Proficiency_Math,Youth_15_24_Literacy_Rate_Male,Youth_15_24_Literacy_Rate_Female,Birth_Rate,Gross_Primary_Education_Enrollment,Gross_Tertiary_Education_Enrollment,Unemployment_Rate
0,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,44,...,13,11,0,0,74,56,32.49,104.0,9.7,11.12
1,Albania,41.153332,20.168331,4,2,6,3,6,1,21,...,0,0,48,58,99,100,11.78,107.0,55.0,12.33
2,Algeria,28.033886,1.659626,0,0,0,0,0,0,0,...,0,0,21,19,98,97,24.28,109.9,51.4,11.70
3,Andorra,42.506285,1.521801,0,0,0,0,0,0,0,...,0,0,0,0,0,0,7.20,106.4,0.0,0.00
4,Angola,11.202692,17.873887,31,39,0,0,0,0,0,...,0,0,0,0,0,0,40.73,113.5,9.3,6.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,Venezuela,6.423750,66.589730,14,14,10,10,15,13,28,...,0,0,0,0,0,0,17.88,97.2,79.3,8.80
198,Vietnam,14.058324,108.277199,0,0,0,0,0,0,0,...,55,51,86,81,98,98,16.75,110.6,28.5,2.01
199,Yemen,15.552727,48.516388,96,96,10,21,23,34,46,...,0,0,0,0,0,0,30.45,93.6,10.2,12.91
200,Zambia,13.133897,27.849332,0,0,17,13,0,0,0,...,0,0,5,2,93,92,36.19,98.7,4.1,11.43


Filter Data for Selected Countries

In [None]:
# Countries kept for the per-country comparisons in later cells.
# `isin` matches whole values only, so the previous truncated entry "Col"
# could never select a row; it is spelled out here as "Colombia".
# NOTE(review): "Colombia" is the assumed intended country — confirm.
selected_countries = ["Afghanistan", "Albania", "Algeria", "Brazil", "Colombia"]
edu_data_filtered = edu_data[edu_data["Countries and areas"].isin(selected_countries)].copy()

# Report requested names that matched nothing, so a misspelling cannot
# silently shrink the selection.
unmatched_countries = sorted(
    set(selected_countries) - set(edu_data_filtered["Countries and areas"])
)
if unmatched_countries:
    print(f"Warning: no rows found for {unmatched_countries}")

edu_data_filtered.head()

Calculate Summary Statistics

In [None]:
def calculate_stats(data, column):
    """Return shape statistics for one column of ``data``.

    Args:
        data: Object indexable by ``column`` whose result exposes ``.skew()``
            and ``.kurt()`` (e.g. a pandas DataFrame).
        column: Key of the column to summarise.

    Returns:
        dict with two entries, in this order:
        ``"skewness"`` (result of ``.skew()``) and
        ``"kurtosis"`` (result of ``.kurt()``).
    """
    values = data[column]
    shape_stats = {}
    shape_stats["skewness"] = values.skew()
    shape_stats["kurtosis"] = values.kurt()
    return shape_stats

# Skewness and kurtosis of the male youth literacy column over the full dataset.
stats_male = calculate_stats(
    data=edu_data,
    column="Youth_15_24_Literacy_Rate_Male",
)
print(stats_male)

Geospatial Analysis

In [None]:
# Look the latitude column up by its whitespace-stripped name. The original
# call hard-coded "Latitude " (with a trailing space), while the rendered
# preview of the frame shows the header without one; resolving it this way
# works for either spelling of the header.
lat_column = next(
    (col for col in edu_data.columns if col.strip() == "Latitude"),
    None,
)
if lat_column is None:
    raise KeyError("No 'Latitude' column found in edu_data")

# NOTE(review): the preview rows list Venezuela at longitude 66.59 and Angola
# at latitude 11.20, both positive — the coordinates look unsigned, so some
# markers may land in the wrong hemisphere. Confirm against the source data.
fig = px.scatter_geo(
    edu_data,
    lat=lat_column,
    lon="Longitude",
    color="Youth_15_24_Literacy_Rate_Male",
    hover_name="Countries and areas",
    title="Global Male Literacy Rates by Country",
)

fig.show()

Correlation Analysis

In [None]:
# Indicators whose pairwise correlations are shown in the heatmap.
cor_columns = [
    "Youth_15_24_Literacy_Rate_Male",
    "Youth_15_24_Literacy_Rate_Female",
    "Gross_Primary_Education_Enrollment",
    "Birth_Rate",
    "Unemployment_Rate",
]
cor_matrix = edu_data.loc[:, cor_columns].corr()

# Heatmap options collected in one place; text_auto prints each cell's value.
heatmap_options = {
    "text_auto": True,
    "title": "Correlation Matrix",
    "color_continuous_scale": "Viridis",
}
fig_corr = px.imshow(cor_matrix, **heatmap_options)

fig_corr.show()

 Gender Gap Analysis

In [None]:
# Reshape the selected countries to long form: one row per (country, gender
# column), so the two literacy columns can be drawn as side-by-side bars.
literacy_columns = [
    "Youth_15_24_Literacy_Rate_Male",
    "Youth_15_24_Literacy_Rate_Female",
]
edu_data_filtered_melted = pd.melt(
    edu_data_filtered,
    id_vars=["Countries and areas"],
    value_vars=literacy_columns,
    var_name="Gender",
    value_name="Literacy Rate",
)

# Fixed colour per source column: male in blue, female in red.
gender_colors = {
    "Youth_15_24_Literacy_Rate_Male": "blue",
    "Youth_15_24_Literacy_Rate_Female": "red",
}
fig_gender = px.bar(
    edu_data_filtered_melted,
    x="Countries and areas",
    y="Literacy Rate",
    color="Gender",
    barmode="group",
    title="Gender Gap in Literacy Rate by Selected Countries",
    color_discrete_map=gender_colors,
)
fig_gender.show()

Proficiency Scores Comparison

In [None]:
# Select the reading-proficiency columns by their shared suffix. The previous
# filter (`"Grade" in col`) matched none of the reading columns visible in the
# frame preview (Primary_End_Proficiency_Reading,
# Lower_Secondary_End_Proficiency_Reading) and could admit non-reading columns
# into a chart titled "Reading Proficiency".
proficiency_columns = [
    col for col in edu_data.columns if col.endswith("Proficiency_Reading")
]
# Fail loudly rather than melting with an empty column list.
assert proficiency_columns, "no *Proficiency_Reading columns found in edu_data"

# Long form: one row per (country, education level) with its reading score.
edu_data_long = edu_data.melt(
    id_vars=["Countries and areas"],
    value_vars=proficiency_columns,
    var_name="Education_Level",
    value_name="Proficiency_Reading"
)

# NOTE(review): the preview shows rows where every proficiency column is 0
# (e.g. Andorra); if 0 stands for "no data" the boxes will be pulled toward
# zero — confirm how missing values are encoded.
fig_box = px.box(
    edu_data_long,
    x="Education_Level",
    y="Proficiency_Reading",
    title="Reading Proficiency by Education Level",
    color_discrete_sequence=["lightblue"]
)
fig_box.show()

Unemployment vs Literacy Relationship

In [None]:
# Human-readable axis titles, keyed by the column names used on each axis.
scatter_axis_labels = {
    "Unemployment_Rate": "Unemployment Rate",
    "Youth_15_24_Literacy_Rate_Male": "Male Literacy Rate",
}

# Scatter of male youth literacy against unemployment with an OLS trendline.
fig_scatter = px.scatter(
    edu_data,
    x="Unemployment_Rate",
    y="Youth_15_24_Literacy_Rate_Male",
    trendline="ols",
    title="Unemployment vs Male Literacy Rate",
    labels=scatter_axis_labels,
    color_discrete_sequence=["red"],
)
fig_scatter.show()

Animated Visualization

In [None]:
# Animated bar chart stepping through the selected countries one frame at a time.
fig_anim = px.bar(
    edu_data_filtered,
    x="Countries and areas",
    y="Youth_15_24_Literacy_Rate_Male",
    animation_frame="Countries and areas",
    title="Male Literacy Rate by Selected Countries (Animated)",
    # Plotly Express looks labels up by column name; the previous "x"/"y"
    # keys matched no column, so the axis titles were never overridden.
    labels={
        "Countries and areas": "Countries",
        "Youth_15_24_Literacy_Rate_Male": "Literacy Rate",
    },
    color="Countries and areas",
    # Pin the y-axis so every frame shares one scale; otherwise the axis
    # keeps the range chosen for the first frame. The 0-100 range assumes
    # the rate is a percentage.
    range_y=[0, 100],
)
fig_anim.show()

Interactive Visualization

In [None]:
# Mean male and female youth literacy per country among the selected rows.
# reset_index turns the group key back into a regular column.
literacy_by_country = edu_data_filtered.groupby("Countries and areas")
regional_summary_filtered = literacy_by_country.agg(
    Avg_Literacy_Male=pd.NamedAgg(
        column="Youth_15_24_Literacy_Rate_Male", aggfunc="mean"
    ),
    Avg_Literacy_Female=pd.NamedAgg(
        column="Youth_15_24_Literacy_Rate_Female", aggfunc="mean"
    ),
).reset_index()


In [None]:
# Bar figure comparing average male and female literacy per selected country.
fig_bar = go.Figure()
fig_bar.add_trace(go.Bar(
    x=regional_summary_filtered["Countries and areas"],
    y=regional_summary_filtered["Avg_Literacy_Male"],
    name="Male",
    marker_color='blue'
))
# The summary frame also carries Avg_Literacy_Female, which was previously
# computed but never plotted; add it as a second trace so both series appear.
fig_bar.add_trace(go.Bar(
    x=regional_summary_filtered["Countries and areas"],
    y=regional_summary_filtered["Avg_Literacy_Female"],
    name="Female",
    marker_color='red'
))

In [None]:
# Titles and bar mode for the literacy figure, applied in a single call.
bar_layout = {
    "title": "Literacy Rates in Selected Countries",
    "xaxis_title": "Country",
    "yaxis_title": "Average Literacy Rate (%)",
    "barmode": "group",
}
fig_bar.update_layout(**bar_layout)
fig_bar.show()