<a href="https://colab.research.google.com/github/nemuulen/INFOSCI301_Final_Project/blob/main/intl_students_migration_vis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path
import pandas as pd
import plotly.express as px

# Directory that holds every Excel input read below.
# NOTE(review): "/content" is presumably the Colab session's upload folder — confirm.
DATA_DIR = Path("/content")

# --- 1) Generic World Bank reader ---
def read_wb(path: Path, var_name: str, header_row: int = 4) -> pd.DataFrame:
    """Load a wide World Bank-style Excel sheet and return it in long form.

    The first sheet of the workbook is read without a header. The row at
    position ``header_row`` supplies the column labels and every row below it
    is treated as data. The first two columns are renamed to ``"Country"`` and
    ``"Country Code"``; all remaining columns are melted into
    ``"Year"`` / ``var_name`` pairs.

    Args:
        path: Location of the Excel workbook.
        var_name: Name given to the value column of the result.
        header_row: Zero-based position of the row holding the column labels.
            Defaults to 4, i.e. the fifth row of the sheet.

    Returns:
        A frame with the columns ``Country``, ``Country Code``, ``Year``
        (nullable ``Int64``) and ``var_name`` (numeric, ``NaN`` where a cell
        could not be parsed). Rows that came from a column whose label is not
        a number are dropped.

    Raises:
        IndexError: If the sheet has no row at position ``header_row``.
    """
    # read_excel opens and closes the workbook itself, so no file handle is
    # left dangling the way a bare pd.ExcelFile(...) would leave one.
    raw = pd.read_excel(path, sheet_name=0, header=None)

    header = raw.iloc[header_row]
    table = raw.iloc[header_row + 1:].copy()
    table.columns = header
    table = table.rename(columns={
        header.iloc[0]: "Country",
        header.iloc[1]: "Country Code",
    })

    tidy = table.melt(
        id_vars=["Country", "Country Code"],
        var_name="Year",
        value_name=var_name,
    )
    # Non-year columns get a non-numeric "Year" label here; coercing them to
    # NA lets the final dropna discard them.
    tidy["Year"] = pd.to_numeric(tidy["Year"], errors="coerce").astype("Int64")
    tidy[var_name] = pd.to_numeric(tidy[var_name], errors="coerce")
    return tidy.dropna(subset=["Year"])

# Read the three World Bank indicator workbooks into long-form frames;
# var_name becomes the name of each frame's value column.
gdp = read_wb(
    path=DATA_DIR / "GDP.xlsx",
    var_name="GDP_USD",
)
edu = read_wb(
    path=DATA_DIR / "Government expenditure on education as % of GDP (%).xlsx",
    var_name="Edu_pct_GDP",
)
urban = read_wb(
    path=DATA_DIR / "Urban population (% of total population).xlsx",
    var_name="Urban_pct",
)

# --- 2) Country lookup for migrations file ---
# First sheet of the lookup workbook, read as-is.
country_map = pd.read_excel(DATA_DIR / "OPRI_COUNTRY.xlsx", sheet_name=0)
# Rename the id/name columns so they line up with the column names used by
# the other tables in this script.
country_map = country_map.rename(
    columns={
        "COUNTRY_ID": "Country Code",
        "COUNTRY_NAME_EN": "Country",
    },
)

# --- 3) Unified inbound/outbound file ---
raw = pd.read_excel(
    DATA_DIR / "inbound and outbound of international students.xlsx",
    sheet_name="data",
)

# indicatorId -> flow label; rows carrying any other indicator are discarded.
flows = {26637: "Inbound", 26519: "Outbound"}

# Keep only the two flow indicators and switch to the shared column names.
mig = raw.loc[raw["indicatorId"].isin(flows)]
mig = mig.rename(
    columns={
        "geoUnit": "Country Code",
        "year": "Year",
        "value": "Students",
    },
)
# Label each row with its flow type and coerce the year to a nullable integer.
mig = mig.assign(
    Type=mig["indicatorId"].map(flows),
    Year=pd.to_numeric(mig["Year"], errors="coerce").astype("Int64"),
)
# A row needs both a usable year and a student count to be kept.
mig = mig.dropna(subset=["Year", "Students"])
# Attach country names; codes missing from the lookup end up without a name
# and are removed.
mig = mig.merge(country_map, on="Country Code", how="left")
mig = mig.dropna(subset=["Country"])

# Reshape to one row per Country+Year with a column per flow type; when a
# Country/Year/Type combination occurs more than once the first value wins.
mig_wide = mig.pivot_table(
    values="Students",
    index=["Country", "Year"],
    columns="Type",
    aggfunc="first",
)
mig_wide = mig_wide.reset_index()

# --- 4) Merge in GDP, Edu, Urban ---
# Left joins keep every migration row even when an indicator has no match.
# NOTE(review): the join key is the country *name*; spellings presumably
# differ between the two sources for some countries — confirm.
df = mig_wide.merge(gdp, how="left", on=["Country", "Year"])
df = df.merge(edu, how="left", on=["Country", "Year"])
df = df.merge(urban, how="left", on=["Country", "Year"])

# keep only 2000–2022 (both ends included)
df = df.loc[(df["Year"] >= 2000) & (df["Year"] <= 2022)]

# unpivot for Plotly: one row per Country / Year / flow type
long = df.melt(
    value_vars=["Inbound", "Outbound"],
    id_vars=["Country", "Year", "GDP_USD", "Edu_pct_GDP", "Urban_pct"],
    value_name="Students",
    var_name="Type",
)
long = long.dropna(subset=["Students"])
# Years become strings before they are handed to the plotting code.
long = long.assign(Year=long["Year"].astype(str))

# --- 5) Draw animated map with country names ---
color_map = dict(Inbound="blue", Outbound="red")
# Sorted list of the distinct year labels, used to order the animation frames.
years = sorted(set(long["Year"]))

fig = px.scatter_geo(
    long,
    locations="Country",
    locationmode="country names",
    projection="natural earth",
    template="plotly_white",
    # one bubble per row, sized by student count and coloured by flow type
    size="Students",
    size_max=40,
    color="Type",
    color_discrete_map=color_map,
    # one frame per year
    animation_frame="Year",
    category_orders=dict(Year=years),
    hover_name="Country",
    # format strings control how each value is shown; False hides the field
    hover_data=dict(
        Students=":,",
        GDP_USD=":,.0f",
        Edu_pct_GDP=":.1f",
        Urban_pct=":.1f",
        Type=False,
        Year=False,
    ),
    title=(
      "🌐 International Student Migration (2000–2022)<br>"
      "<sub>Blue = Inbound | Red = Outbound; bubble size ∝ count</sub>"
    ),
)

# Base map styling.
fig.update_geos(
    showland=True, landcolor="whitesmoke",
    showocean=True, oceancolor="lightblue",
    showcountries=True, countrycolor="lightgray",
)

# Semi-transparent bubbles with a thin outline.
fig.update_traces(
    marker={
        "opacity": 0.6,
        "line": {"width": 0.5, "color": "darkgrey"},
    }
)

fig.update_layout(
    legend_title_text="Flow Type",
    margin=dict(l=0, r=0, t=70, b=0),
    # Play / Pause buttons for the animation
    updatemenus=[dict(
        type="buttons",
        direction="left",
        showactive=True,
        x=0.1, xanchor="right",
        y=0, yanchor="top",
        pad=dict(r=10, t=70),
        buttons=[
            dict(
                method="animate",
                label="Play",
                args=[
                    None,
                    dict(frame=dict(duration=1500, redraw=True), fromcurrent=True),
                ],
            ),
            dict(
                method="animate",
                label="Pause",
                args=[
                    [None],
                    dict(
                        frame=dict(duration=0, redraw=False),
                        mode="immediate",
                        transition=dict(duration=0),
                    ),
                ],
            ),
        ],
    )],
    # year slider: padding and the label shown next to the current value
    sliders=[dict(
        pad=dict(b=10, t=50),
        currentvalue=dict(prefix="Year: "),
    )],
)

fig.show()