In [None]:
import requests as rq
import bs4
import pandas as pd
from io import StringIO
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import numpy as np
import os


# Download the Wikipedia "List of countries by GDP (nominal)" page and parse
# its first table with class "wikitable" into a DataFrame.
url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
# NOTE(review): browser-like User-Agent, presumably because the default
# `requests` agent is rejected by the server -- confirm.
headers = {"User-Agent": "Mozilla/5.0"}
# A timeout keeps the notebook from hanging forever on a stalled connection.
page = rq.get(url, headers=headers, timeout=30)
# Fail on HTTP errors (403, 404, 5xx) instead of silently parsing an error page.
page.raise_for_status()

bs4page = bs4.BeautifulSoup(page.text, 'html.parser')
tables = bs4page.find_all('table', {'class': 'wikitable'})
if not tables:
    # Without this check the failure would surface as a bare IndexError below.
    raise ValueError(f"No table with class 'wikitable' found at {url}")

# read_html returns a list of DataFrames; the single <table> passed in yields
# exactly one, wrapped in StringIO because read_html expects a file-like object.
gdp_table = pd.read_html(StringIO(str(tables[0])))[0]

# Clean the scraped GDP table and keep the 20 largest economies.
#
# The header text is matched literally, including the "[1][6]" suffix that
# comes from the scraped page, so this breaks whenever the page header changes.
gdp_column = 'IMF (2025)[1][6]'

gdp_table.columns = gdp_table.columns.str.strip()

# Drop rows without a value and take an explicit copy, so the assignment
# below writes to an independent frame instead of a slice of the original
# (which triggers pandas' SettingWithCopyWarning).
gdp_table = gdp_table.loc[
    gdp_table[gdp_column].notna(),
    ['Country/Territory', gdp_column],
].copy()

# Strip thousands separators and the em-dash placeholder, then convert.
# errors='coerce' turns anything still non-numeric into NaN rather than
# raising, and those rows are removed right after.
cleaned_values = (
    gdp_table[gdp_column]
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.replace('—', '', regex=False)
    .str.strip()
)
gdp_table[gdp_column] = pd.to_numeric(cleaned_values, errors='coerce').astype(float)
gdp_table = gdp_table.dropna(subset=[gdp_column])

gdp_table = gdp_table.sort_values(gdp_column, ascending=False).head(20)

# Bar chart of the rows currently held in gdp_table: one bar per country,
# bar height and bar text both taken from the IMF 2025 column.
bar_options = dict(
    x='Country/Territory',
    y='IMF (2025)[1][6]',
    text='IMF (2025)[1][6]',
    title='Top 20 Countries by IMF GDP Forecast (2025)',
    labels={'IMF (2025)[1][6]': 'GDP (million US$)'},
)
fig = px.bar(gdp_table, **bar_options)

# Tilt the x tick labels by -45 degrees.
fig.update_layout(xaxis=dict(tickangle=-45))

fig.show()

# Also save the chart as an HTML file in the current working directory.
fig.write_html('stacked_bar.html')

# Load the tab-separated multilevel lookup table, discard its 'Level5'
# column, and rename the 'modify*' columns to roi / level4 ... level1.
url_lookup = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"

lookup_column_names = {
    "modify": "roi",
    "modify.1": "level4",
    "modify.2": "level3",
    "modify.3": "level2",
    "modify.4": "level1",
}

multilevel_lookup = pd.read_csv(url_lookup, sep="\t")
multilevel_lookup = multilevel_lookup.drop(columns=['Level5'])
multilevel_lookup = multilevel_lookup.rename(columns=lookup_column_names)
# Keep only the renamed columns, in the order roi, level4, level3, level2, level1.
multilevel_lookup = multilevel_lookup[list(lookup_column_names.values())]

# Subject whose volumes are plotted. Named `subject_id` rather than `id` so
# the builtin id() is not shadowed for the rest of the notebook session.
subject_id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")

# Keep only this subject's rows with type == 1 and level == 5, and only the
# two columns needed below. Row filter and column selection happen in one
# .loc call, so the result is a fresh frame rather than a slice of a slice.
# NOTE(review): the meaning of type 1 / level 5 is not visible here -- confirm
# against the dataset documentation.
subject_rows = (
    (subjectData.type == 1)
    & (subjectData.level == 5)
    & (subjectData.id == subject_id)
)
subjectData = subjectData.loc[subject_rows, ['roi', 'volume']]

# Attach the level1..level4 labels for each ROI (inner join on 'roi').
subjectData = pd.merge(subjectData, multilevel_lookup, on='roi')
# Each ROI's share of the summed volume of the merged rows.
subjectData['comp'] = subjectData['volume'] / subjectData['volume'].sum()

# Build the Sankey node list and the level1 -> level2 -> level3 links.
level1_nodes = subjectData['level1'].unique().tolist()
level2_nodes = subjectData['level2'].unique().tolist()
level3_nodes = subjectData['level3'].unique().tolist()
labels = level1_nodes + level2_nodes + level3_nodes

# Node index keyed by (level column, label). Keying on the label text alone
# would merge nodes whenever the same name occurs at two levels: the dict
# would keep only the last index, giving wrong or self-referencing links.
label_index = {}
node_position = 0
for level_name, level_nodes in (
    ('level1', level1_nodes),
    ('level2', level2_nodes),
    ('level3', level3_nodes),
):
    for node_label in level_nodes:
        label_index[(level_name, node_label)] = node_position
        node_position += 1

# One link per distinct (parent, child) pair, carrying the summed 'comp' of
# all ROIs under that pair. sort=False keeps first-appearance order.
links = []
for parent_level, child_level in (('level1', 'level2'), ('level2', 'level3')):
    pair_totals = subjectData.groupby(
        [parent_level, child_level], sort=False
    )['comp'].sum()
    for (parent_label, child_label), total in pair_totals.items():
        links.append({
            'source': label_index[(parent_level, parent_label)],
            'target': label_index[(child_level, child_label)],
            'value': total,
        })

# Draw the Sankey diagram from `labels` (node names) and `links`
# (dicts with 'source', 'target' and 'value').
link_sources = []
link_targets = []
link_values = []
for link in links:
    link_sources.append(link['source'])
    link_targets.append(link['target'])
    link_values.append(link['value'])

sankey_nodes = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=labels,
    color="blue",
)
sankey_links = dict(
    source=link_sources,
    target=link_targets,
    value=link_values,
)
sankey_trace = go.Sankey(node=sankey_nodes, link=sankey_links)

fig_sankey = go.Figure(data=[sankey_trace])
fig_sankey.update_layout(
    title_text="Subject 127 ICV Hierarchy (3 levels) Sankey Diagram",
    font_size=10,
)
fig_sankey.show()

# Also save the diagram as an HTML file in the current working directory.
fig_sankey.write_html("sankey.html")

# Save both figures as HTML files in the current user's Downloads folder.
downloads = os.path.join(os.path.expanduser("~"), "Downloads")
# The folder may not exist (e.g. on a server or a fresh profile); create it
# so write_html does not fail with FileNotFoundError.
os.makedirs(downloads, exist_ok=True)

fig.write_html(os.path.join(downloads, "stacked_bar.html"))

fig_sankey.write_html(os.path.join(downloads, "sankey.html"))

print("Files saved in:", downloads)


Files saved in: C:\Users\Juan P. Santos\Downloads


file:///C:/Users/Juan%20P.%20Santos/Downloads/sankey.html