# JBI100 Visualization 
### Academic year 2025-2026

## Health and Global Statistics
Data sources:

- Hospital Beds Management (https://www.kaggle.com/datasets/jaderz/hospital-beds-management/data)
- CIA Global Statistical Database (https://www.kaggle.com/datasets/kushagraarya10/cia-global-statistical-database)

Data dictionaries and additional info can be found in the respective data folders.
Note: you only need to select one dataset for your project; the dataset that you choose consists of multiple CSV files.

In [1]:
# Import libraries
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
import plotly.io as pio

# Show plotly figures through the 'iframe' renderer
pio.renderers.default = 'iframe'

# Display every column of wide tables instead of eliding the middle ones
pd.options.display.max_columns = None

# Troubleshooting: a 'ModuleNotFoundError' means a library is missing from
# the environment. Install it from inside Jupyter with the command
# '!python -m pip install lib', replacing lib with the name of that library.

In [2]:
# Load the data
#
# Every table of both datasets is read with the same options, so the folder
# locations and the reader call are defined once. The paths are relative to
# the notebook's working directory; edit the two *_DIR constants if the data
# folders are stored somewhere else.
HBM_DIR = '../Hospital Beds Management'
CIA_DIR = '../CIA Global Statistical Database'


def load_table(data_dir, filename):
    """Read one CSV file of a dataset into a DataFrame.

    Parameters
    ----------
    data_dir : str
        Folder that contains the dataset's CSV files.
    filename : str
        Name of the CSV file inside ``data_dir``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # low_memory=False makes pandas parse the file in one go instead of in
    # chunks, which avoids mixed-type inference within a column.
    return pd.read_csv(f'{data_dir}/{filename}', delimiter=',', low_memory=False)


# Hospital Beds Management
df_HBM_patients        = load_table(HBM_DIR, 'patients.csv')
df_HBM_staff           = load_table(HBM_DIR, 'staff.csv')
df_HBM_staff_schedule  = load_table(HBM_DIR, 'staff_schedule.csv')
df_HBM_services_weekly = load_table(HBM_DIR, 'services_weekly.csv')

# CIA Global Statistical Database
df_CIA_communications        = load_table(CIA_DIR, 'communications_data.csv')
df_CIA_demographics          = load_table(CIA_DIR, 'demographics_data.csv')
df_CIA_economy               = load_table(CIA_DIR, 'economy_data.csv')
df_CIA_energy                = load_table(CIA_DIR, 'energy_data.csv')
df_CIA_geography             = load_table(CIA_DIR, 'geography_data.csv')
df_CIA_government_and_civics = load_table(CIA_DIR, 'government_and_civics_data.csv')
df_CIA_transportation        = load_table(CIA_DIR, 'transportation_data.csv')

## Explore Hospital Beds Management

In [3]:
# Peek at five random patient rows. The fixed random_state makes the same rows
# appear on every Restart & Run All, so the displayed table is reproducible.
df_HBM_patients.sample(5, random_state=42)

Unnamed: 0,patient_id,name,age,arrival_date,departure_date,service,satisfaction
891,PAT-891c504e,Stephen Wood,52,2025-10-09,2025-10-11,ICU,82
123,PAT-933fdb73,Laura Roberts,55,2025-01-21,2025-01-26,ICU,79
37,PAT-44c855d4,Stephanie Salazar,33,2025-06-14,2025-06-23,emergency,97
617,PAT-0d73fc32,Nancy Stewart,33,2025-05-09,2025-05-18,general_medicine,88
176,PAT-8f12a5d6,Joseph Knight,0,2025-03-05,2025-03-11,emergency,79


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# patient columns; the quartiles are spelled out but equal the defaults.
df_HBM_patients.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,age,satisfaction
count,1000.0,1000.0
mean,45.337,79.597
std,25.999912,11.550325
min,0.0,60.0
25%,23.0,70.0
50%,46.0,80.0
75%,68.0,89.25
max,89.0,99.0


In [5]:
# Age versus satisfaction for every patient, coloured by hospital service.
fig = px.scatter(
    df_HBM_patients,
    x="age",
    y="satisfaction",
    color="service",
    width=1000,   # figure size in pixels
    height=800,
)
fig.show()

## Explore CIA Global Statistical Database

In [6]:
# Peek at five random country rows. The fixed random_state makes the same rows
# appear on every Restart & Run All, so the displayed table is reproducible.
df_CIA_demographics.sample(5, random_state=42)

Unnamed: 0,Country,Total_Population,Population_Growth_Rate,Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate,Total_Literacy_Rate,Male_Literacy_Rate,Female_Literacy_Rate,Youth_Unemployment_Rate
28,BHUTAN,876181,0.96%,15.61,6.05,0.0,29.1,1.07,25.61,1.77,70.9%,77.9%,62.8%,18.8%
112,IRAN,87590873,0.93%,14.79,5.2,0.3,31.7,1.03,14.58,1.92,88.7%,24.1%,3.1%,27.2%
111,INDONESIA,279476346,0.76%,15.05,6.77,0.71,31.1,1.0,19.31,1.99,96%,71.4%,3.7%,16.1%
41,CABO VERDE,603901,1.19%,18.19,5.77,0.57,26.8,0.95,22.96,2.11,90.8%,17.3%,5.4%,34.3%
235,TUNISIA,11976182,0.63%,14.1,6.4,1.3,34.0,0.98,11.6,1.96,82.7%,47.2%,2%,38.3%


In [7]:
# Summary statistics of the numeric demographic columns; the quartiles are
# spelled out but equal the defaults.
# Columns that were read as text are left out of this summary, which is why
# e.g. Total_Population and the percentage columns (values such as '0.96%')
# do not appear in the result.
# NOTE(review): the maxima of 2020-2021 reported for Birth_Rate, Death_Rate
# and Total_Fertility_Rate look like year values rather than rates - verify
# and clean before using these columns.
df_CIA_demographics.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate
count,228.0,230.0,229.0,227.0,227.0,227.0,227.0
mean,26.361842,25.018565,3.657511,31.83348,1.004229,19.367665,11.292555
std,132.909844,187.367894,5.565715,9.236467,0.187592,18.431299,133.983614
min,5.8,1.42,-2.4,14.8,0.83,1.5,1.09
25%,10.8,5.7025,0.63,24.05,0.95,5.755,1.68
50%,15.03,7.255,1.89,31.7,0.98,12.0,1.97
75%,22.225,9.1,4.4,39.5,1.01,28.525,2.8
max,2020.0,2021.0,45.8,55.4,3.34,103.06,2021.0


In [8]:
# Total population per country on a log scale, coloured by median age.
#
# describe() does not list Total_Population among the numeric columns, so the
# column was not parsed as a number. Convert it explicitly so that the log
# y-axis and the "total descending" ordering operate on real numbers.
# Thousands separators, if present, are stripped first; anything that still
# is not a number becomes NaN.
population = pd.to_numeric(
    df_CIA_demographics["Total_Population"]
    .astype(str)
    .str.replace(",", "", regex=False),
    errors="coerce",
)

# Work on a copy (the loaded frame stays untouched) and keep only rows that
# can be drawn on a log axis: missing or non-positive values are dropped.
df_CIA_population = (
    df_CIA_demographics
    .assign(Total_Population=population)
    .loc[lambda frame: frame["Total_Population"] > 0]
)

fig = px.bar(
    df_CIA_population,
    x="Country",
    y="Total_Population",
    color="Median_Age",
    color_continuous_scale='Viridis',
    log_y=True,
    width=1500,
    height=800,
    title="Total population per country (log scale), coloured by median age",
)
# Order the bars from the most to the least populous country.
fig.update_xaxes(categoryorder="total descending")
fig.show()