In [1]:
from pathlib import Path

import pandas as pd
from const import DATA_DIR

def find_dataset_files():
    exts = {".csv", ".tsv", ".xlsx", ".xls"}
    paths = []
    for path in DATA_DIR.rglob("*"):
        if path.suffix.lower() in exts and not path.name.startswith("~$"):
            paths.append(path)
    return sorted(paths)

dataset_files = find_dataset_files()
len(dataset_files), [p.relative_to(DATA_DIR) for p in dataset_files]

(18,
 [PosixPath('chornobyl/data/1_Spatial_dataset.csv'),
  PosixPath('chornobyl/data/2_Plutonium_isotope_measurements.csv'),
  PosixPath('chornobyl/data/3_Plutonium_isotope_layers.csv'),
  PosixPath('chornobyl/data/4_Hot_Particle_Activity.csv'),
  PosixPath('chornobyl/data/5_Fuel_particle_dissolution.csv'),
  PosixPath('chornobyl/data/6_Ivankov_background_radiation.csv'),
  PosixPath('chornobyl/data/6_Ivankov_radionuclide_activity.csv'),
  PosixPath('estimated-megatons-of-nuclear-weapons-deliverable-in-first-strike/estimated-megatons-of-nuclear-weapons-deliverable-in-first-strike.csv'),
  PosixPath('migrations/API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv'),
  PosixPath('migrations/Metadata_Country_API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv'),
  PosixPath('migrations/Metadata_Indicator_API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv'),
  PosixPath('migrations/chornobyl_human_flow.csv'),
  PosixPath('nuclear-weapons/nuclear-warhead-inventories/nuclear-warhead-inventories.csv'),
  PosixPath('nucle

In [2]:
from IPython.display import display

for path in dataset_files:
    rel = path.relative_to(DATA_DIR)
    print("\n" + "=" * 80)
    print(f"Dataset file: {rel}")

    suffix = path.suffix.lower()
    if suffix in {".csv", ".tsv"}:
        read_kwargs = {}
        if suffix == ".tsv":
            read_kwargs["sep"] = "\t"
        try:
            df = pd.read_csv(path, **read_kwargs)
        except Exception as exc:
            print(f"  ERROR reading {rel}: {exc}")
            continue
        print(f"  Shape: {df.shape}")
        display(df.head())
    else:
        # Excel file: treat each sheet as a separate dataset
        try:
            xls = pd.ExcelFile(path)
        except Exception as exc:
            print(f"  ERROR opening Excel file {rel}: {exc}")
            continue

        for sheet in xls.sheet_names:
            print(f"  Sheet: {sheet}")
            try:
                df = pd.read_excel(path, sheet_name=sheet)
            except Exception as exc:
                print(f"    ERROR reading sheet {sheet!r}: {exc}")
                continue
            print(f"    Shape: {df.shape}")
            display(df.head())


Dataset file: chornobyl/data/1_Spatial_dataset.csv
  Shape: (1323, 28)


Unnamed: 0,Code,Identifier,Latitude,Longitude,ADR,Hr,Ca,pH_KCl,pH_H20,P2O5,...,Relative_uncertainty_134Cs_%,154Eu,Relative_uncertainty_154Eu_%,90Sr,Relative_uncertainty_90Sr_%,Date_of_Pu_measurement,Terrestrial_density_of_soil_contamination_with_238Pu_kBq_m-2,Relative_uncertainty_238Pu_%,Terrestrial_density_of_soil_contamination_with_239_240Pu_kBq_m-2,Relative_uncertainty_239_240Pu_%
0,1,1,51.37611,30.04194,,,,,4.7,,...,,275.0,23.0,14000.0,29.0,,,,,
1,2,1a,51.37611,30.04194,,,,,4.5,,...,,337.0,7.0,21000.0,10.0,,,,,
2,3,1b,51.37611,30.04194,,,,,4.5,,...,,222.0,4.0,5400.0,11.0,,,,,
3,4,2,51.37611,30.03333,,,,,5.5,,...,,826.0,23.0,49000.0,24.0,,,,,
4,5,2a,51.37611,30.03333,,,,,5.2,,...,,417.0,67.0,22000.0,59.0,,,,,



Dataset file: chornobyl/data/2_Plutonium_isotope_measurements.csv
  Shape: (94, 7)


Unnamed: 0,Code,Identifier,Date_of_Pu_measurement,Terrestrial_density_of_soil_contamination_with_238Pu_kBq_m-2,Relative_uncertainty_238Pu_%,Terrestrial_density_of_soil_contamination_with_239_240Pu_kBq_m-2,Relative_uncertainty_239_240Pu_%
0,155,212,23-Aug-99,4.93,7,10.8,11
1,157,214,23-Aug-99,6.08,6,12.8,10
2,160,217,23-Aug-99,9.59,6,17.0,10
3,161,218,23-Aug-99,4.26,6,9.1,11
4,162,219,23-Aug-99,6.33,6,14.1,10



Dataset file: chornobyl/data/3_Plutonium_isotope_layers.csv
  Shape: (83, 15)


Unnamed: 0,Code_of_the_sample,Identifier_of_the_sample,Dates_of_the_activity_measurements,Sampling_area_m2,The_top_border_of_a_layer_cm,The_bottom_border_of_a_layer_cm,Mass_of_the_layer_kg,Activity_concentration_137Cs_Bqkg,Relative_uncertainty_of_specific_activity_of_137Cs_%,Activity_concentration_90Sr_Bqkg,Relative_uncertainty_of_specific_activity_of_90Sr_%,Activity_concentration_238Pu_Bqkg,Relative_uncertainty_of_specific_activity_of_238Pu_%,Activity_concentration_239_240Pu_Bqkg,Relative_uncertainty_of_specific_activity_of_239_240Pu_%
0,1269,1P_J,17-Dec-00,0.00981,0,2,0.096,48000.0,6.0,270.0,7,0.98,29,2.35,19
1,1269,1P_J,17-Dec-00,0.00981,2,4,0.146,29000.0,6.0,126.0,12,0.55,24,1.69,21
2,1269,1P_J,17-Dec-00,0.00981,4,6,0.217,8100.0,7.0,79.0,15,0.18,22,1.15,21
3,1269,1P_J,17-Dec-00,0.00981,6,8,0.272,2400.0,7.0,50.0,18,0.09,28,0.62,19
4,1269,1P_J,17-Dec-00,0.00981,8,10,0.193,1210.0,8.0,39.0,15,0.04,25,0.34,15



Dataset file: chornobyl/data/4_Hot_Particle_Activity.csv
  Shape: (1950, 43)


Unnamed: 0,Code,Identifier,Information_source,Dataset_reference,Angle_degree,DIST_km,MAXSIZE_um,MINSIZE_um,Burnup_MW_d_kg-1,VIEW,...,Activity_of_54Mn_Bq,STD_of_54Mn_Bq,Activity_of_60Co_Bq,STD_of_60Co_Bq,Activity_of_241Am_Bq,STD_of_241Am_Bq,DATE_Alpha,Total_alpha_activity_Bq,DATE_Beta,Activity_of_90Sr_Bq
0,1,7_1,UIAR dataset,"Kashparov, 2003",270.0,0.3,756.0,427.0,10.7,,...,,,,,4.7,12.2,,,,
1,2,17_8*,UIAR dataset,"Kashparov, 2003",270.0,0.3,100.0,25.0,10.7,,...,,,,,,,09-Mar-88,0.655,,
2,3,17_9,UIAR dataset,"Kashparov, 2003",270.0,0.3,232.0,220.0,9.84,,...,,,0.814,0.222,,,09-Mar-88,1.18,,
3,4,17_2(1),UIAR dataset,"Kashparov, 2003",270.0,0.3,366.0,244.0,11.6,,...,,,,,,,09-Mar-88,1.46,06-Jun-88,70.7
4,5,17_7,UIAR dataset,"Kashparov, 2003",270.0,0.3,488.0,305.0,11.6,,...,,,6.66,1.85,,,,,06-Jun-88,503.0



Dataset file: chornobyl/data/5_Fuel_particle_dissolution.csv
  Shape: (115, 9)


Unnamed: 0,Code,Identifier,Date_of_90Sr_measurement,90Sr_exchangeable_form_in_soil_%,Relative_uncertainty_of_90Sr_exchangeable_form_portion_in_soil_%,85Sr_exchangeable_form_in_soil_%,Relative_uncertainty_of_85Sr_exchangeable_form_portion_in_soil_%,90Sr_activity_in_the_FP-associated_form_%,Relative_uncertainty_of_90Sr_activity_in_the_FP-associated_form_%
0,1,1,28-Jul-95,46,9.0,82,3.0,44,9
1,2,1a,28-Jul-95,61,3.0,85,3.0,28,2
2,3,1b,28-Jul-95,36,3.0,84,4.0,57,4
3,4,2,28-Jul-95,17,4.0,58,1.0,70,8
4,5,2a,28-Jul-95,30,13.0,85,6.0,65,15



Dataset file: chornobyl/data/6_Ivankov_background_radiation.csv
  Shape: (3389, 7)


Unnamed: 0,Code,latitude,longitude,Analysis_type_1_or_2,at_0.1m,at_1m,Sampling_site_description
0,1,51.160815,29.852414,2,0.1,0.09,forest
1,2,51.151827,29.838133,2,0.09,0.09,old field
2,3,51.151827,29.852412,2,0.1,0.09,old field
3,4,51.151183,29.86728,1,0.11,0.09,forest
4,5,51.151822,29.880973,2,0.11,0.1,forest



Dataset file: chornobyl/data/6_Ivankov_radionuclide_activity.csv
  Shape: (547, 20)


Unnamed: 0,Code,Full_weight_kg,Sample_weight_kg,137Cs_Bq/kg,Relative_uncertainty_137Cs_%,Fraction_90Sr_recovered,Relative_uncertainty_90Sr_recovery_%,90Sr_Bq/kg,Relative_uncertainty_90Sr_%,40K_Bq/kg,Relative_uncertainty_40K_%,226Ra_Bq/kg,Relative_uncertainty_226Ra_%,232Th_Bq/kg,Relative_uncertainty_232Th_%,137Cs_kBq/m2,Relative_uncertainty_137Cs_kBq/m2,90Sr_kBq/m2,Relative_uncertainty_90Sr_kBq/m2,Ratio_Cs:Sr
0,4,1.639,1.109,79.5,10.0,0.81,4.0,17.5,11.7,110.0,16.0,7.0,29.0,10.0,19.0,25.6,2.56,5.65,0.661,4.53
1,7,1.88,1.502,42.0,8.0,0.77,4.0,7.66,7.21,127.0,13.0,9.0,26.0,7.0,18.0,15.5,1.24,2.83,0.204,5.48
2,12,1.76,1.412,109.0,6.0,0.9,4.0,24.9,10.8,124.0,13.0,10.0,22.0,7.0,20.0,37.7,2.26,8.61,0.927,4.38
3,21,1.721,1.415,96.0,6.0,0.92,4.0,27.2,9.85,118.0,15.0,10.0,24.0,6.0,26.0,32.5,1.95,9.19,0.905,3.53
4,39,1.45,0.997,390.0,7.0,0.98,4.0,165.0,8.94,150.0,14.0,19.0,33.0,12.0,24.0,111.0,7.78,47.0,4.2,2.37



Dataset file: estimated-megatons-of-nuclear-weapons-deliverable-in-first-strike/estimated-megatons-of-nuclear-weapons-deliverable-in-first-strike.csv
  Shape: (338, 4)


Unnamed: 0,Entity,Code,Year,Nuclear warheads yield
0,China,CHN,1964,0.116961
1,China,CHN,1965,0.584804
2,China,CHN,1966,2.122814
3,China,CHN,1967,2.491217
4,China,CHN,1968,34.014175



Dataset file: migrations/API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv
  ERROR reading migrations/API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv: Error tokenizing data. C error: Expected 3 fields in line 5, saw 70


Dataset file: migrations/Metadata_Country_API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv
  Shape: (265, 6)


Unnamed: 0,Country Code,Region,IncomeGroup,SpecialNotes,TableName,Unnamed: 5
0,ABW,Latin America & Caribbean,High income,,Aruba,
1,AFE,,,"26 countries, stretching from the Red Sea in t...",Africa Eastern and Southern,
2,AFG,Middle East & North Africa,Low income,The reporting period for national accounts dat...,Afghanistan,
3,AFW,,,"22 countries, stretching from the westernmost ...",Africa Western and Central,
4,AGO,Sub-Saharan Africa,Lower middle income,The World Bank systematically assesses the app...,Angola,



Dataset file: migrations/Metadata_Indicator_API_SM.POP.TOTL.ZS_DS2_en_csv_v2_3688.csv
  Shape: (1, 5)


Unnamed: 0,INDICATOR_CODE,INDICATOR_NAME,SOURCE_NOTE,SOURCE_ORGANIZATION,Unnamed: 4
0,SM.POP.TOTL.ZS,International migrant stock (% of population),International migrant stock (% of population) ...,"International Migrant Stock, United Nations (U...",



Dataset file: migrations/chornobyl_human_flow.csv
  ERROR reading migrations/chornobyl_human_flow.csv: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte

Dataset file: nuclear-weapons/nuclear-warhead-inventories/nuclear-warhead-inventories.csv
  Shape: (9, 7)


Unnamed: 0,Entity,Code,Year,Number of deployed strategic nuclear warheads,Number of deployed nonstrategic nuclear warheads,Number of nondeployed nuclear warheads in reserve,Number of retired nuclear warheads
0,China,CHN,2025,24,0,576,0
1,France,FRA,2025,280,0,10,0
2,India,IND,2025,0,0,180,0
3,Israel,ISR,2025,0,0,90,0
4,North Korea,PRK,2025,0,0,50,0



Dataset file: nuclear-weapons/nuclear-warhead-stockpiles-lines/nuclear-warhead-stockpiles-lines.csv
  Shape: (880, 4)


Unnamed: 0,Entity,Code,Year,Number of nuclear warheads
0,China,CHN,1945,0
1,China,CHN,1946,0
2,China,CHN,1947,0
3,China,CHN,1948,0
4,China,CHN,1949,0



Dataset file: nuclear-weapons/nuclear-weapons-proliferation/nuclear-weapons-proliferation.csv
  Shape: (86, 6)


Unnamed: 0,Entity,Code,Year,Number of countries considering nuclear weapons,Number of countries pursuing nuclear weapons,Number of countries possessing nuclear weapons
0,World,OWID_WRL,1938,0,0,0
1,World,OWID_WRL,1939,2,0,0
2,World,OWID_WRL,1940,3,0,0
3,World,OWID_WRL,1941,3,1,0
4,World,OWID_WRL,1942,3,2,0



Dataset file: nuclear-weapons/number-of-countries-that-approve-of-nuclear-weapons-treaties/number-of-countries-that-approve-of-nuclear-weapons-treaties.csv
  Shape: (188, 7)


Unnamed: 0,Entity,Code,Year,Number of countries with a given status on the Comprehensive Nuclear-Test-Ban Treaty,Number of countries with a given status on the Nuclear Non-Proliferation Treaty,Number of countries with a given status on the Treaty on the Prohibition of Nuclear Weapons,Number of countries with a given status on the Partial Test Ban Treaty
0,Approved,,1996,1.0,185.0,,124.0
1,Approved,,1997,8.0,186.0,,124.0
2,Approved,,1998,26.0,187.0,,124.0
3,Approved,,1999,51.0,187.0,,124.0
4,Approved,,2000,69.0,187.0,,124.0



Dataset file: nuclear-weapons/number-of-nuclear-weapons-tests/number-of-nuclear-weapons-tests.csv
  Shape: (632, 4)


Unnamed: 0,Entity,Code,Year,Number of nuclear weapons tests
0,China,CHN,1945,0
1,China,CHN,1946,0
2,China,CHN,1947,0
3,China,CHN,1948,0
4,China,CHN,1949,0



Dataset file: power-plants/Global-Nuclear-Power-Tracker-September-2025.xlsx
  Sheet: About
    Shape: (63, 4)


Unnamed: 0,"Global Nuclear Power Tracker - September 4, 2025",Unnamed: 1,Unnamed: 2,Unnamed: 3
0,Copyright © Global Energy Monitor. Global Nucl...,,,
1,"Recommended Citation: ""Global Energy Monitor, ...",,,
2,"Contact: Joe Bernardi, Project Manager, Global...",,,
3,,,,
4,September 2025 Release Notes,,,


  Sheet: Data
    Shape: (1749, 38)


Unnamed: 0,Date Last Researched,Country/Area,Project Name,Unit Name,Project Name in Local Language / Script,Other Name(s),Capacity (MW),Status,Reactor Type,Model,...,Location Accuracy,City,"Local Area (taluk, county)","Major Area (prefecture, district)",State/Province,Subregion,Region,GEM location ID,GEM unit ID,Wiki URL
0,2024-05-22,Argentina,Atucha nuclear power plant,1,,Unit IV (Phase 3),362.0,operating,pressurized heavy water reactor,PHWR KWU,...,exact,,,Partido de Zárate,Buenos Aires,Latin America and the Caribbean,Americas,L100000500166,G100000500502,https://www.gem.wiki/Atucha_nuclear_power_plant
1,2024-05-22,Argentina,Atucha nuclear power plant,2,,Unit IV (Phase 3),745.0,operating,pressurized heavy water reactor,PHWR KWU,...,exact,,,Partido de Zárate,Buenos Aires,Latin America and the Caribbean,Americas,L100000500166,G100000500382,https://www.gem.wiki/Atucha_nuclear_power_plant
2,2025-07-09,Argentina,Atucha nuclear power plant,3,,Unit IV (Phase 3),1150.0,shelved,pressurized water reactor,Hualong One,...,exact,,,Partido de Zárate,Buenos Aires,Latin America and the Caribbean,Americas,L100000500166,G100000500646,https://www.gem.wiki/Atucha_nuclear_power_plant
3,2025-07-09,Argentina,Atucha nuclear power plant,SMR 1,,Unit IV (Phase 3),300.0,announced,small modular reactor,ACR-300,...,exact,Atucha,,,,Latin America and the Caribbean,Americas,L100000500166,G100001055220,https://www.gem.wiki/Atucha_nuclear_power_plant
4,2025-07-09,Argentina,Atucha nuclear power plant,SMR 2,,Unit IV (Phase 3),300.0,announced,small modular reactor,ACR-300,...,exact,Atucha,,,,Latin America and the Caribbean,Americas,L100000500166,G100001055221,https://www.gem.wiki/Atucha_nuclear_power_plant
