In [28]:
import pandas as pd
import numpy as np

from const import DATA_DIR

# Location of the raw explosions CSV under the project data directory.
nuclear_explosions_path = (
    DATA_DIR
    / "nuclear-weapons"
    / "nuclear-explosions"
    / "nuclear_explosions.csv"
)

# Read the raw table; a later cell copies this frame before transforming it.
nuclear_explosions_df = pd.read_csv(filepath_or_buffer=nuclear_explosions_path)

# Show the first five rows as the cell output.
nuclear_explosions_df.head(5)

Unnamed: 0,WEAPON SOURCE COUNTRY,WEAPON DEPLOYMENT LOCATION,Data.Source,Location.Cordinates.Latitude,Location.Cordinates.Longitude,Data.Magnitude.Body,Data.Magnitude.Surface,Location.Cordinates.Depth,Data.Yeild.Lower,Data.Yeild.Upper,Data.Purpose,Data.Name,Data.Type,Date.Day,Date.Month,Date.Year
0,USA,Alamogordo,DOE,32.54,-105.57,0.0,0.0,-0.1,21.0,21.0,Wr,Trinity,Tower,16,7,1945
1,USA,Hiroshima,DOE,34.23,132.27,0.0,0.0,-0.6,15.0,15.0,Combat,Littleboy,Airdrop,5,8,1945
2,USA,Nagasaki,DOE,32.45,129.52,0.0,0.0,-0.6,21.0,21.0,Combat,Fatman,Airdrop,9,8,1945
3,USA,Bikini,DOE,11.35,165.2,0.0,0.0,-0.2,21.0,21.0,We,Able,Airdrop,30,6,1946
4,USA,Bikini,DOE,11.35,165.2,0.0,0.0,0.03,21.0,21.0,We,Baker,Uw,24,7,1946


In [29]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric
# columns of the raw frame.
nuclear_explosions_df.describe()

Unnamed: 0,Location.Cordinates.Latitude,Location.Cordinates.Longitude,Data.Magnitude.Body,Data.Magnitude.Surface,Location.Cordinates.Depth,Data.Yeild.Lower,Data.Yeild.Upper,Date.Day,Date.Month,Date.Year
count,2046.0,2046.0,2046.0,2046.0,2046.0,2046.0,2046.0,2046.0,2046.0,2046.0
mean,35.462429,-36.015037,2.145406,0.356696,-0.490829,208.444528,323.431021,16.683773,7.282502,1970.896383
std,23.352702,100.829355,2.625453,1.203569,10.981072,1641.962943,2055.203066,8.799878,3.132347,10.37276
min,-49.5,-169.32,0.0,0.0,-400.0,0.0,0.0,1.0,1.0,1945.0
25%,37.0,-116.0515,0.0,0.0,0.0,0.0,18.25,9.0,5.0,1962.0
50%,37.1,-116.0,0.0,0.0,0.0,0.001,20.0,17.0,8.0,1970.0
75%,49.87,78.0,5.1,0.0,0.0,20.0,150.0,25.0,10.0,1979.0
max,75.1,179.22,7.4,6.0,1.451,50000.0,50000.0,31.0,12.0,1998.0


In [30]:
def format_date(day, month, year):
    """Format day/month/year components as a zero-padded ``DD/MM/YYYY`` string.

    Each component is converted with ``int()``, so integers, floats (truncated
    toward zero) and integer-like strings are accepted.

    Args:
        day: Day of the month.
        month: Month number.
        year: Year number.

    Returns:
        The formatted date, or an empty string when any component cannot be
        converted to an integer (e.g. ``None``, ``NaN``, infinity, or
        non-numeric text). No calendar validation is performed.
    """
    try:
        d, m, y = int(day), int(month), int(year)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no usable date". Any other exception
        # signals a real bug and is allowed to propagate instead of being
        # silently turned into an empty string.
        return ""
    return f"{d:02d}/{m:02d}/{y:04d}"

# Work on a copy of the frame loaded earlier in the notebook so the raw data
# stays untouched.
df = nuclear_explosions_df.copy()

# Build a single formatted 'Date' column from the day/month/year columns.
# r.get(...) yields None for a missing column, which format_date maps to "".
df['Date'] = df.apply(
    lambda r: format_date(r.get('Date.Day'), r.get('Date.Month'), r.get('Date.Year')),
    axis=1,
)

# Source column -> output label, listed in the desired output order.
# Renaming by name (rather than assigning a positional list of labels) keeps
# every label attached to the right data even if a source column is absent.
column_labels = {
    'WEAPON DEPLOYMENT LOCATION': 'Weapon deployment location',
    'Location.Cordinates.Latitude': 'Latitude',
    'Location.Cordinates.Longitude': 'Longitude',
    'Data.Yeild.Upper': 'Data yeild upper',
    'WEAPON SOURCE COUNTRY': 'Weapon source country',
    'Date': 'Date',  # the column created above
    'Data.Purpose': 'Purpose',
    'Data.Name': 'Name',
    'Data.Type': 'Method of deployment',
}
orig_cols = list(column_labels)

# Select only the columns that are actually present, then apply the labels.
existing = [c for c in orig_cols if c in df.columns]
final_df = df[existing].rename(columns=column_labels)

# --- FILTER: drop rows whose latitude and longitude are both exactly 0 ---
at_origin = (final_df['Latitude'] == 0) & (final_df['Longitude'] == 0)
final_df = final_df[~at_origin].reset_index(drop=True)

# --- Extra columns for consistent visualization in Flourish ---

# The yield column is expected to be numeric and >= 0.
numeric = final_df['Data yeild upper']

# Zero-safe logarithm: log1p(0) == 0. NaN values propagate unchanged.
final_df['Data yeild upper (log1p)'] = np.log1p(numeric)

# Less distorting alternative: square root. NaN values propagate unchanged.
final_df['Data yeild upper (sqrt)'] = np.sqrt(numeric)

# Marker size normalized to 0-10, based on the sqrt column.
max_sqrt = final_df['Data yeild upper (sqrt)'].max(skipna=True)
if pd.isna(max_sqrt) or max_sqrt == 0:
    # Avoid dividing by zero/NaN when there is no positive yield at all.
    max_sqrt = 1.0
final_df['Marker size (sqrt, 0-10)'] = final_df['Data yeild upper (sqrt)'].fillna(0) / max_sqrt * 10

# Save next to the source CSV.
out_path = DATA_DIR / "nuclear-weapons" / "nuclear-explosions" / "nuclear_explosions_cleaned.csv"
final_df.to_csv(out_path, index=False, encoding='utf-8')
print("Saved cleaned CSV")

Saved cleaned CSV


In [31]:
# Preview the first rows of the cleaned frame, including the derived columns.
final_df.head()

Unnamed: 0,Weapon deployment location,Latitude,Longitude,Data yeild upper,Weapon source country,Date,Purpose,Name,Method of deployment,Data yeild upper (log1p),Data yeild upper (sqrt),"Marker size (sqrt, 0-10)"
0,Alamogordo,32.54,-105.57,21.0,USA,16/07/1945,Wr,Trinity,Tower,3.091042,4.582576,0.204939
1,Hiroshima,34.23,132.27,15.0,USA,05/08/1945,Combat,Littleboy,Airdrop,2.772589,3.872983,0.173205
2,Nagasaki,32.45,129.52,21.0,USA,09/08/1945,Combat,Fatman,Airdrop,3.091042,4.582576,0.204939
3,Bikini,11.35,165.2,21.0,USA,30/06/1946,We,Able,Airdrop,3.091042,4.582576,0.204939
4,Bikini,11.35,165.2,21.0,USA,24/07/1946,We,Baker,Uw,3.091042,4.582576,0.204939


In [32]:
# Summary statistics for the numeric columns of the cleaned frame.
final_df.describe()

Unnamed: 0,Latitude,Longitude,Data yeild upper,Data yeild upper (log1p),Data yeild upper (sqrt),"Marker size (sqrt, 0-10)"
count,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0
mean,35.883348,-36.442515,326.837571,3.481941,9.299159,0.415871
std,23.167076,101.349415,2067.123024,1.81457,15.507487,0.693516
min,-49.5,-169.32,0.0,0.0,0.0,0.0
25%,37.0,-116.06,19.0,2.995732,4.358899,0.194936
50%,37.1,-116.0,20.0,3.044522,4.472136,0.2
75%,49.8795,78.0,150.0,5.01728,12.247449,0.547723
max,75.1,179.22,50000.0,10.819798,223.606798,10.0


In [33]:
from const import VISUALIZATIONS_DIR

# Standalone HTML page that embeds Flourish visualisation 26786353 through the
# Flourish embed script, with a thumbnail image as the <noscript> fallback.
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Nuclear Explosions Visualization</title>
</head>
<body>
    <div class="flourish-embed flourish-webgl" data-src="visualisation/26786353">
        <script src="https://public.flourish.studio/resources/embed.js"></script>
        <noscript>
            <img src="https://public.flourish.studio/visualisation/26786353/thumbnail" width="100%" alt="webgl visualization" />
        </noscript>
    </div>
</body>
</html>
"""

# Write the page into the visualizations directory as UTF-8 text.
html_path = VISUALIZATIONS_DIR / "nuclear-explosions.html"
with open(html_path, mode="w", encoding="utf-8") as html_file:
    html_file.write(html_content)