# US Disasters Mashup

Explore **54,575 disaster events** across four categories.

**Categories:**
- Aviation Accidents (32,410)
- Severe Storms (14,770)
- Earthquakes (3,742)
- Shipwrecks (3,653)

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Apply a white-grid plot theme. Matplotlib 3.6 renamed the bundled seaborn
# styles with a 'seaborn-v0_8-' prefix, so try the new name first and fall
# back to the pre-3.6 name; if neither is installed the default style is kept
# rather than aborting the notebook.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
print('Libraries loaded')

## 1. Load Dataset

In [None]:
# Read the combined disaster records from the JSON file in the working
# directory. The encoding is pinned to UTF-8 so the load does not depend on
# the platform's locale default.
with open('disasters_mashup.json', encoding='utf-8') as f:
    data = json.load(f)

# NOTE(review): presumably the file holds a list of per-event records — confirm
df = pd.DataFrame(data)
print(f'Total disasters: {len(df):,}')
# Last expression of the cell: shows the first rows as the cell output.
df.head()

## 2. Category Distribution

In [None]:
# Count events per category and print a fixed-width table with each
# category's share of all rows in df.
cat_counts = df['category'].value_counts()
total_events = len(df)

header_lines = ['Disasters by Category:', '=' * 40]
table_rows = [
    f'{name:20s} {n_events:6,} ({n_events / total_events * 100:5.1f}%)'
    for name, n_events in cat_counts.items()
]
print('\n'.join(header_lines + table_rows))

In [None]:
# Horizontal bar chart of event counts per category.
fig, ax = plt.subplots(figsize=(10, 6))

# Colors are looked up by category name instead of by bar position, so each
# category gets the same hex value here as in the geographic scatter plot.
# Categories missing from the mapping fall back to gray.
palette = {
    'aviation_accident': '#e63946',
    'storm': '#f4a261',
    'earthquake': '#2a9d8f',
    'shipwreck': '#457b9d',
}
colors = [palette.get(name, 'gray') for name in cat_counts.index]

cat_counts.plot(kind='barh', ax=ax, color=colors)
ax.set_xlabel('Number of Events')
ax.set_title('Disaster Events by Category', fontweight='bold')
plt.tight_layout()
plt.show()

## 3. Geographic Distribution

In [None]:
# Color assigned to each category; unknown categories are drawn in gray.
category_colors = dict(
    aviation_accident='#e63946',
    storm='#f4a261',
    earthquake='#2a9d8f',
    shipwreck='#457b9d',
)

fig, ax = plt.subplots(figsize=(14, 8))

# One scatter layer per category so every category gets its own legend entry.
for cat in df['category'].unique():
    in_category = df['category'] == cat
    subset = df.loc[in_category]
    ax.scatter(
        subset['longitude'],
        subset['latitude'],
        s=3,
        alpha=0.3,
        c=category_colors.get(cat, 'gray'),
        label=cat.replace('_', ' ').title(),
    )

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Disaster Locations Across the US', fontweight='bold')
# Fixed viewport: longitude -180..-60, latitude 15..75.
ax.set_xlim(-180, -60)
ax.set_ylim(15, 75)
ax.legend(loc='lower right')
plt.tight_layout()
plt.show()

## 4. Temporal Analysis

In [None]:
# Parse the date column (unparseable values become NaT) and derive the year.
df['date_parsed'] = pd.to_datetime(df['date'], errors='coerce')
df['year'] = df['date_parsed'].dt.year

# Keep rows whose year lies in 1950..2025 inclusive; rows without a year
# fail the comparison and are dropped.
in_window = df['year'].between(1950, 2025)
valid_years = df.loc[in_window]

# Event counts per (year, category), pivoted to one column per category.
per_year_category = valid_years.groupby(['year', 'category']).size()
yearly = per_year_category.unstack(fill_value=0)

print(f'Records with valid dates: {len(valid_years):,}')
print(f'Date range: {valid_years["year"].min():.0f} - {valid_years["year"].max():.0f}')

In [None]:
# Stacked area chart of yearly event counts, one band per category column.
fig, ax = plt.subplots(figsize=(14, 6))

# Pick each band's color by column name from category_colors (the mapping
# built in the geographic distribution cell). The previous positional list
# only lined up while all four categories were present in the expected column
# order; a missing or extra category would silently shift the colors.
area_colors = [category_colors.get(name, 'gray') for name in yearly.columns]

yearly.plot(kind='area', ax=ax, alpha=0.7, stacked=True, color=area_colors)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Events')
ax.set_title('Disaster Events Over Time', fontweight='bold')
ax.legend(title='Category', loc='upper left')
plt.tight_layout()
plt.show()

## 5. Storm Analysis

In [None]:
# Summarize the storm rows: total count, the ten most frequent subcategories,
# and fatality figures. Each optional column is only used when present.
is_storm = df['category'] == 'storm'
storms = df.loc[is_storm]
print(f'Total storms: {len(storms):,}')

if 'subcategory' in storms.columns:
    storm_types = storms['subcategory'].value_counts().head(10)
    print('\nTop Storm Types:')
    for kind, n_events in storm_types.items():
        print(f'  {kind}: {n_events:,}')

if 'fatalities' in storms.columns:
    deaths = storms['fatalities']
    # Rows with a recorded, strictly positive fatality count.
    has_deaths = deaths.notna() & (deaths > 0)
    fatal = storms[has_deaths]
    print(f'\nFatal storm events: {len(fatal):,}')
    print(f'Total storm fatalities: {deaths.sum():,.0f}')

## 6. Earthquake Magnitudes

In [None]:
# Summarize earthquake rows: total count plus magnitude min/median/max and
# the number of events at or above magnitude 6 and 7.
is_quake = df['category'] == 'earthquake'
quakes = df.loc[is_quake]
print(f'Total earthquakes: {len(quakes):,}')

if 'magnitude' in quakes.columns:
    # Only rows with a recorded magnitude take part in the statistics.
    valid_mag = quakes.dropna(subset=['magnitude'])
    mags = valid_mag['magnitude']
    print('\nMagnitude Statistics:')
    print(f'  Minimum: {mags.min():.1f}')
    print(f'  Median: {mags.median():.1f}')
    print(f'  Maximum: {mags.max():.1f}')
    for threshold in (6, 7):
        print(f'  M{threshold}+: {(mags >= threshold).sum():,}')

In [None]:
# Histogram of recorded earthquake magnitudes with reference lines at M6 and
# M7. Skipped entirely when the dataset has no magnitude column.
if 'magnitude' in quakes.columns:
    # Recomputed here so the cell does not rely on the statistics cell.
    valid_mag = quakes[quakes['magnitude'].notna()]
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(valid_mag['magnitude'], bins=30, color='#2a9d8f', edgecolor='white')
    # Class names follow the commonly used magnitude classes: 6.0-6.9 is
    # "Strong" and 7.0-7.9 is "Major" ("Great" starts at 8.0), so the earlier
    # "Major"/"Great" labels were one class too high.
    ax.axvline(x=6.0, color='orange', linestyle='--', label='M6.0 (Strong)')
    ax.axvline(x=7.0, color='red', linestyle='--', label='M7.0 (Major)')
    ax.set_xlabel('Magnitude')
    ax.set_ylabel('Frequency')
    ax.set_title('Earthquake Magnitude Distribution', fontweight='bold')
    ax.legend()
    plt.tight_layout()
    plt.show()

## 7. Aviation vs Maritime

In [None]:
# Compare the number of aviation accidents with the number of shipwrecks.
aviation = df[df['category'] == 'aviation_accident']
maritime = df[df['category'] == 'shipwreck']

print('Aviation vs Maritime:')
print('=' * 40)
print(f'Aviation accidents: {len(aviation):,}')
print(f'Shipwrecks: {len(maritime):,}')
# Guard the division: a dataset without shipwreck rows would otherwise raise
# ZeroDivisionError instead of finishing the summary.
if len(maritime) > 0:
    print(f'Ratio: {len(aviation)/len(maritime):.1f}:1')
else:
    print('Ratio: n/a (no shipwreck records)')

## Conclusion

This unified disaster dataset enables:
- Cross-category geographic analysis
- Temporal trend comparison
- Risk corridor identification
- Multi-modal visualization

**Sources**: NTSB, NOAA (AWOIS, Storm Events), USGS

**Author**: Luke Steuber | @lukesteuber.com (Bluesky)