# Meteorites vs UFOs: Detection Bias Study

1,279 records across 3 datasets exploring observation bias in sky-watching phenomena:
- `temporal_comparison`: Year-by-year meteorite falls vs UFO reports (1900–2023)
- `state_comparison`: 58 US state-level records with UFO-per-meteorite ratios
- `meteorite_detail`: 1,097 individual witnessed falls

**Dataset**: [github.com/lukeslp/meteorites-ufos-detection-bias](https://github.com/lukeslp/meteorites-ufos-detection-bias)

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np

# Notebook-wide plotting defaults: seaborn-like white grid, wide figures,
# slightly larger base font.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (14, 5),
    'font.size': 11,
})
print('Libraries loaded')

## 1. Load Data

In [None]:
# Read the bundled JSON file and split its three top-level sections into
# DataFrames. The file is opened explicitly as UTF-8 so that decoding does not
# depend on the platform's default text encoding.
with open('meteorites_ufos_detection_bias.json', encoding='utf-8') as f:
    data = json.load(f)

temporal = pd.DataFrame(data['temporal_comparison'])
states = pd.DataFrame(data['state_comparison'])
meteorites = pd.DataFrame(data['meteorite_detail'])

# Sanity check: record counts per section and the span of years covered.
print(f'Temporal records: {len(temporal)} (years {temporal.year.min()}-{temporal.year.max()})')
print(f'State records: {len(states)}')
print(f'Meteorite detail records: {len(meteorites)}')
# Last expression of the cell: shows the first rows as the cell output.
temporal.head(3)

## 2. Temporal Trends: UFO Reports vs Meteorite Falls (1900–2023)

In [None]:
# Dual-axis chart sharing the year x-axis: UFO sightings as a shaded line on
# the left axis, meteorite falls as bars on the right axis.
fig, ax1 = plt.subplots(figsize=(14, 6))

ax2 = ax1.twinx()
# Only the line carries the legend label; labelling the shaded fill as well
# would list 'UFO sightings' twice in the combined legend built below.
ax1.fill_between(temporal['year'], temporal['ufo_sightings'], alpha=0.25, color='#FF6B35')
ax1.plot(temporal['year'], temporal['ufo_sightings'], color='#FF6B35', linewidth=1.5, label='UFO sightings')
ax2.bar(temporal['year'], temporal['meteorite_falls'], alpha=0.6, color='#2196F3', width=0.8, label='Meteorite falls')
# Under a grid-enabled style the twin axis would draw its own horizontal grid,
# misaligned with the left axis' grid; keep only the left one.
ax2.grid(False)

ax1.set_xlabel('Year')
ax1.set_ylabel('UFO Sightings (NUFORC)', color='#FF6B35')
ax2.set_ylabel('Witnessed Meteorite Falls (NASA)', color='#2196F3')
ax1.set_title('UFO Reports vs Meteorite Falls (1900–2023)', fontsize=14, fontweight='bold', pad=12)
# Thousands separators on the left (UFO) axis tick labels.
ax1.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))

# Mark key events
ax1.axvline(1947, color='purple', linestyle='--', alpha=0.5, label='Roswell (1947)')
ax1.axvline(1993, color='green', linestyle='--', alpha=0.5, label='X-Files debut (1993)')

# Each axis tracks its own legend entries; merge them into a single legend.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left', fontsize=9)

plt.tight_layout()
plt.show()

## 3. State-Level UFO-per-Meteorite Ratio (Top 20 States)

In [None]:
# Rank states by UFO reports per meteorite fall (rows without a ratio are
# dropped) and keep the 20 highest.
states_valid = states.dropna(subset=['ufo_per_meteorite']).sort_values('ufo_per_meteorite', ascending=False)
top20 = states_valid.head(20)

fig, ax = plt.subplots(figsize=(14, 8))

# Colour tiers: red above 1000, orange above 100, green otherwise.
colors = []
for ratio in top20['ufo_per_meteorite']:
    if ratio > 1000:
        colors.append('#FF4444')
    elif ratio > 100:
        colors.append('#FF9800')
    else:
        colors.append('#4CAF50')

# barh draws from the bottom up, so feed it the ranking in reverse to put the
# highest ratio at the top of the chart.
bottom_up = top20.iloc[::-1]
bars = ax.barh(bottom_up['state'], bottom_up['ufo_per_meteorite'], color=colors[::-1])

ax.set_xlabel('UFO Reports per Witnessed Meteorite Fall')
ax.set_title('Detection Bias: UFO Reports per Meteorite Fall by State', fontsize=14, fontweight='bold', pad=12)
ax.xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))

# Write each ratio just past the end of its bar; reversing the bars again
# lines them up with top20's ranking order.
for bar, ratio in zip(reversed(bars), top20['ufo_per_meteorite']):
    label_x = bar.get_width() + 10
    label_y = bar.get_y() + bar.get_height()/2
    ax.text(label_x, label_y, f'{ratio:,.0f}x', va='center', fontsize=9)

plt.tight_layout()
plt.show()

print('Top 5 most biased states:')
summary_cols = ['state','meteorite_falls','ufo_sightings','ufo_per_meteorite']
print(top20[summary_cols].head(5).to_string(index=False))

## 4. Meteorite Fall Locations & Mass Distribution

In [None]:
# Keep falls that have coordinates and a mass recorded, with the coordinates
# inside the valid latitude/longitude ranges.
m_valid = meteorites.dropna(subset=['latitude', 'longitude', 'mass_g'])
lat_in_range = m_valid['latitude'].between(-90, 90)
lon_in_range = m_valid['longitude'].between(-180, 180)
m_valid = m_valid[lat_in_range & lon_in_range]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: longitude/latitude scatter. Marker area follows the square root
# of the mass, clipped to [100 g, 1e8 g] so extremes stay readable.
sizes = np.sqrt(m_valid['mass_g'].clip(lower=100, upper=1e8)) / 10
ax1.scatter(m_valid['longitude'], m_valid['latitude'], s=sizes, alpha=0.5, color='#FF6B35', edgecolor='none')
ax1.set(xlim=(-180, 180), ylim=(-90, 90))
# Equator and prime meridian as faint reference lines.
ax1.axhline(0, color='gray', linewidth=0.5)
ax1.axvline(0, color='gray', linewidth=0.5)
ax1.set_title(f'Witnessed Meteorite Falls (n={len(m_valid):,})', fontsize=13, fontweight='bold')
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')

# Right panel: histogram of log10(mass). Masses are floored at 1 g first so
# the logarithm is defined for zero-mass entries.
log_mass = np.log10(m_valid['mass_g'].clip(lower=1))
ax2.hist(log_mass, bins=40, color='#2196F3', edgecolor='white', linewidth=0.5)
ax2.set_title('Meteorite Mass Distribution (log scale)', fontsize=13, fontweight='bold')
ax2.set_xlabel('log10(mass in grams)')
ax2.set_ylabel('Count')
# One tick per power of ten, labelled in human-readable units.
mass_tick_labels = ['1g', '10g', '100g', '1kg', '10kg', '100kg', '1t', '10t', '100t']
ax2.set_xticks(range(0, 9))
ax2.set_xticklabels(mass_tick_labels)

plt.tight_layout()
plt.show()

## 5. Detection Bias Summary Table

In [None]:
# Rank states by UFO reports per meteorite fall, ignoring rows without a ratio.
ranked = states.dropna(subset=['ufo_per_meteorite']).sort_values('ufo_per_meteorite', ascending=False)

print('=== Top 10 Most "Biased" States (UFO reports per meteorite fall) ===')
# Build a display copy with readable headers and the ratio rendered as 'N,NNNx'.
top10 = ranked.head(10)[['state', 'meteorite_falls', 'ufo_sightings', 'ufo_per_meteorite']].rename(
    columns={
        'state': 'State',
        'meteorite_falls': 'Meteorite Falls',
        'ufo_sightings': 'UFO Reports',
        'ufo_per_meteorite': 'UFO per Meteorite',
    }
)
top10['UFO per Meteorite'] = [f'{ratio:,.0f}x' for ratio in top10['UFO per Meteorite']]
print(top10.to_string(index=False))

print('\n=== Key Observations ===')
# Totals are taken over every state row, including those without a ratio.
total_ufo = states['ufo_sightings'].sum()
total_met = states['meteorite_falls'].sum()
print(f'Total US UFO reports: {total_ufo:,}')
print(f'Total US witnessed meteorite falls: {total_met}')
print(f'Overall US ratio: {total_ufo/total_met:.0f} UFO reports per meteorite fall')

# Split the yearly UFO counts at 1990 to compare the two periods.
from_1990 = temporal['year'] >= 1990
ufo_from_1990 = temporal.loc[from_1990, 'ufo_sightings'].sum()
ufo_before_1990 = temporal.loc[~from_1990, 'ufo_sightings'].sum()
print(f'\nPost-1990s UFO surge: {ufo_from_1990:,} reports vs {ufo_before_1990:,} before 1990')