# Diagnostic Analysis

Analysis of orbital debris dataset. Generates tables to verify key findings about object categories, operators, and zombie satellites.


## Setup: Load Master Registry

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

data_path = '../data/clean/kinetic_master.csv'
master = pd.read_csv(data_path, low_memory=False)

print(f"Loaded kinetic_master.csv: {len(master):,} records")

Loaded kinetic_master.csv: 32,843 records


## 1. Data Quality Audit

In [2]:
# Data Quality Summary
quality_audit = pd.DataFrame({
    'Column': master.columns,
    'Data Type': master.dtypes.values,
    'Non-Null Count': master.count().values,
    'Null Count': master.isnull().sum().values,
    'Null %': (master.isnull().sum().values / len(master) * 100).round(2),
    'Unique Values': [master[col].nunique() for col in master.columns]
})

print("\n" + "="*100)
print("DATA QUALITY AUDIT")
print("="*100)
print(quality_audit.to_string(index=False))
print(f"\n✓ Total Records: {len(master):,}")
print(f"✓ Total Features: {len(master.columns)}")
print(f"✓ Average Data Completeness: {(master.count().sum() / (len(master) * len(master.columns)) * 100):.2f}%")


DATA QUALITY AUDIT
             Column Data Type  Non-Null Count  Null Count  Null %  Unique Values
           norad_id     int64           32843           0    0.00          32843
          cospar_id    object           32843           0    0.00          32843
        object_name    object           32843           0    0.00          18256
     satellite_name    object            5591       27252   82.98           5578
      official_name    object            5591       27252   82.98           5569
           category    object           32843           0    0.00              5
        object_type    object           32843           0    0.00              4
     launch_mass_kg   float64            5591       27252   82.98            562
      proxy_mass_kg   float64           32843           0    0.00            563
        dry_mass_kg   float64           32843           0    0.00            614
        power_watts   float64            5591       27252   82.98            136
        

## 2. Category Breakdown Report

In [3]:
# Category Analysis
in_orbit = master[master['in_orbit'] == 1]

category_breakdown = in_orbit.groupby('category').agg({
    'norad_id': 'count',
    'proxy_mass_kg': ['sum', 'mean', 'median'],
    'kinetic_joules': ['sum', 'mean'],
    'launch_year': 'mean'
}).round(2)

category_breakdown.columns = ['Count', 'Total Mass (kg)', 'Avg Mass (kg)', 'Median Mass (kg)', 
                               'Total Kinetic Energy (J)', 'Avg Kinetic Energy (J)', 'Avg Launch Year']

# Add percentages
total_count = category_breakdown['Count'].sum()
total_mass = category_breakdown['Total Mass (kg)'].sum()
total_energy = category_breakdown['Total Kinetic Energy (J)'].sum()

category_breakdown['% of Count'] = (category_breakdown['Count'] / total_count * 100).round(2)
category_breakdown['% of Mass'] = (category_breakdown['Total Mass (kg)'] / total_mass * 100).round(2)
category_breakdown['% of Energy'] = (category_breakdown['Total Kinetic Energy (J)'] / total_energy * 100).round(2)

print("\n" + "="*150)
print("CATEGORY BREAKDOWN (In-Orbit Objects Only)")
print("="*150)
print(category_breakdown.to_string())
print(f"\nTOTALS:")
print(f"  Objects: {int(total_count):,}")
print(f"  Mass: {total_mass:,.0f} kg ({total_mass/1e6:.2f} kilotons)")
print(f"  Kinetic Energy: {total_energy:.2e} J ({total_energy/1e12:.2f} TJ)")


CATEGORY BREAKDOWN (In-Orbit Objects Only)
                    Count  Total Mass (kg)  Avg Mass (kg)  Median Mass (kg)  Total Kinetic Energy (J)  Avg Kinetic Energy (J)  Avg Launch Year  % of Count  % of Mass  % of Energy
category                                                                                                                                                                          
Active Satellite    12470       6251936.00         501.36             355.0              1.398869e+14            1.121788e+10          2023.24       37.97      42.79        48.29
Debris              12655        668305.00          52.81              50.0              1.641738e+13            1.297304e+09          1993.72       38.53       4.57         5.67
Inactive Satellite   5263       2884104.85         548.00             355.0              4.759891e+13            9.044064e+09          2000.75       16.02      19.74        16.43
Rocket Body          2403       4800635.00        1997.77    

## 3. Operator Intelligence Report

In [4]:
# Top Operators Analysis
operator_intel = in_orbit.groupby('country_operator').agg({
    'norad_id': 'count',
    'proxy_mass_kg': ['sum', 'mean'],
    'kinetic_joules': 'sum',
    'is_zombie': 'sum'
}).round(2)

operator_intel.columns = ['Object Count', 'Total Mass (kg)', 'Avg Mass (kg)',
                          'Total Kinetic Energy (J)', 'Zombie Count']
operator_intel = operator_intel.sort_values('Total Mass (kg)', ascending=False)

# Add percentages and rates
operator_intel['% of Objects'] = (operator_intel['Object Count'] / operator_intel['Object Count'].sum() * 100).round(2)
operator_intel['% of Mass'] = (operator_intel['Total Mass (kg)'] / operator_intel['Total Mass (kg)'].sum() * 100).round(2)
operator_intel['Zombie Rate %'] = (operator_intel['Zombie Count'] / operator_intel['Object Count'] * 100).round(2)

print("\n" + "="*160)
print("OPERATOR INTELLIGENCE REPORT (Top 20 by Mass)")
print("="*160)
print(operator_intel.head(20).to_string())

# Prepare zombies subset for downstream zombie analysis and summary
zombies = in_orbit[in_orbit['is_zombie'] == 1].copy()



OPERATOR INTELLIGENCE REPORT (Top 20 by Mass)
                      Object Count  Total Mass (kg)  Avg Mass (kg)  Total Kinetic Energy (J)  Zombie Count  % of Objects  % of Mass  Zombie Rate %
country_operator                                                                                                                                  
USA                           3651       2126984.75         582.58              3.860089e+13          1381         65.30      43.78          37.83
MULTINATIONAL                   62        692221.00       11164.85              1.451974e+13            31          1.11      14.25          50.00
CHINA                          467        494332.80        1058.53              5.772438e+12           294          8.35      10.18          62.96
RUSSIA                         163        266416.00        1634.45              3.649696e+12           110          2.92       5.48          67.48
UNITED KINGDOM                 639        194896.70         305.00     

## 4. Risk Profile Report

In [5]:
# Altitude Band Risk Analysis
altitude_bins = [0, 200, 400, 600, 800, 1000, 1500, 2000, 36000, 100000]
altitude_labels = ['<200km', '200-400km', '400-600km', '600-800km', '800-1000km',
                   '1000-1500km', '1500-2000km', '2000-36000km', '>36000km']

in_orbit['altitude_band'] = pd.cut(in_orbit['perigee_km'], bins=altitude_bins, labels=altitude_labels)

altitude_risk = in_orbit.groupby('altitude_band', observed=False).agg({
    'norad_id': 'count',
    'proxy_mass_kg': 'sum',
    'kinetic_joules': 'sum',
    'category': lambda x: x.value_counts().to_dict()
}).round(2)

altitude_risk.columns = ['Object Count', 'Total Mass (kg)', 'Total Kinetic Energy (J)', 'Category Distribution']

print("\n" + "="*130)
print("ALTITUDE BAND RISK PROFILE")
print("="*130)
for band in altitude_labels:
    if band in altitude_risk.index:
        row = altitude_risk.loc[band]
        print(f"\n{band}:")
        print(f"  Objects: {int(row['Object Count']):,}")
        print(f"  Mass: {row['Total Mass (kg)']:,.0f} kg")
        print(f"  Kinetic Energy: {row['Total Kinetic Energy (J)']:.2e} J")

# RCS Class Analysis
rcs_risk = in_orbit.groupby('rcs_class', observed=False).agg({
    'norad_id': 'count',
    'proxy_mass_kg': ['sum', 'mean'],
    'kinetic_joules': 'sum'
}).round(2)

rcs_risk.columns = ['Object Count', 'Total Mass (kg)', 'Avg Mass (kg)', 'Total Kinetic Energy (J)']
rcs_risk = rcs_risk.sort_values('Total Kinetic Energy (J)', ascending=False)



ALTITUDE BAND RISK PROFILE

<200km:
  Objects: 79
  Mass: 115,516 kg
  Kinetic Energy: 1.57e+12 J

200-400km:
  Objects: 1,970
  Mass: 1,221,633 kg
  Kinetic Energy: 2.45e+13 J

400-600km:
  Objects: 13,412
  Mass: 5,272,152 kg
  Kinetic Energy: 1.49e+14 J

600-800km:
  Objects: 6,294
  Mass: 1,391,568 kg
  Kinetic Energy: 3.64e+13 J

800-1000km:
  Objects: 3,886
  Mass: 1,107,250 kg
  Kinetic Energy: 2.90e+13 J

1000-1500km:
  Objects: 3,592
  Mass: 972,789 kg
  Kinetic Energy: 2.33e+13 J

1500-2000km:
  Objects: 294
  Mass: 124,644 kg
  Kinetic Energy: 1.78e+12 J

2000-36000km:
  Objects: 2,782
  Mass: 3,886,002 kg
  Kinetic Energy: 2.18e+13 J

>36000km:
  Objects: 523
  Mass: 511,063 kg
  Kinetic Energy: 2.34e+12 J


## 5. Temporal Analysis

In [6]:
# Launch Year Trends
temporal_analysis = in_orbit.groupby('launch_year').agg({
    'norad_id': 'count',
    'proxy_mass_kg': 'sum',
    'kinetic_joules': 'sum'
}).round(2)

temporal_analysis.columns = ['Objects Launched', 'Total Mass (kg)', 'Total Kinetic Energy (J)']
temporal_analysis = temporal_analysis.sort_index(ascending=False)

print("\n" + "="*100)
print("TEMPORAL ANALYSIS: Top 20 Launch Years by Objects")
print("="*100)
print(temporal_analysis.sort_values('Objects Launched', ascending=False).head(20).to_string())

# Age Statistics
age_stats = in_orbit[in_orbit['sat_age_years'].notna()]['sat_age_years'].describe()
print("\n" + "="*100)
print("SATELLITE AGE STATISTICS (In-Orbit Objects)")
print("="*100)
print(age_stats.to_string())


TEMPORAL ANALYSIS: Top 20 Launch Years by Objects
             Objects Launched  Total Mass (kg)  Total Kinetic Energy (J)
launch_year                                                             
2025                     4555        1765090.0              4.928355e+13
2024                     3307        1087645.0              3.004898e+13
1999                     2648         250820.0              5.502471e+12
2022                     2583         757283.0              1.869691e+13
2023                     2470         953529.0              2.480634e+13
2021                     1169         475307.0              1.018051e+13
1993                      837         132975.0              2.561791e+12
1981                      647         140905.0              2.829711e+12
2020                      594         263729.0              4.973075e+12
2018                      534         349170.6              4.775424e+12
2000                      524         204441.0              2.700215e+12


## 6. Satellite Age & Zombie Profile

In [7]:
# Satellite Age Distribution
satellites = in_orbit[in_orbit['category'] == 'Active Satellite'].copy()
inactive_sats = in_orbit[in_orbit['category'] == 'Inactive Satellite'].copy()

print("\n" + "="*100)
print("SATELLITE POPULATION ANALYSIS")
print("="*100)
print(f"\nActive Satellites: {len(satellites):,}")
print(f"Inactive Satellites: {len(inactive_sats):,}")
print(f"Total Satellites: {len(satellites) + len(inactive_sats):,}")
print(f"\nZombie Satellites (Inactive + Age > 110% Design Life): {len(zombies):,}")
print(f"Zombie Rate: {len(zombies) / (len(satellites) + len(inactive_sats)) * 100:.2f}%")
print(f"\nZombie Age Statistics:")
print(zombies[['sat_age_years']].describe().to_string())

# Age distribution bins
# define the bins, then define the label that we will eventually bind to it (age_dist_named series)
age_bins = [0, 10, 15, 20, 25, 30, 40, 50, 100]
age_labels = ['0-10yr', '10-15yr', '15-20yr', '20-25yr', '25-30yr', '30-40yr', '40-50yr', '>50yr']

age_dist = zombies['sat_age_years'].value_counts(bins=age_bins, sort=False).sort_index()
age_dist_named = pd.Series(age_dist.values, index=age_labels[:len(age_dist)])

print("\n" + "="*100)
print("ZOMBIE SATELLITE AGE DISTRIBUTION")
print("="*100)
print(age_dist_named.to_string())

# Build zombie profile
zombie_profile = pd.DataFrame({
    'Metric': ['Total Zombies', 'Zombie Rate (%)', 'Avg Zombie Age (years)', 'Median Zombie Age (years)',
               'Total Zombie Mass (kg)', 'Total Zombie Kinetic Energy (J)', 'Zombie by US', 'Zombie by Russia/CIS',
               'Zombie by China', 'Zombie by Other'],
    'Value': [
        len(zombies),
        (len(zombies) / (len(satellites) + len(inactive_sats)) * 100),
        zombies['sat_age_years'].mean(),
        zombies['sat_age_years'].median(),
        zombies['proxy_mass_kg'].sum(),
        zombies['kinetic_joules'].sum(),
        len(zombies[zombies['country_operator'] == 'US']),
        len(zombies[zombies['country_operator'].isin(['CIS', 'Russia'])]),
        len(zombies[zombies['country_operator'] == 'PRC']),
        len(zombies[~zombies['country_operator'].isin(['US', 'CIS', 'Russia', 'PRC'])])
    ]
})



SATELLITE POPULATION ANALYSIS

Active Satellites: 12,470
Inactive Satellites: 5,263
Total Satellites: 17,733

Zombie Satellites (Inactive + Age > 110% Design Life): 5,263
Zombie Rate: 29.68%

Zombie Age Statistics:
       sat_age_years
count    5263.000000
mean       25.250048
std        18.116157
min         1.000000
25%         6.000000
50%        23.000000
75%        40.000000
max        68.000000

ZOMBIE SATELLITE AGE DISTRIBUTION
0-10yr     1753
10-15yr     378
15-20yr     338
20-25yr     274
25-30yr     435
30-40yr     777
40-50yr     719
>50yr       589


## Summary Tables


In [8]:
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)
print(f"Total records: {len(master):,}")
print(f"In-orbit objects: {len(in_orbit):,}")
print(f"Decayed objects: {len(master) - len(in_orbit):,}")
print(f"Total in-orbit mass: {in_orbit['proxy_mass_kg'].sum() / 1e6:.2f} kilotons")
print(f"Total kinetic energy: {in_orbit['kinetic_joules'].sum() / 1e12:.2f} TJ")
print(f"Average velocity: {in_orbit['velocity_kms'].mean():.2f} km/s")

print("\nCategories:")
print(f"  Active Satellites: {len(in_orbit[in_orbit['category'] == 'Active Satellite']):,}")
print(f"  Inactive Satellites: {len(in_orbit[in_orbit['category'] == 'Inactive Satellite']):,}")
print(f"  Debris: {len(in_orbit[in_orbit['category'] == 'Debris']):,}")
print(f"  Rocket Bodies: {len(in_orbit[in_orbit['category'] == 'Rocket Body']):,}")

print("\nZombie satellites:")
print(f"  Total: {len(zombies):,}")
print(f"  Rate: {len(zombies) / (len(satellites) + len(inactive_sats)) * 100:.2f}%")
print(f"  Avg age: {zombies['sat_age_years'].mean():.1f} years")
print(f"  US: {len(zombies[zombies['country_operator'] == 'US']):,}")
print(f"  Russia/CIS: {len(zombies[zombies['country_operator'].isin(['CIS', 'Russia'])]):,}")

print("\nRisk hotspots:")
print(f"  Objects in 400-600km: {len(in_orbit[(in_orbit['perigee_km'] >= 400) & (in_orbit['perigee_km'] <= 600)]):,}")
print(f"  Rocket bodies in 400-600km: {len(in_orbit[(in_orbit['category'] == 'Rocket Body') & (in_orbit['perigee_km'] >= 400) & (in_orbit['perigee_km'] <= 600)]):,}")
print(f"  Rocket bodies below 600km: {len(in_orbit[(in_orbit['category'] == 'Rocket Body') & (in_orbit['perigee_km'] < 600)]):,}")



SUMMARY STATISTICS
Total records: 32,843
In-orbit objects: 32,843
Decayed objects: 0
Total in-orbit mass: 14.61 kilotons
Total kinetic energy: 289.68 TJ
Average velocity: 6.91 km/s

Categories:
  Active Satellites: 12,470
  Inactive Satellites: 5,263
  Debris: 12,655
  Rocket Bodies: 2,403

Zombie satellites:
  Total: 5,263
  Rate: 29.68%
  Avg age: 25.3 years
  US: 0
  Russia/CIS: 0

Risk hotspots:
  Objects in 400-600km: 13,423
  Rocket bodies in 400-600km: 494
  Rocket bodies below 600km: 893
