In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import os

In [3]:
# Configuration
NASA_API_KEY = "YOUR_API_KEY_HERE"  # Your API Key here
BASE_URL = "https://api.nasa.gov/neo/rest/v1"

def get_10_years_data(api_key, years=10):
    """
    Gets asteroid data for the last X years by iterating through 7-day periods
    """
    print(f"STARTING EXTRACTION OF {years} YEARS OF ASTEROID DATA")
    print("=" * 60)
    
    end_date = datetime.now()
    start_date = end_date - timedelta(days=years*365)
    
    print(f"Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
    
    all_asteroids = []
    current_date = start_date
    total_requests = 0
    
    while current_date < end_date:
        # Calculate period end date (7 days later)
        period_end_date = min(current_date + timedelta(days=7), end_date)
        
        # Format dates for API
        start_date_str = current_date.strftime('%Y-%m-%d')
        end_date_str = period_end_date.strftime('%Y-%m-%d')
        
        print(f" Getting data: {start_date_str} to {end_date_str}")
        
        try:
            url = f"{BASE_URL}/feed?start_date={start_date_str}&end_date={end_date_str}&api_key={api_key}"
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()
            
            # Extract asteroids from all days in this period
            period_asteroids = []
            for date, objects in data.get('near_earth_objects', {}).items():
                for asteroid in objects:
                    # Add approach date to each asteroid
                    asteroid['close_approach_date'] = date
                    period_asteroids.append(asteroid)
            
            all_asteroids.extend(period_asteroids)
            total_requests += 1
            
            print(f"Period {start_date_str}: {len(period_asteroids)} approaches")
            
            # Small pause to avoid API rate limiting
            time.sleep(0.5)
            
        except requests.exceptions.RequestException as e:
            print(f"Error in period {start_date_str}: {e}")
            continue
        
        # Move to next period
        current_date = period_end_date
    
    print(f"\nEXTRACTION COMPLETED:")
    print(f"   • Total periods processed: {total_requests}")
    print(f"   • Total unique approaches: {len(all_asteroids)}")
    
    return all_asteroids

def create_historical_dataframe(asteroid_data):
    """
    Creates an enriched DataFrame with historical data
    """
    if not asteroid_data:
        print("No data obtained to create DataFrame")
        return pd.DataFrame()
    
    print(f"\nProcessing {len(asteroid_data)} approaches...")
    
    processed_data = []
    
    for asteroid in asteroid_data:
        try:
            # BASIC INFORMATION
            base_data = {
                'id': asteroid.get('id'),
                'neo_reference_id': asteroid.get('neo_reference_id'),
                'name': asteroid.get('name'),
                'cleaned_name': asteroid.get('name', '').replace('(', '').replace(')', ''),
                'nasa_jpl_url': asteroid.get('nasa_jpl_url'),
                'is_hazardous': asteroid.get('is_potentially_hazardous_asteroid', False),
                'approach_date': asteroid.get('close_approach_date'),
                'absolute_magnitude': asteroid.get('absolute_magnitude_h'),
            }
            
            # ESTIMATED DIAMETERS IN DIFFERENT UNITS
            estimated_diameter = asteroid.get('estimated_diameter', {})
            units = ['kilometers', 'meters', 'miles', 'feet']
            
            for unit in units:
                if unit in estimated_diameter:
                    base_data.update({
                        f'diameter_min_{unit}': estimated_diameter[unit].get('estimated_diameter_min'),
                        f'diameter_max_{unit}': estimated_diameter[unit].get('estimated_diameter_max'),
                        f'diameter_avg_{unit}': (
                            estimated_diameter[unit].get('estimated_diameter_min', 0) + 
                            estimated_diameter[unit].get('estimated_diameter_max', 0)
                        ) / 2
                    })
            
            # CLOSE APPROACH DATA (more detailed)
            close_approaches = asteroid.get('close_approach_data', [])
            if close_approaches:
                # Take the first approach (which matches the date)
                approach = close_approaches[0]
                base_data.update({
                    'full_approach_date': approach.get('close_approach_date_full'),
                    'approach_epoch': approach.get('epoch_date_close_approach'),
                    'relative_velocity_kms': approach.get('relative_velocity', {}).get('kilometers_per_second'),
                    'relative_velocity_kmh': approach.get('relative_velocity', {}).get('kilometers_per_hour'),
                    'relative_velocity_mph': approach.get('relative_velocity', {}).get('miles_per_hour'),
                    'miss_distance_km': approach.get('miss_distance', {}).get('kilometers'),
                    'miss_distance_lunar': approach.get('miss_distance', {}).get('lunar'),
                    'miss_distance_astronomical': approach.get('miss_distance', {}).get('astronomical'),
                    'orbiting_body': approach.get('orbiting_body')
                })
            
            processed_data.append(base_data)
            
        except Exception as e:
            print(f"Error processing asteroid {asteroid.get('id')}: {e}")
            continue
    
    # Create DataFrame
    df = pd.DataFrame(processed_data)
    
    # Convert date columns
    if 'approach_date' in df.columns:
        df['approach_date'] = pd.to_datetime(df['approach_date'], errors='coerce')
    
    # Convert numeric columns
    numeric_columns = [
        'absolute_magnitude', 'relative_velocity_kms', 'miss_distance_km',
        'miss_distance_lunar', 'miss_distance_astronomical'
    ]
    
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    print(f"Historical DataFrame created with {len(df)} rows and {len(df.columns)} columns")
    return df

def save_dataset(df, filename_prefix="asteroids_10_years"):
    """
    Saves the dataset to a CSV file with timestamp
    """
    if df.empty:
        print("No data to save")
        return None
    
    timestamp = datetime.now().strftime('%Y%m%d_%H%M')
    filename = f"{filename_prefix}_{timestamp}.csv"
    
    df.to_csv(filename, index=False, encoding='utf-8')
    
    size_mb = os.path.getsize(filename) / 1024 / 1024
    print(f"Dataset saved in: {filename}")
    print(f"File size: {size_mb:.2f} MB")
    print(f"Dimensions: {df.shape[0]:,} rows × {df.shape[1]} columns")
    
    return filename

# MAIN EXECUTION - EXTRACTION ONLY
if __name__ == "__main__":
    print("CREATING ASTEROID DATABASE (10 YEARS)")
    print("=" * 70)
    
    # 1. Get historical data
    asteroid_data = get_10_years_data(NASA_API_KEY, years=10)
    
    if asteroid_data:
        # 2. Create historical DataFrame
        historical_df = create_historical_dataframe(asteroid_data)
        
        if not historical_df.empty:
            # 3. Save dataset
            saved_file = save_dataset(historical_df)
            
            if saved_file:
                print(f"\nDATABASE SUCCESSFULLY CREATED!")
                print(f"File: {saved_file}")
                print(f"Total approaches: {len(historical_df):,}")
                print(f"Period covered: {historical_df['approach_date'].min()} to {historical_df['approach_date'].max()}")
            else:
                print("Error saving file")
    else:
        print("Could not get historical data from NASA API")

CREATING ASTEROID DATABASE (10 YEARS)
STARTING EXTRACTION OF 10 YEARS OF ASTEROID DATA
Period: 2015-10-08 to 2025-10-05
 Getting data: 2015-10-08 to 2015-10-15
Period 2015-10-08: 121 approaches
 Getting data: 2015-10-15 to 2015-10-22
Period 2015-10-15: 122 approaches
 Getting data: 2015-10-22 to 2015-10-29
Period 2015-10-22: 127 approaches
 Getting data: 2015-10-29 to 2015-11-05
Period 2015-10-29: 144 approaches
 Getting data: 2015-11-05 to 2015-11-12
Period 2015-11-05: 148 approaches
 Getting data: 2015-11-12 to 2015-11-19
Period 2015-11-12: 113 approaches
 Getting data: 2015-11-19 to 2015-11-26
Period 2015-11-19: 117 approaches
 Getting data: 2015-11-26 to 2015-12-03
Period 2015-11-26: 128 approaches
 Getting data: 2015-12-03 to 2015-12-10
Period 2015-12-03: 147 approaches
 Getting data: 2015-12-10 to 2015-12-17
Period 2015-12-10: 113 approaches
 Getting data: 2015-12-17 to 2015-12-24
Period 2015-12-17: 107 approaches
 Getting data: 2015-12-24 to 2015-12-31
Period 2015-12-24: 123 app