In [1]:
import pandas as pd
import numpy as np

# Assuming you have TLE data files for each debris source as CSV files
# Loads the Iridium 33 debris file on its own; the next cell reads the same
# file again together with the other three debris sources.
# NOTE: the recorded output of this cell is a ModuleNotFoundError for pandas,
# i.e. the kernel it was last run on did not have pandas installed.
iridium_33 = pd.read_csv(r"iridium_33.csv")


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the four debris CSV files (one per fragmentation event), in a fixed order.
debris_files = ["cosmos_1408.csv", "fengyun_1c.csv", "iridium_33.csv", "cosmos_2251.csv"]
cosmos_1408, fengyun_1c, iridium_33, cosmos_2251 = map(pd.read_csv, debris_files)

In [None]:
# Semimajor axis from mean motion: a = (mu / n^2)^(1/3)
def calculate_semimajor_axis(mean_motion):
    """Return the semimajor axis (km) for a mean motion given in rev/day."""
    earth_mu = 398600.4418  # Gravitational parameter, km^3/s^2
    # rev/day -> rad/s: one revolution is 2*pi rad, one day is 86400 s
    angular_rate = mean_motion * 2 * np.pi / 86400
    return (earth_mu / (angular_rate ** 2)) ** (1 / 3)

# Debris frames keyed by the label written to their DEBRIS_TYPE column.
# Insertion order is the order in which the frames are concatenated below.
debris_sources = {
    'COSMOS 1408': cosmos_1408,
    'FENGYUN 1C': fengyun_1c,
    'IRIDIUM 33': iridium_33,
    'COSMOS 2251': cosmos_2251,
}

# Add Semimajor Axis and the source label to each dataset (frames are modified
# in place). calculate_semimajor_axis only uses arithmetic operators, so the
# whole MEAN_MOTION column is passed at once instead of one value at a time
# through .apply.
for debris_label, df in debris_sources.items():
    df['SEMIMAJOR_AXIS'] = calculate_semimajor_axis(df['MEAN_MOTION'])
    df['DEBRIS_TYPE'] = debris_label

# Combine all datasets into one, renumbering the rows from 0
combined_debris_data = pd.concat(list(debris_sources.values()), ignore_index=True)

# Save the combined data to a new CSV file
combined_debris_data.to_csv("combined_debris_data.csv", index=False)

# Display the first few rows of the combined data
print(combined_debris_data.head())

In [None]:
import pandas as pd
import numpy as np

# Function to calculate semimajor axis from mean motion
def calculate_semimajor_axis(mean_motion):
    """
    Calculate semimajor axis (in km) from mean motion (in rev/day).

    Parameters
    ----------
    mean_motion : float, numpy array or pandas Series
        Mean motion in revolutions per day. Array-like input is processed
        element-wise and a Series stays a Series.

    Returns
    -------
    Same kind as the input
        Semimajor axis in km. Entries whose mean motion is zero, negative or
        NaN yield NaN, because no closed orbit corresponds to them (a plain
        float zero previously raised ZeroDivisionError and a negative value
        produced a positive, meaningless axis).
    """
    MU = 398600.4418  # Earth's gravitational parameter (km^3/s^2)
    n = mean_motion * 2 * np.pi / 86400  # Convert rev/day to rad/s

    # Multiply by 1.0 where n is a valid positive rate and by NaN elsewhere;
    # valid values are left bit-for-bit unchanged and invalid ones propagate
    # as NaN without any division by zero.
    n = n * np.where(n > 0, 1.0, np.nan)

    semimajor_axis = (MU / (n ** 2)) ** (1 / 3)  # Kepler's Third Law
    return semimajor_axis

# Function to process a TLE dataset
def process_tle_dataset(file_path, satellite_type):
    """
    Load one TLE CSV file and prepare it for merging.

    Parameters
    ----------
    file_path : str or path-like
        CSV file handed to pd.read_csv.
    satellite_type : str
        Label written to every row of the new SATELLITE_TYPE column.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data with SEMIMAJOR_AXIS and SATELLITE_TYPE columns added.
        None is returned (after printing an error line) when the file has no
        MEAN_MOTION column; callers should check for it, since pd.concat
        silently skips None entries.
    """
    data = pd.read_csv(file_path)

    # Without MEAN_MOTION the semimajor axis cannot be derived
    if 'MEAN_MOTION' not in data.columns:
        print(f"Error: MEAN_MOTION column missing in {file_path}.")
        return None

    # calculate_semimajor_axis is pure arithmetic, so the whole column is
    # converted in one vectorized call rather than row by row via .apply.
    data['SEMIMAJOR_AXIS'] = calculate_semimajor_axis(data['MEAN_MOTION'])

    # Add satellite type column
    data['SATELLITE_TYPE'] = satellite_type

    return data

# Source CSV file for each satellite family
cubesat_path = "cubesat.csv"
iridium_next_path = "iridium_next.csv"
globalstar_path = "globalstar.csv"
orbcomm_path = "orbcomm.csv"
amateur_radio_path = "amateur_radio.csv"

# Load every family and tag it with its SATELLITE_TYPE label
cubesat_data = process_tle_dataset(cubesat_path, "CubeSat")
iridium_next_data = process_tle_dataset(iridium_next_path, "Iridium Next")
globalstar_data = process_tle_dataset(globalstar_path, "Globalstar")
orbcomm_data = process_tle_dataset(orbcomm_path, "Orbcomm")
amateur_radio_data = process_tle_dataset(amateur_radio_path, "Amateur Radio")

# Stack the families into one table with a fresh 0..n-1 index
processed_frames = [
    cubesat_data,
    iridium_next_data,
    globalstar_data,
    orbcomm_data,
    amateur_radio_data,
]
all_satellites_data = pd.concat(processed_frames, ignore_index=True)

# Persist the merged table; a later cell reads this file back
all_satellites_data.to_csv("merged_satellite_data.csv", index=False)

print("Merged dataset saved as 'merged_satellite_data.csv'.")


In [None]:
# Preview the first rows of the merged satellite table.
print(all_satellites_data.head())

In [None]:
import pandas as pd
from sklearn.utils import resample

# Read back the merged table written to disk earlier
data = pd.read_csv("merged_satellite_data.csv")

# Split rows into CubeSats and everything else using one boolean mask
is_cubesat = data['SATELLITE_TYPE'] == "CubeSat"
cubesat_data = data[is_cubesat]
non_cubesat_data = data[~is_cubesat]

print(f"CubeSat entries: {len(cubesat_data)}")
print(f"Non-CubeSat entries: {len(non_cubesat_data)}")


In [None]:
samples_per_group = 250  # rows drawn for each of the two groups

# CubeSats: draw with replacement up to the target size
cubesat_oversampled = resample(
    cubesat_data, replace=True, n_samples=samples_per_group, random_state=42
)

# All other satellites: draw without replacement down to the target size
non_cubesat_downsampled = resample(
    non_cubesat_data, replace=False, n_samples=samples_per_group, random_state=42
)

# Stack both groups (original row labels are kept)
balanced_frames = [cubesat_oversampled, non_cubesat_downsampled]
balanced_data = pd.concat(balanced_frames)

print(f"Balanced dataset size: {len(balanced_data)}")


In [None]:
# Count missing values per column in the balanced sample.
print(balanced_data.isnull().sum())


In [None]:
# Keep a single copy of every fully identical row.
# NOTE(review): the CubeSat rows were drawn with replace=True, so this step
# discards the repeated rows that the oversampling introduced.
balanced_data = balanced_data.drop_duplicates()
print(f"Dataset after removing duplicates: {balanced_data.shape}")


In [None]:
# Row count of balanced_data at this point in the notebook.
print(f"Balanced dataset size: {len(balanced_data)}")

In [None]:
# Number of rows per SATELLITE_TYPE label.
print(balanced_data['SATELLITE_TYPE'].value_counts())

In [None]:
from sklearn.utils import resample


def _downsample_class(class_frame, n_samples):
    """
    Draw n_samples rows from one class, without replacement when possible.

    resample(replace=False) raises ValueError when more rows are requested
    than the frame holds, so replacement is enabled only for a class that is
    smaller than the target; larger classes are sampled exactly as before.
    """
    too_small = len(class_frame) < n_samples
    return resample(class_frame, replace=too_small, n_samples=n_samples, random_state=42)


# Separate each class into its own DataFrame
cubesat_data = balanced_data[balanced_data['SATELLITE_TYPE'] == 'CubeSat']
globalstar_data = balanced_data[balanced_data['SATELLITE_TYPE'] == 'Globalstar']
amateur_radio_data = balanced_data[balanced_data['SATELLITE_TYPE'] == 'Amateur Radio']
iridium_next_data = balanced_data[balanced_data['SATELLITE_TYPE'] == 'Iridium Next']
orbcomm_data = balanced_data[balanced_data['SATELLITE_TYPE'] == 'Orbcomm']

# Target sizes for each class
target_cubesat = int(len(balanced_data) * 0.5)  # 50% of the dataset
target_other = int(len(balanced_data) * 0.5 / 4)  # Split remaining 50% among other types

# Oversample CubeSat data (always with replacement)
cubesat_oversampled = resample(cubesat_data, replace=True, n_samples=target_cubesat, random_state=42)

# Bring every other satellite type to the common target size
globalstar_downsampled = _downsample_class(globalstar_data, target_other)
amateur_radio_downsampled = _downsample_class(amateur_radio_data, target_other)
iridium_next_downsampled = _downsample_class(iridium_next_data, target_other)
orbcomm_downsampled = _downsample_class(orbcomm_data, target_other)

# Combine the balanced dataset
focused_data = pd.concat([
    cubesat_oversampled,
    globalstar_downsampled,
    amateur_radio_downsampled,
    iridium_next_downsampled,
    orbcomm_downsampled
])

# Shuffle the dataset and renumber the rows from 0
focused_data = focused_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Check the new class distribution
print(focused_data['SATELLITE_TYPE'].value_counts())


In [None]:
# Row count of the re-balanced, shuffled dataset.
print(f"Total dataset size: {len(focused_data)}")


In [None]:
# Count rows that are exact copies of an earlier row (all columns equal).
print(f"Duplicates remaining: {focused_data.duplicated().sum()}")

In [None]:
# Same duplicate count as the previous cell (this cell repeats it verbatim).
print(f"Duplicates remaining: {focused_data.duplicated().sum()}")

In [None]:
# Write the re-balanced dataset to CSV without the row index.
focused_data.to_csv("focused_satellite_data.csv", index=False)

In [None]:
# Rows that exactly repeat an earlier row; the first occurrence is not included.
duplicates = focused_data[focused_data.duplicated()]
print(duplicates)

In [None]:
# Rows that exactly repeat an earlier row
duplicates = focused_data[focused_data.duplicated()]

# List the columns that contain at least one repeated value.
# Series.duplicated() never flags the first occurrence of a value, so the
# former `not ....all()` test was true for every column of a non-empty frame
# and the list simply contained all columns; `.any()` makes the check real.
duplicate_columns = [
    column for column in focused_data.columns
    if focused_data[column].duplicated().any()
]

print("Columns with duplicates:", duplicate_columns)

In [None]:
# Check for rows where all columns are identical
# (second and later occurrences only; the first copy of each row is not listed)
duplicates = focused_data[focused_data.duplicated()]
print(duplicates)


In [None]:
# Rows that repeat an earlier row on the orbital-element columns listed below
columns_to_check = ['SEMIMAJOR_AXIS', 'ECCENTRICITY', 'BSTAR', 'INCLINATION']
repeats_on_elements = focused_data.duplicated(subset=columns_to_check)
duplicates = focused_data[repeats_on_elements]
print(duplicates)

In [None]:
# Rows that repeat an earlier row on the identifying columns
# (OBJECT_NAME, OBJECT_ID, NORAD_CAT_ID)
key_columns = ['OBJECT_NAME', 'OBJECT_ID', 'NORAD_CAT_ID']
repeats_on_keys = focused_data.duplicated(subset=key_columns)
duplicates_key_columns = focused_data[repeats_on_keys]
print(duplicates_key_columns)


In [None]:
import numpy as np

# Constants
# Same value as the MU constant inside calculate_semimajor_axis, so the period
# computed below is consistent with the mean motion SEMIMAJOR_AXIS came from.
mu_earth = 398600.4418  # Gravitational parameter for Earth in km^3/s^2
earth_radius = 6371  # Earth's radius in km

# 1. Calculate Orbital Period (T) = 2*pi*sqrt(a^3 / mu)
# Orbital Period in seconds
focused_data['ORBITAL_PERIOD'] = 2 * np.pi * np.sqrt(focused_data['SEMIMAJOR_AXIS']**3 / mu_earth)

# Convert to hours (optional)
focused_data['ORBITAL_PERIOD_HOURS'] = focused_data['ORBITAL_PERIOD'] / 3600

# 2. Calculate Altitude
# Altitude in km (semimajor axis minus Earth's radius, i.e. above the surface)
focused_data['ALTITUDE'] = focused_data['SEMIMAJOR_AXIS'] - earth_radius

# 3. Calculate Perigee and Apogee
# Perigee = a(1 - e), Apogee = a(1 + e)
# NOTE: earth_radius is not subtracted here, so unlike ALTITUDE these two are
# distances from Earth's centre, in km.
focused_data['PERIGEE'] = focused_data['SEMIMAJOR_AXIS'] * (1 - focused_data['ECCENTRICITY'])
focused_data['APOGEE'] = focused_data['SEMIMAJOR_AXIS'] * (1 + focused_data['ECCENTRICITY'])

# Optionally, you can print the first few rows to verify the calculations
print(focused_data[['OBJECT_NAME', 'SEMIMAJOR_AXIS', 'ECCENTRICITY', 'ORBITAL_PERIOD', 'ALTITUDE', 'PERIGEE', 'APOGEE']].head())


In [None]:
# Re-check for exact duplicate rows now that the derived columns exist.
duplicates = focused_data[focused_data.duplicated()]
print(duplicates)

In [None]:
# Keep the rows whose SATELLITE_TYPE contains "CubeSat" (case-insensitive;
# missing labels count as no match). .copy() makes cubesats an independent
# frame: later cells add and drop columns on it, which on a plain slice of
# focused_data triggers pandas' SettingWithCopyWarning.
cubesats = focused_data[focused_data['SATELLITE_TYPE'].str.contains("CubeSat", case=False, na=False)].copy()
cubesats.to_csv("cubesats1_data.csv", index=False)

In [None]:
# Show the kernel's working directory (relative CSV paths resolve against it).
import os
print(os.getcwd())

In [None]:
# Load the combined debris catalogue.
# NOTE(review): this is an absolute, machine-specific path, and it names
# "combined1_debris_data.csv" whereas the debris cell earlier in this notebook
# writes "combined_debris_data.csv" — confirm which file is intended and
# prefer a path relative to the project directory.
all_debris = pd.read_csv(r"C:\Users\rajve\collision_course\debris\combined1_debris_data.csv")

In [None]:
# Display the CubeSat subset.
cubesats


In [None]:
# Display the debris catalogue.
all_debris


In [None]:
def _standardize_type_column(frame, source_column):
    """
    Return a copy of frame in which source_column is replaced by OBJECT_TYPE.

    The input frame is not modified. When source_column is already gone and
    OBJECT_TYPE exists (the cell was run before), the frame is returned
    unchanged instead of failing with KeyError.
    """
    if source_column not in frame.columns and 'OBJECT_TYPE' in frame.columns:
        return frame
    return frame.assign(OBJECT_TYPE=frame[source_column]).drop(columns=[source_column])


# Step 1: Standardize the object type for CubeSats (SATELLITE_TYPE -> OBJECT_TYPE)
cubesats = _standardize_type_column(cubesats, 'SATELLITE_TYPE')

# Step 2: Standardize the object type for debris (DEBRIS_TYPE -> OBJECT_TYPE)
all_debris = _standardize_type_column(all_debris, 'DEBRIS_TYPE')


In [None]:
# Display the CubeSat frame after the type-column standardization.
cubesats


In [None]:
# Display the debris frame after the type-column standardization.
all_debris

In [None]:
# Generate all pairs of CubeSats and debris (Cartesian product).
# A constant helper column `key` is added to temporary copies only, so neither
# cubesats nor all_debris keeps a stray `key` column afterwards. Columns that
# exist in both frames get pandas' default suffixes: _x for the CubeSat side,
# _y for the debris side.
pair_data = (
    cubesats.assign(key=1)
    .merge(all_debris.assign(key=1), on="key")
    .drop(columns="key")
)

# Verify the output
print(pair_data.head())
print(f"Total pairs generated: {len(pair_data)}")

# Save the pairs for further processing
pair_data.to_csv("cubesat_debris_pairs.csv", index=False)

In [None]:
# Display the CubeSat/debris pair table.
pair_data


In [None]:
# Column names of the pair table as a plain Python list
column_headers = pair_data.columns.tolist()
print(column_headers)

In [None]:
# Take the first pair (row position 0) as a Series for inspection.
one_row = pair_data.iloc[0]

In [None]:
# Display every field of that single pair.
one_row

In [None]:
# Parse both epoch columns to datetimes; unparseable values become NaT
for epoch_col in ('EPOCH_x', 'EPOCH_y'):
    pair_data[epoch_col] = pd.to_datetime(pair_data[epoch_col], errors='coerce')


In [None]:
import numpy as np

# Constants
# Same value as the MU constant inside calculate_semimajor_axis.
mu_earth = 398600.4418  # Gravitational parameter for Earth (km^3/s^2)
earth_radius = 6371  # Earth's radius (km)


def _angular_separation_deg(angle_a, angle_b):
    """
    Smallest absolute difference between two angles given in degrees.

    Angles wrap at 360, so 359 and 1 are 2 degrees apart, not 358. The result
    lies in [0, 180]. Works element-wise on pandas Series.
    """
    wrapped = np.abs(angle_a - angle_b) % 360
    return np.minimum(wrapped, 360 - wrapped)


# 1. Relative Altitude
pair_data['RELATIVE_ALTITUDE'] = np.abs(pair_data['ALTITUDE_x'] - pair_data['ALTITUDE_y'])

# 2. Relative Inclination
pair_data['RELATIVE_INCLINATION'] = np.abs(pair_data['INCLINATION_x'] - pair_data['INCLINATION_y'])

# 3. Relative Velocity (proxy: absolute difference of the two mean motions,
#    so it carries the unit of MEAN_MOTION rather than km/s)
pair_data['RELATIVE_VELOCITY'] = np.abs(pair_data['MEAN_MOTION_x'] - pair_data['MEAN_MOTION_y'])

# 4. Distance Approximation
# NOTE(review): this adds a squared altitude difference to a squared
# inclination difference converted to radians, i.e. two different units;
# the formula is kept unchanged — confirm the intended distance measure.
pair_data['DISTANCE'] = np.sqrt(
    pair_data['RELATIVE_ALTITUDE']**2 + (pair_data['RELATIVE_INCLINATION'] * np.pi / 180)**2
)

# RAAN separation with wrap-around at 360 degrees, shared by steps 5 and 9
raan_separation = _angular_separation_deg(
    pair_data['RA_OF_ASC_NODE_x'], pair_data['RA_OF_ASC_NODE_y']
)

# 5. Orbital Overlap (simplified estimate, assuming proximity in inclination and RAAN)
pair_data['ORBITAL_OVERLAP'] = np.where(
    (pair_data['RELATIVE_INCLINATION'] < 1) & (raan_separation < 10),
    1,
    0
)

# 6. Perigee and Apogee Difference
pair_data['PERIGEE_DIFF'] = np.abs(pair_data['PERIGEE_x'] - pair_data['PERIGEE_y'])
pair_data['APOGEE_DIFF'] = np.abs(pair_data['APOGEE_x'] - pair_data['APOGEE_y'])

# 7. Orbital Period (in seconds): T = 2*pi*sqrt(a^3 / mu)
pair_data['ORBITAL_PERIOD_x'] = 2 * np.pi * np.sqrt(pair_data['SEMIMAJOR_AXIS_x']**3 / mu_earth)
pair_data['ORBITAL_PERIOD_y'] = 2 * np.pi * np.sqrt(pair_data['SEMIMAJOR_AXIS_y']**3 / mu_earth)

# 8. Epoch Difference (absolute difference of the two epoch columns; the
#    result keeps whatever type that subtraction yields, it is not converted
#    to seconds here)
pair_data['EPOCH_DIFF'] = np.abs(pair_data['EPOCH_x'] - pair_data['EPOCH_y'])

# 9. RAAN Difference (degrees, 0..180 after wrap-around)
pair_data['RAAN_DIFF'] = raan_separation

# Check the new columns
print(pair_data[['RELATIVE_ALTITUDE', 'RELATIVE_INCLINATION', 'RELATIVE_VELOCITY', 'DISTANCE', 
                 'ORBITAL_OVERLAP', 'PERIGEE_DIFF', 'APOGEE_DIFF', 'EPOCH_DIFF', 'RAAN_DIFF']].head())


In [None]:
# List the column names again, now including the pairwise feature columns.
column_headers = list(pair_data.columns)
print(column_headers)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Convert EPOCH_DIFF to seconds (or another unit if needed).
# Only done while the column is still a timedelta (dtype kind 'm'), so the
# cell can be re-run without `.dt` failing on an already converted column.
if pair_data['EPOCH_DIFF'].dtype.kind == 'm':
    pair_data['EPOCH_DIFF'] = pair_data['EPOCH_DIFF'].dt.total_seconds()

# Scale the relevant features to [0, 1] (overwrites the raw values in place)
scaler = MinMaxScaler()

features_to_scale = ['DISTANCE', 'RELATIVE_VELOCITY', 'RELATIVE_ALTITUDE', 
                     'RELATIVE_INCLINATION', 'PERIGEE_DIFF', 'APOGEE_DIFF', 
                     'EPOCH_DIFF']

pair_data[features_to_scale] = scaler.fit_transform(pair_data[features_to_scale])

# Separation features: after scaling, 0 is the closest pair and 1 the farthest.
# They are turned into "closeness" (1 - value) so that a SMALLER separation
# gives a HIGHER risk; the score previously rose with separation, labelling
# the most distant pairs as the risky ones.
separation_features = ['DISTANCE', 'RELATIVE_ALTITUDE', 'RELATIVE_INCLINATION',
                       'PERIGEE_DIFF', 'APOGEE_DIFF']
closeness = 1 - pair_data[separation_features]

# Calculate RISK_SCORE
# NOTE(review): the weights are unchanged and add up to 1.05, not 1. The
# RELATIVE_VELOCITY and ORBITAL_OVERLAP terms keep their original direction.
pair_data['RISK_SCORE'] = (
    0.3 * closeness['DISTANCE'] +
    0.3 * pair_data['RELATIVE_VELOCITY'] +
    0.2 * pair_data['ORBITAL_OVERLAP'] +
    0.1 * closeness['RELATIVE_ALTITUDE'] +
    0.05 * closeness['RELATIVE_INCLINATION'] +
    0.05 * closeness['PERIGEE_DIFF'] +
    0.05 * closeness['APOGEE_DIFF']
)

# Assign a COLLISION_RISK based on the median risk score:
# 1 for pairs strictly above the median, 0 otherwise
median_risk_score = pair_data['RISK_SCORE'].median()
pair_data['COLLISION_RISK'] = np.where(pair_data['RISK_SCORE'] > median_risk_score, 1, 0)

# Print the resulting data
print(pair_data[['RISK_SCORE', 'COLLISION_RISK']].head())


In [None]:
# Display the pair table including RISK_SCORE and COLLISION_RISK.
pair_data

In [None]:
# Number of pairs labelled as risky (COLLISION_RISK equal to 1)
is_risky = pair_data['COLLISION_RISK'].eq(1)
count_risk_1 = is_risky.sum()

print("Number of rows with RISK_SCORE > median (COLLISION_RISK = 1):", count_risk_1)


In [None]:
# Save the pair table, including the RISK_SCORE and COLLISION_RISK columns, without the row index.
pair_data.to_csv("pair_data_with_target.csv", index=False)

In [None]:
from sklearn.model_selection import train_test_split

# Model inputs and binary target.
# NOTE(review): COLLISION_RISK is derived from RISK_SCORE, which is itself a
# weighted sum that includes all four of these columns, so the model is
# fitted to reproduce that formula rather than an independent outcome.
feature_columns = ["RELATIVE_ALTITUDE", "RELATIVE_VELOCITY", "DISTANCE", "ORBITAL_OVERLAP"]
X = pair_data[feature_columns]
y = pair_data['COLLISION_RISK']

# Hold out 20% of the pairs for testing; fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the shapes of both partitions
print(f"Training set size: {X_train.shape}, Testing set size: {X_test.shape}")


In [None]:
from sklearn.linear_model import LogisticRegression

# Create a logistic regression with default settings and fit it on the
# training partition (fit returns the estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out partition
y_pred = model.predict(X_test)

# Show the first ten predictions
print("Predictions on Test Set:", y_pred[:10])


In [None]:
# First ten COLLISION_RISK labels of pair_data, in row order.
# NOTE: despite the variable name these are the 0/1 labels, not RISK_SCORE values.
first_10_risk_scores = pair_data['COLLISION_RISK'].head(10)

# Print the first 10 COLLISION_RISK labels
print(first_10_risk_scores)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Score the test-set predictions against the true labels
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report each scalar metric with four decimals, then the confusion matrix
scalar_metrics = (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
)
for metric_label, metric_value in scalar_metrics:
    print(f"{metric_label}: {metric_value:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
