In [1]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

First, we create a dataset of synthetic (hypothetical) asteroids that has no hazardous/non-hazardous labels. We then feed this dataset into our trained model to predict each asteroid's hazard rating.

In [2]:
# Number of synthetic asteroids to generate
num_samples = 90_000

# Seed NumPy's global random number generator so the random draws made
# after this point are repeatable from run to run
np.random.seed(seed=42)

In [3]:
# Build the synthetic feature columns: every column is drawn independently
# from a uniform distribution over its own [low, high) range.
# The columns are generated in the listed order, so the sequence of draws
# taken from NumPy's global random generator matches the listing order.
data = {
    column: np.random.uniform(lower, upper, num_samples)
    for column, lower, upper in [
        ('est_diameter_min', 0, 40.0),
        ('est_diameter_max', 0, 90.0),
        ('relative_velocity', 150, 2.5e5),
        ('miss_distance', 6500, 8e7),
        ('absolute_magnitude', 5, 35.0),
    ]
}

In [4]:
# Clamp est_diameter_max from below so it is never smaller than
# est_diameter_min. Wherever the random maximum fell below the minimum,
# the two values end up equal (the result is >=, not strictly larger).
data['est_diameter_max'] = np.maximum(data['est_diameter_max'], data['est_diameter_min'])

# Assemble the synthetic columns into a DataFrame
hypothetical_asteroids = pd.DataFrame(data=data)

# Preview the first five rows
hypothetical_asteroids.head(5)

Unnamed: 0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude
0,14.981605,78.944324,53641.094128,29887570.0,17.026393
1,38.028572,81.64552,187971.525886,59366880.0,7.979078
2,29.279758,34.053509,72971.660757,37220400.0,24.781366
3,23.946339,23.946339,224091.385381,62417510.0,29.182326
4,6.240746,17.249495,98994.294293,21686090.0,34.139751


In [5]:
# Load the trained model from the file.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
model = joblib.load('asteroidpredictor.joblib')

# Feed the model only the five synthetic feature columns, in the order in
# which they were generated. A later cell adds a string 'hazardous' column to
# this same DataFrame; selecting the features explicitly keeps this cell
# re-runnable after that column exists instead of passing it to the model.
feature_columns = [
    'est_diameter_min',
    'est_diameter_max',
    'relative_velocity',
    'miss_distance',
    'absolute_magnitude',
]
predictions = model.predict(hypothetical_asteroids[feature_columns])

# Display the predictions
predictions

array([0, 0, 0, ..., 1, 0, 0], dtype=uint8)

In [6]:
# Attach the model output as a 'hazardous' column, translating the numeric
# class labels into flags: 0 -> 'F', 1 -> 'T'
hypothetical_asteroids['hazardous'] = pd.Series(
    predictions, index=hypothetical_asteroids.index
).map({0: 'F', 1: 'T'})

# Preview the first rows, now including the 'hazardous' column
hypothetical_asteroids.head()

Unnamed: 0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude,hazardous
0,14.981605,78.944324,53641.094128,29887570.0,17.026393,F
1,38.028572,81.64552,187971.525886,59366880.0,7.979078,F
2,29.279758,34.053509,72971.660757,37220400.0,24.781366,F
3,23.946339,23.946339,224091.385381,62417510.0,29.182326,F
4,6.240746,17.249495,98994.294293,21686090.0,34.139751,F


## Using the model to predict hazard rating

In [7]:
# Tally how many asteroids fall into each predicted class
count_hazardous = hypothetical_asteroids['hazardous'].value_counts()

# value_counts() only lists labels that actually occur, so look each label up
# with a default of 0 rather than indexing directly (direct indexing raises
# KeyError when every asteroid was assigned to the same class).
print("Count of Non-Hazardous (F):", count_hazardous.get('F', 0))
print("Count of Hazardous (T):", count_hazardous.get('T', 0))

Count of Non-Hazardous (F): 75138
Count of Hazardous (T): 14862
