In [None]:
from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive
import joblib
import pandas as pd
import numpy as np

def compute_esi(row):
    temp = np.clip(row['temperature'], 150, 350)
    mass = np.clip(row['mass'], 0.1, 10)
    radius = np.clip(row['radius'], 0.5, 2.5)

    radius_score = (1 - abs(radius - 1) / (radius + 1)) ** 0.57
    mass_score = (1 - abs(mass - 1) / (mass + 1)) ** 1.07
    temp_score = (1 - abs(temp - 288) / (temp + 288)) ** 5.58

    esi = radius_score * mass_score * temp_score
    return round(esi * 100, 2)

# live exoplanet data from NASA
data = NasaExoplanetArchive.query_criteria(
    table="ps",
    select="pl_name, pl_bmasse, pl_rade, pl_eqt, pl_insol",
    where="pl_bmasse IS NOT NULL AND pl_rade IS NOT NULL AND pl_eqt IS NOT NULL AND pl_insol IS NOT NULL"
)

# pandas DataFrame
g = data.to_pandas()
g.rename(columns={
    'pl_name': 'name',
    'pl_bmasse': 'mass',
    'pl_rade': 'radius',
    'pl_eqt': 'temperature',
    'pl_insol': 'flux'
}, inplace=True)

# Dropping null values
g.dropna(subset=['mass', 'radius', 'temperature', 'flux'], inplace=True)

g[['mass', 'radius', 'temperature', 'flux']] = g[['mass', 'radius', 'temperature', 'flux']].astype(float)

scaler = joblib.load('scaler.pkl')
model = joblib.load('habitability_model.pkl')

features = ['mass', 'radius', 'temperature', 'flux']
X_new = scaler.transform(g[features])
g['potentially_habitable'] = model.predict(X_new)

habitable_planets = g[g['potentially_habitable'] == 1].copy()

habitable_planets['ESI(%)'] = habitable_planets.apply(compute_esi, axis=1)

# Sorting planets by highest ESI
habitable_planets.sort_values(by='ESI(%)', ascending=False, inplace=True)

habitable_planets.to_csv('predicted_habitable_exoplanets.csv', index=False)

print("Total exoplanets analyzed:", len(g))
print("Potentially habitable exoplanets found (after ESI):", len(habitable_planets))
print("Top habitable planets by ESI:")
print(habitable_planets[['name', 'mass', 'radius', 'temperature', 'flux', 'ESI(%)']].head(18))


Total exoplanets analyzed: 572
Potentially habitable exoplanets found (after ESI): 17
Top habitable planets by ESI:
                 name         mass     radius  temperature   flux  ESI(%)
105            K2-3 d     2.200000   1.458000        305.2  1.440   45.63
143      TRAPPIST-1 f     0.680000   1.045000        219.0  0.382   34.82
144      TRAPPIST-1 g     1.340000   1.127000        198.6  0.258   26.29
2         Kepler-22 b     9.100000   2.100000        279.0  1.013   12.60
396        LP 890-9 c    25.300000   1.367000        272.0  0.906   12.47
148        LHS 1140 b     5.600000   1.730000        226.0  0.430   11.39
150        LHS 1140 b     6.650000   1.430000        230.0  0.460   10.98
43              PH2 b    87.084984   9.337081        295.1  1.200   10.95
149        LHS 1140 b     6.980000   1.727000        235.0  0.503   10.50
492        TOI-2134 c    41.890000   7.270000        306.0  1.400    9.88
109           K2-18 b     8.630000   2.610000        254.9  1.005    9

In [8]:
##graphics
import matplotlib.pyplot as plt
import seaborn as sns
import os
os.makedirs("graphics", exist_ok=True)

for feature in features:
    plt.figure()
    sns.histplot(data=g, x=feature, hue='potentially_habitable', kde=True, palette='Set2')
    plt.title(f"{feature.capitalize()} Distribution by Habitability")
    if feature == 'flux' or feature == 'mass':
        plt.xscale('log')
        
    plt.xlabel(feature.capitalize())

    plt.tight_layout()
    plt.savefig(f"graphics/{feature}_distribution_hist.png", dpi=300)
    plt.clf()

# Scatter Plot: Radius vs Temperature
sns.scatterplot(data=g, x='radius', y='temperature', hue='potentially_habitable', palette='coolwarm')
plt.title("Habitability Zone: Radius vs Temperature")
plt.xlabel("Radius (Earth radii)")
plt.ylabel("Equilibrium Temperature (K)")
plt.tight_layout()
plt.savefig("graphics/radius_vs_temperature.png", dpi=300)
plt.clf()

# Feature Importance Bar Plot
importances = model.feature_importances_
plt.barh(features, importances, color='teal')
plt.xlabel("Importance")
plt.title("Feature Importance (Random Forest)")
plt.tight_layout()
plt.savefig("graphics/feature_importance.png", dpi=300)
plt.clf()

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>