In [7]:
import pandas as pd
import numpy as np

# Read the raw exoplanet table produced by the earlier pipeline step
df = pd.read_csv('../output/exoplanets.csv')

# Columns consumed by the habitability score; anything that fails to parse
# as a number is turned into NaN rather than raising.
numeric_columns = ['pl_rade', 'pl_eqt', 'pl_insol']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Define habitability score function
def habitability_score(row):
    """Score how closely a planet matches three reference values (0 to 1).

    Each input contributes a weighted term that is largest when the value
    equals its reference and falls off linearly to zero once the absolute
    difference reaches the tolerance:

        key        reference  tolerance  weight
        pl_rade    1          10         0.3
        pl_eqt     288        88         0.4
        pl_insol   1          2          0.3

    NOTE(review): the references look like Earth's radius, temperature and
    insolation -- confirm the units against the data source.

    Args:
        row: Anything indexable by the three keys above, e.g. a DataFrame
            row or a plain dict.

    Returns:
        The weighted sum rounded to 3 decimals, or ``np.nan`` when any of
        the inputs is absent, NaN, or cannot be read as a number.
    """
    try:
        radius = float(row['pl_rade'])
        temperature = float(row['pl_eqt'])
        insolation = float(row['pl_insol'])
    except (KeyError, TypeError, ValueError):
        # Missing key or a value that is not a number: no score available.
        return np.nan

    # min(1, nan) evaluates to 1, so without this guard a missing value
    # would quietly contribute 0 and still produce a numeric score.
    if np.isnan(radius) or np.isnan(temperature) or np.isnan(insolation):
        return np.nan

    # Normalised distance from the reference, clamped to at most 1 so that
    # each term stays within [0, weight].
    radius_score = 0.3 * (1 - min(1, abs(radius - 1) / 10))
    temp_score = 0.4 * (1 - min(1, abs(temperature - 288) / 88))
    insol_score = 0.3 * (1 - min(1, abs(insolation - 1) / 2))

    return round(radius_score + temp_score + insol_score, 3)

# Score every planet row
df['hab_score'] = df.apply(habitability_score, axis=1)

# Flag planets whose score reaches 0.7 as potentially habitable (1) and all
# others as 0; a NaN score compares False and therefore also becomes 0.
meets_threshold = df['hab_score'] >= 0.7
df['potentially_habitable'] = meets_threshold.astype('int64')

# One output row per host star. Every output column is the minimum of the
# planet-level values in that system, except the planet count and the
# habitability flag (max, so it is 1 if any planet in the system is flagged).
system_summary_spec = {
    'sy_dist': pd.NamedAgg(column='sy_dist', aggfunc='min'),
    'total_planets': pd.NamedAgg(column='pl_name', aggfunc='count'),
    'st_teff': pd.NamedAgg(column='st_teff', aggfunc='min'),
    'st_rad': pd.NamedAgg(column='st_rad', aggfunc='min'),
    'st_mass': pd.NamedAgg(column='st_mass', aggfunc='min'),
    'potentially_habitable': pd.NamedAgg(
        column='potentially_habitable', aggfunc='max'
    ),
}
planets_by_host = df.groupby('hostname')
agg_df = planets_by_host.agg(**system_summary_spec).reset_index()

# Write the summary for the JS visualization to read
agg_df.to_csv('stars.csv', index=False)

print("Star systems summary CSV created!")
print(agg_df.head())

Star systems summary CSV created!
         hostname    sy_dist  total_planets  st_teff  st_rad  st_mass  \
0          AU Mic    9.72210              2   3678.0   0.744    0.510   
1    BD-14 3065 A  589.42300              1   6935.0   2.350    1.410   
2  Barnard's star    1.82655              1   3195.0   0.185    0.162   
3          CD Cet    8.60715              1   3130.0   0.175    0.161   
4        CoRoT-32  576.68300              1   5970.0   0.790    1.080   

   potentially_habitable  
0                      0  
1                      0  
2                      0  
3                      0  
4                      0  
