In [1]:
import pandas as pd

df = pd.read_csv("exoplanets_clean_full.csv")

FEATURES = [
    "pl_rade",
    "pl_bmasse",
    "pl_eqt",
    "pl_orbper",
    "st_teff",
    "st_rad"
]

df = df[FEATURES].dropna()

df.to_csv("exoplanets_clean_full.csv", index=False)

print("✅ Clean dataset saved")


✅ Clean dataset saved


In [4]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

df = pd.read_csv("exoplanets_clean_full.csv")

FEATURES = df.columns.tolist()
X = df[FEATURES]

# Dummy habitability score (replace later if you have labels)
y = (
    (df["pl_eqt"].between(200, 350)) &
    (df["pl_rade"].between(0.8, 1.5))
).astype(int)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_scaled, y)

joblib.dump(model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(FEATURES, "features.pkl")

print("✅ Model trained & saved")


✅ Model trained & saved


In [5]:
import sqlite3
import pandas as pd

df = pd.read_csv("exoplanets_clean_full.csv")

conn = sqlite3.connect("instance/exoplanets.db")

df["name"] = ["Planet_" + str(i) for i in range(len(df))]
df["habitability_score"] = None
df["rank"] = None

df.to_sql("exoplanet", conn, if_exists="append", index=False)

conn.close()
print("✅ Data loaded into database")


✅ Data loaded into database


In [6]:
import numpy as np
import joblib
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///database/exoplanets.db"
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

db = SQLAlchemy(app)

model = joblib.load("model.pkl")
scaler = joblib.load("scaler.pkl")
FEATURES = joblib.load("features.pkl")

class Exoplanet(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
    pl_rade = db.Column(db.Float)
    pl_bmasse = db.Column(db.Float)
    pl_eqt = db.Column(db.Float)
    pl_orbper = db.Column(db.Float)
    st_teff = db.Column(db.Float)
    st_rad = db.Column(db.Float)
    habitability_score = db.Column(db.Float)
    rank = db.Column(db.Integer)

def bulk_predict_and_rank():
    planets = Exoplanet.query.all()
    print(f"Total planets: {len(planets)}")

    for p in planets:
        vals = [getattr(p, f) for f in FEATURES]
        X = scaler.transform([vals])
        p.habitability_score = float(model.predict(X)[0])

    db.session.commit()

    ranked = (
        Exoplanet.query
        .order_by(Exoplanet.habitability_score.desc())
        .all()
    )

    for i, p in enumerate(ranked, 1):
        p.rank = i

    db.session.commit()
    print("✅ Bulk prediction + ranking completed")

if __name__ == "__main__":
    with app.app_context():
        bulk_predict_and_rank()


OperationalError: (sqlite3.OperationalError) unable to open database file
(Background on this error at: https://sqlalche.me/e/20/e3q8)