In [6]:
import os, glob, joblib
import pandas as pd
import numpy as np

# --- Directories ---
# All paths are relative to the notebook's working directory.
MODEL_DIR = "../models"
PRED_DIR = "../predictions"
DATA_DIR = "../preprocessed_tabular_data"

# Make sure the predictions output folder exists before anything is written to it.
os.makedirs(PRED_DIR, exist_ok=True)

# --- Load the best model (assume first in ranking) ---
# Model files carry a rank prefix (e.g. 01_ModelName.pkl), so the first name in
# sorted order is treated as the top-ranked model.
model_files = sorted(glob.glob(os.path.join(MODEL_DIR, "*.pkl")))
if not model_files:
    # glob returns an empty list when nothing matches; fail with an actionable
    # message instead of an opaque IndexError on model_files[0].
    raise FileNotFoundError(
        f"No .pkl model files found in {os.path.abspath(MODEL_DIR)!r}; "
        "check MODEL_DIR and that the ranked models have been saved."
    )
best_model_file = model_files[0]   # top-ranked model saved as 01_ModelName.pkl
print(f"🏆 Loading best model: {best_model_file}")
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that come from a trusted source.
best_model = joblib.load(best_model_file)


🏆 Loading best model: ../models\01_RandomForest.pkl


In [7]:
# --- Load test/unseen data ---
test_csv = os.path.join(DATA_DIR, "test_prepared.csv")
test = pd.read_csv(test_csv)

# Drop the listed non-feature columns (any that are absent are ignored), then
# keep only the numeric columns as model inputs.
non_feature_cols = ["label", "tic_id", "obj_id", "object_name", "star_name"]
X_test = (
    test
    .drop(columns=non_feature_cols, errors="ignore")
    .select_dtypes(include=[np.number])
)

print("Test data shape:", X_test.shape)

# --- Run inference ---
y_pred = best_model.predict(X_test)
# Column index 1 of the predict_proba output is stored as the probability.
y_prob = best_model.predict_proba(X_test)[:, 1]

# Attach predictions to a copy of the full test set (dropped columns included)
results = test.assign(prediction=y_pred, probability=y_prob)

# Save predictions
pred_path = os.path.join(PRED_DIR, "final_predictions.csv")
results.to_csv(pred_path, index=False)
print(f"✅ Predictions saved: {pred_path}")


Test data shape: (3112, 6)
✅ Predictions saved: ../predictions\final_predictions.csv


In [8]:
# --- Inference Helper Function ---
def predict_single(sample_dict):
    """
    Run inference on a single sample passed as a dict of feature: value.

    The sample is aligned to the columns of the notebook-level ``X_test``
    frame: columns are put in that order, keys that are not in
    ``X_test.columns`` are dropped, and features missing from the dict are
    filled with 0.

    Parameters
    ----------
    sample_dict : dict
        Mapping of feature name to value for one sample.

    Returns
    -------
    dict
        ``{"prediction": int, "probability": float}``, where ``probability``
        is the value at column index 1 of ``best_model.predict_proba``.

    Example:
        sample = {"feature1": 0.5, "feature2": 1.2, "feature3": 3}
        predict_single(sample)
    """
    # Build the one-row frame from the argument itself. The body previously read
    # the global ``sample``, silently ignoring whatever the caller passed in.
    sample_df = pd.DataFrame([sample_dict])
    sample_df = sample_df.reindex(columns=X_test.columns, fill_value=0)

    pred = best_model.predict(sample_df)[0]
    prob = best_model.predict_proba(sample_df)[0, 1]
    return {"prediction": int(pred), "probability": float(prob)}

# --- Example single prediction ---
# Use the first row of the test features as a demo input.
sample = X_test.iloc[0].to_dict()
sample_result = predict_single(sample)
print("🔮 Sample prediction:", sample_result)

🔮 Sample prediction: {'prediction': 1, 'probability': 0.5263058825579292}


In [9]:
import joblib
import os

# Persist the in-memory `best_model` under a fixed filename inside MODEL_DIR.
# Both names come from earlier cells (MODEL_DIR is "../models").

# Full path of the new model file
output_model_path = os.path.join(MODEL_DIR, "best_final_model.pkl")

# Write the model object to that path
joblib.dump(value=best_model, filename=output_model_path)

print(f"\n💾 Model saved successfully to: {output_model_path}")


💾 Model saved successfully to: ../models\best_final_model.pkl
