In [5]:
!pip install xgboost



Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.5/56.8 MB 581.6 kB/s eta 0:01:37
    --------------------------------------- 0.8/56.8 MB 721.9 kB/s eta 0:01:18
   - -------------------------------------- 1.6/56.8 MB 1.4 MB/s eta 0:00:41
   - -------------------------------------- 2.1/56.8 MB 1.6 MB/s eta 0:00:36
   - -------------------------------------- 2.1/56.8 MB 1.6 MB/s eta 0:00:36
   - -------------------------------------- 2.1/56.8 MB 1.6 MB/s eta 0:00:36
   - -------------------------------------- 2.4/56.8 MB 1.2 MB/s eta 0:00:47
   - -------------------------

ValueError: Mismatched version between the Python package and the native shared object.  Python package version: 2.1.1. Shared object version: 3.0.5. Shared object is loaded from: C:\Users\sohie\anaconda3\Lib\site-packages\xgboost\lib\xgboost.dll.
Likely cause:
  * XGBoost is first installed with anaconda then upgraded with pip. To fix it please remove one of the installations.

In [5]:
! "C:\Users\sohie\anaconda3\python.exe" -m pip install xgboost




In [7]:
# ============================================
# Exoplanet Classification using XGBoost
# ============================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from xgboost import XGBClassifier
import warnings

# Suppress warnings (optional)
warnings.filterwarnings("ignore")

# ===============================
# 1. Load Dataset
# ===============================
# NOTE(review): absolute, machine-specific path; point this at a configurable
# data directory before sharing the notebook.
DATA_PATH = r"C:\Users\sohie\OneDrive\Desktop\NASA Space Apps 2025\New folder\SET 4.csv"
df = pd.read_csv(DATA_PATH)

# ===============================
# 2. Define Features and Labels
# ===============================
features = [
    "ra", "dec", "pl_orbper", "pl_trandurh", "pl_trandep",
    "pl_rade", "pl_insol", "pl_eqt", "st_teff", "st_rad"
]
label_col = "tfopwg_disp"

# Binary target encoding: 1 = CANDIDATE, 0 = FALSE POSITIVE.
label_map = {"CANDIDATE": 1, "FALSE POSITIVE": 0}

# Keep only necessary columns
df = df[features + [label_col]]

# Keep only rows whose label is one of the mapped classes. This drops missing
# labels (NaN is never a member of the list) and also any other label value,
# which `.map` would otherwise turn into NaN targets that break the stratified
# split and model fitting further down.
df = df[df[label_col].isin(list(label_map))]

# Every remaining label is a key of label_map, so the mapping is total and the
# result can safely be stored as integers.
y = df[label_col].map(label_map).astype(int)
X = df[features]

# ===============================
# 3. Train/Test Split
# ===============================
# Split BEFORE computing any statistics so that nothing learned from the test
# rows leaks into preprocessing. Same seed and stratification as before, so the
# row assignment to train/test is unchanged.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ===============================
# 4. Handle Missing Values
# ===============================
# Per-feature medians are estimated on the training rows only and then reused
# for the test rows (and saved later for inference-time imputation).
X_medians = X_train.median()
X_train = X_train.fillna(X_medians)
X_test = X_test.fillna(X_medians)

# Keep the full feature frame imputed as well, for any later cell that reads X.
X = X.fillna(X_medians)

# ===============================
# 5. Scale Features
# ===============================
# The scaler is fitted on the training split and only applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ===============================
# 6. Train XGBoost Model
# ===============================
# Class-imbalance weight = (#negatives / #positives), measured on the training
# labels only so the held-out split does not influence model configuration.
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()

# `use_label_encoder` is intentionally not passed: it is obsolete in the
# XGBoost releases this notebook reports (2.1.1 / 3.0.5), and the labels are
# already encoded as 0/1.
clf = XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    random_state=42,
    scale_pos_weight=n_negative / n_positive,  # handle imbalance
    n_estimators=300,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8
)

print("\n🚀 Training XGBoost model...")
clf.fit(X_train_scaled, y_train)

# ===============================
# 7. Evaluate Model
# ===============================
# Hard class predictions for the held-out, scaled test features.
y_pred = clf.predict(X_test_scaled)

# Display names, in the order of the encoded labels 0 and 1.
class_names = ["FALSE POSITIVE", "CANDIDATE"]

print("\n✅ Confusion Matrix:")
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

print("\n✅ Classification Report:")
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

# ===============================
# 8. Save Model + Preprocessing
# ===============================
# Everything needed to reproduce inference: the fitted classifier, the fitted
# scaler, the ordered feature list, and the imputation medians.
artifacts = {
    "exoplanet_xgb_model.joblib": clf,
    "exoplanet_xgb_scaler.joblib": scaler,
    "exoplanet_xgb_features.joblib": features,
    "exoplanet_xgb_medians.joblib": X_medians,
}
for filename, obj in artifacts.items():
    # Written relative to the notebook's current working directory.
    joblib.dump(obj, filename)

print("\n💾 Model, scaler, feature list, and medians saved successfully.")



ValueError: Mismatched version between the Python package and the native shared object.  Python package version: 2.1.1. Shared object version: 3.0.5. Shared object is loaded from: C:\Users\sohie\anaconda3\Lib\site-packages\xgboost\lib\xgboost.dll.
Likely cause:
  * XGBoost is first installed with anaconda then upgraded with pip. To fix it please remove one of the installations.