In [11]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import ADASYN
from imblearn.pipeline import Pipeline
import joblib
from sklearn.preprocessing import StandardScaler


In [12]:
# Read the four pre-split CSV files (features and labels for train and test)
# in a fixed order and bind each resulting DataFrame to its own name.
# NOTE(review): these are absolute paths under one user's home directory, so
# the notebook only runs unmodified on that machine.
X_train, X_test, Y_train, Y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/home/kubuntu/Desktop/my_pc/Exoplanet_Prediction/data/X_train.csv",
        "/home/kubuntu/Desktop/my_pc/Exoplanet_Prediction/data/X_test.csv",
        "/home/kubuntu/Desktop/my_pc/Exoplanet_Prediction/data/Y_train.csv",
        "/home/kubuntu/Desktop/my_pc/Exoplanet_Prediction/data/Y_test.csv",
    )
)

In [13]:

# Two-step imblearn pipeline: an ADASYN sampler (seeded for repeatability)
# followed by an XGBoost classifier with fixed hyperparameters.
xgb_params = dict(
    colsample_bytree=0.8,
    gamma=0.2,
    learning_rate=0.2,
    max_depth=5,
    n_estimators=200,
    scale_pos_weight=1,
    subsample=0.8,
)
pipeline = Pipeline(steps=[
    ('adasyn', ADASYN(random_state=42)),
    ('xgb', XGBClassifier(**xgb_params)),
])




In [14]:
# Fit the sampler + classifier pipeline on the training split.
pipeline.fit(X=X_train, y=Y_train)

In [15]:
# Predict labels for the held-out test features with the fitted pipeline.
Y_pred = pipeline.predict(X=X_test)

In [16]:
# Build the text classification report comparing the test labels with the
# pipeline's predictions, then print it under a heading.
report = classification_report(y_true=Y_test, y_pred=Y_pred)
print("Classification Report:\n", report)


Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.88      0.91      1210
           1       0.81      0.92      0.86       686

    accuracy                           0.89      1896
   macro avg       0.88      0.90      0.89      1896
weighted avg       0.90      0.89      0.89      1896



In [17]:
# Persist the fitted pipeline to 'xgb.joblib' (path is relative to the
# kernel's current working directory).
joblib.dump(value=pipeline, filename='xgb.joblib')


['xgb.joblib']

In [18]:
# Reload the serialised pipeline from the same relative path that was passed
# to joblib.dump, so this round-trip reads back the file that was just written
# instead of whatever copy exists at a machine-specific absolute location.
# NOTE: joblib.load unpickles arbitrary Python objects; only load files you trust.
model = joblib.load('xgb.joblib')

In [19]:
# Run the reloaded pipeline on the test features and display its predictions.
model.predict(X=X_test)

array([0, 1, 1, ..., 0, 1, 1])

In [20]:
# Load the final feature table and preview its first rows.
# NOTE(review): absolute, user-specific path; not portable across machines.
final_csv_path = "/home/kubuntu/Desktop/my_pc/Exoplanet_Prediction/data/data_final.csv"
data = pd.read_csv(final_csv_path)

data.head()

Unnamed: 0,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_teq,koi_steff,koi_slogg,koi_srad
0,9.488036,170.53875,0.146,2.9575,616.0,793.0,5455.0,4.467,0.927
1,54.418383,162.51384,0.586,4.507,875.0,443.0,5455.0,4.467,0.927
2,2.525592,171.59555,0.701,1.6545,603.0,1406.0,6031.0,4.438,1.046
3,11.094321,171.20116,0.538,4.5945,1520.0,835.0,6046.0,4.486,0.972
4,4.134435,172.97937,0.762,3.1402,686.0,1160.0,6046.0,4.486,0.972


In [21]:
# Standard scaler with its default settings written out explicitly.
sc = StandardScaler(copy=True, with_mean=True, with_std=True)



In [22]:
# Learn per-column scaling statistics from `data`, then apply them to the
# same frame; the result is a NumPy array.
scaled = sc.fit(data).transform(data)

In [23]:
# Display the array produced by the scaler's fit_transform call.
scaled

array([[-0.32487153,  0.31658383, -0.87203728, ..., -0.03695585,
         0.22836875, -0.21666153],
       [ 0.47398865,  0.12868232,  0.43039676, ..., -0.03695585,
         0.22836875, -0.21666153],
       [-0.44866357,  0.34132858,  0.77080566, ...,  0.82202808,
         0.10323827, -0.02157835],
       ...,
       [ 6.3490091 ,  3.69841007, -1.12956402, ...,  0.1479643 ,
         0.71594613, -0.42813825],
       [-0.42465436, -0.51911713, -1.23020665, ...,  0.34779737,
         0.54766721, -0.27239958],
       [-0.06189967, -0.05864975,  0.8181669 , ..., -1.53570385,
         0.72889066, -0.5773195 ]])

In [24]:
# Wrap the scaled array in a DataFrame that reuses the column labels of `data`.
feature_names = data.columns
final_df = pd.DataFrame(data=scaled, columns=feature_names)

In [25]:
# Display the scaled values as a DataFrame with the original column labels.
final_df

Unnamed: 0,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_teq,koi_steff,koi_slogg,koi_srad
0,-0.324872,0.316584,-0.872037,-0.471474,-0.174657,-0.102966,-0.036956,0.228369,-0.216662
1,0.473989,0.128682,0.430397,0.100166,-0.070688,-1.029343,-0.036956,0.228369,-0.216662
2,-0.448664,0.341329,0.770806,-0.952176,-0.179875,1.519517,0.822028,0.103238,-0.021578
3,-0.296312,0.332094,0.288313,0.132447,0.188229,0.008199,0.844397,0.310351,-0.142891
4,-0.420058,0.373730,0.951370,-0.404073,-0.146557,0.868407,0.844397,0.310351,-0.142891
...,...,...,...,...,...,...,...,...,...
2738,-0.413815,-0.505440,0.004146,-1.261478,0.489296,-1.235793,-3.346132,2.946721,-1.419948
2739,-0.348613,-0.534503,0.060387,-0.918420,6.201543,-1.394601,-3.210424,3.015758,-1.426505
2740,6.349009,3.698410,-1.129564,2.115202,-0.345663,-1.619578,0.147964,0.715946,-0.428138
2741,-0.424654,-0.519117,-1.230207,-0.708874,-0.398410,0.659310,0.347797,0.547667,-0.272400


In [26]:
# Display the unscaled frame that was read from data_final.csv.
data

Unnamed: 0,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_teq,koi_steff,koi_slogg,koi_srad
0,9.488036,170.53875,0.146,2.9575,616.0,793.0,5455.0,4.467,0.927
1,54.418383,162.51384,0.586,4.5070,875.0,443.0,5455.0,4.467,0.927
2,2.525592,171.59555,0.701,1.6545,603.0,1406.0,6031.0,4.438,1.046
3,11.094321,171.20116,0.538,4.5945,1520.0,835.0,6046.0,4.486,0.972
4,4.134435,172.97937,0.762,3.1402,686.0,1160.0,6046.0,4.486,0.972
...,...,...,...,...,...,...,...,...,...
2738,4.485592,135.43172,0.442,0.8161,2270.0,365.0,3236.0,5.097,0.193
2739,8.152759,134.19046,0.461,1.7460,16500.0,305.0,3327.0,5.113,0.189
2740,384.847556,314.97000,0.059,9.9690,190.0,220.0,5579.0,4.580,0.798
2741,3.875943,134.84758,0.025,2.3140,58.6,1081.0,5713.0,4.541,0.893
