In [1]:
import json
import os

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [2]:
def _make_classifier(model_name):
    """Return a new, unfitted classifier for the given short model name.

    Supported names: "catb", "ligb", "xgb", "gbdt", "rf".

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    # Each entry is a zero-argument factory so that only the requested
    # classifier class is looked up and instantiated.
    factories = {
        "catb": lambda: CatBoostClassifier(random_state=2025, verbose=False),
        "ligb": lambda: LGBMClassifier(random_state=2025),
        # NOTE(review): `use_label_encoder` is reported as deprecated by newer
        # XGBoost releases; kept to preserve the existing configuration --
        # confirm against the installed version.
        "xgb": lambda: XGBClassifier(random_state=2025, use_label_encoder=False, eval_metric='logloss'),
        "gbdt": lambda: GradientBoostingClassifier(random_state=2025),
        "rf": lambda: RandomForestClassifier(random_state=2025),
    }
    try:
        factory = factories[model_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable names, which are equally unrecognized.
        raise ValueError(f"Unrecognized model name: {model_name}. Please check your input.") from None
    return factory()


def model_prediction(X, y, model_name):
    """Fit one classifier on a stratified 70/30 split and predict for every row of X.

    The model name is validated before any data is split or any model is
    trained, so a typo fails immediately.

    Args:
        X: feature table passed to ``train_test_split`` and to the model.
        y: labels aligned with ``X``; also used as the stratification target.
        model_name: one of "catb", "ligb", "xgb", "gbdt", "rf".

    Returns:
        pandas.DataFrame with columns ``Actual`` (the given ``y``),
        ``Predicted_Probability`` (second column of ``predict_proba(X)``) and
        ``Predicted_Class`` (``predict(X)``).

        The predictions cover ALL rows of ``X``, including the rows the model
        was trained on, so they are not a purely held-out estimate. Only the
        printed AUC is computed on the held-out 30%.

    Raises:
        ValueError: if ``model_name`` is not recognized.
    """
    model = _make_classifier(model_name)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=2025, stratify=y
    )
    model.fit(X_train, y_train)

    # AUC is scored on the held-out part only, using the second probability column.
    auc_score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(f"{model_name} Test AUC: {auc_score}")

    return pd.DataFrame({
        'Actual': y,
        'Predicted_Probability': model.predict_proba(X)[:, 1],
        'Predicted_Class': model.predict(X),
    })

In [3]:
def koml_prediction(X, y, models, colpath="", outpath=""):
    """Train each requested model on its selected features and write predictions to CSV.

    For every name in ``models`` this reads ``<colpath>/<name>_select.csv``,
    keeps the ``Feature`` values of the rows where the ``select`` column is
    true, runs ``model_prediction`` on those columns of ``X``, and writes the
    result to ``<outpath>/<name>_prediction.csv`` without the index.

    Args:
        X: feature table; must contain every selected feature as a column.
        y: label table; flattened to a 1-D array via ``to_numpy().ravel()``.
        models: iterable of model names understood by ``model_prediction``.
        colpath: directory holding the ``*_select.csv`` files. An empty string
            means the current working directory.
        outpath: directory receiving the ``*_prediction.csv`` files. An empty
            string means the current working directory. Created if missing.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    y = y.to_numpy().ravel()

    # Create the output directory before any training so that a missing
    # directory cannot fail the write after a model has already been fitted.
    if outpath:
        os.makedirs(outpath, exist_ok=True)

    for model_name in models:
        # os.path.join avoids turning an empty directory into the root path
        # ("/name_select.csv"), which plain "{dir}/{file}" formatting would do.
        file_select = os.path.join(colpath, f'{model_name}_select.csv')
        select_info = pd.read_csv(file_select)

        # 'select' is used as a row mask -- presumably a boolean column; verify
        # against the files that produce it.
        selected_features = select_info[select_info['select']]['Feature'].tolist()
        X_new = X[selected_features]
        prediction = model_prediction(X_new, y, model_name=model_name)

        file_path = os.path.join(outpath, f"{model_name}_prediction.csv")
        prediction.to_csv(file_path, index=False)
        print(f"{model_name}: Write Prediction, Done!")
    

In [None]:
# Load the feature table and the label table from disk.
X = pd.read_csv('/Data/X.csv')
y = pd.read_csv('/Data/y.csv')

# Models to run, in order; each name must be one that model_prediction accepts.
model_names = ['catb', 'ligb', 'xgb', 'gbdt', 'rf']

# NOTE(review): colpath and outpath are passed as empty strings here --
# confirm the directories that hold the *_select.csv inputs and should
# receive the *_prediction.csv outputs.
koml_prediction(
    X,
    y,
    models=model_names,
    colpath="",
    outpath="",
)