In [4]:
import numpy as np
import pandas as pd
import lightgbm as lgb

In [3]:
def get_prob_matrix(novae_oof, nonnovae_oof, novae_obj, nonnovae_obj):
    """Combine two per-group probability arrays into one 14-class table.

    Args:
        novae_oof: 2-D array-like with 6 columns, in the order
            prob15, prob42, prob52, prob62, prob67, prob90.
        nonnovae_oof: 2-D array-like with 8 columns, in the order
            prob6, prob16, prob53, prob64, prob65, prob88, prob92, prob95.
        novae_obj: object ids for the rows of ``novae_oof``.
        nonnovae_obj: object ids for the rows of ``nonnovae_oof``.

    Returns:
        A DataFrame with ``object_id`` followed by the 14 ``prob*`` columns
        in ascending class order. Rows of the first group come first; the
        probability columns belonging to the other group are filled with 0.0.
        The index is not reset, so each part keeps its own row index.
    """
    novae_cols = ["prob15", "prob42", "prob52", "prob62", "prob67", "prob90"]
    nonnovae_cols = ["prob6", "prob16", "prob53", "prob64",
                     "prob65", "prob88", "prob92", "prob95"]
    ordered_cols = [
        "object_id",
        "prob6", "prob15", "prob16", "prob42", "prob52", "prob53", "prob62",
        "prob64", "prob65", "prob67", "prob88", "prob90", "prob92", "prob95",
    ]

    def _expand(probs, own_cols, missing_cols, object_ids):
        # Build one group's frame, pad the other group's classes with zeros,
        # then put the columns in a consistent order.
        frame = pd.DataFrame(probs, columns=own_cols)
        for col in missing_cols:
            frame[col] = 0.0
        frame["object_id"] = object_ids
        return frame[ordered_cols]

    parts = [
        _expand(novae_oof, novae_cols, nonnovae_cols, novae_obj),
        _expand(nonnovae_oof, nonnovae_cols, novae_cols, nonnovae_obj),
    ]
    return pd.concat(parts)

In [None]:
class StackingClassifier:
    """Staged LightGBM stack: binary router, two group models, final model.

    A binary model splits rows into two groups (prediction ``1.0`` -> "novae",
    ``0.0`` -> "non-novae"). Each group has its own multi-class model. Their
    class probabilities are merged by ``get_prob_matrix`` into one table keyed
    by ``object_id``, appended to the original features, and fed to the final
    model.
    """

    def __init__(self, binary_params, novae_params, nonnovae_params, final_params):
        """Store the per-stage LightGBM parameter mappings.

        No estimator is created here; each ``*_fit`` method builds its own.
        """
        # Fitted estimators, populated by the matching *_fit method.
        self.binary_model = None
        self.novae_model = None
        self.nonnovae_model = None
        self.final_model = None

        self.binary_params = binary_params
        self.novae_params = novae_params
        self.nonnovae_params = nonnovae_params
        self.final_params = final_params

        # Placeholders for per-stage column lists; not read by any method here.
        self.binary_columns = None
        self.novae_columns = None
        self.nonnovae_columns = None
        self.final_columns = None

    @staticmethod
    def _fit_model(params, X, y, eval_set, eval_metric, sample_weight=None):
        """Build an ``LGBMClassifier`` from ``params``, fit it, and return it."""
        model = lgb.LGBMClassifier(**params)
        # NOTE: the ``verbose`` / ``early_stopping_rounds`` fit keywords were
        # removed in LightGBM 4.0; on 4.x pass the equivalent ``callbacks``.
        model.fit(
            X,
            y,
            sample_weight=sample_weight,
            eval_set=eval_set,
            eval_metric=eval_metric,
            verbose=100,
            early_stopping_rounds=100)
        return model

    @staticmethod
    def _attach_probs(X, objid, prob_matrix):
        """Return ``X`` with the probability columns appended row by row.

        ``prob_matrix`` is left-merged onto ``objid`` so its rows follow the
        order of ``X``, then re-indexed to ``X.index`` because
        ``pd.concat(axis=1)`` aligns on index labels, not on position.

        Raises:
            ValueError: if the merge does not yield exactly one row per row
                of ``X`` (e.g. duplicated ``object_id`` values).
        """
        aligned = pd.merge(objid, prob_matrix, how="left", on="object_id")
        if len(aligned) != len(X):
            raise ValueError(
                "probability matrix has {} rows after merging on object_id, "
                "expected {}".format(len(aligned), len(X)))
        aligned.index = X.index
        # NOTE(review): ``object_id`` stays in the result and is therefore
        # seen by the final model as a feature -- confirm this is intended.
        return pd.concat([X, aligned], axis=1)

    def binary_fit(self, X, bin_y, bin_eval_set, eval_metric):
        """Fit the binary routing model."""
        self.binary_model = self._fit_model(
            self.binary_params, X, bin_y, bin_eval_set, eval_metric)

    def binary_predict(self, val_X):
        """Return class labels from the binary model at its best iteration."""
        best_iteration = self.binary_model.best_iteration_
        return self.binary_model.predict(val_X, num_iteration=best_iteration)

    def novae_fit(self, X, y, eval_set, eval_metric, weights):
        """Fit the "novae" group model.

        ``weights`` maps each label in ``y`` to its sample weight
        (applied through ``y.map``).
        """
        self.novae_model = self._fit_model(
            self.novae_params, X, y, eval_set, eval_metric,
            sample_weight=y.map(weights))

    def novae_predict(self, val_X):
        """Return class probabilities from the "novae" model at its best iteration."""
        best_iteration = self.novae_model.best_iteration_
        return self.novae_model.predict_proba(val_X, num_iteration=best_iteration)

    def nonnovae_fit(self, X, y, eval_set, eval_metric, weights):
        """Fit the "non-novae" group model.

        ``weights`` maps each label in ``y`` to its sample weight
        (applied through ``y.map``).
        """
        self.nonnovae_model = self._fit_model(
            self.nonnovae_params, X, y, eval_set, eval_metric,
            sample_weight=y.map(weights))

    def nonnovae_predict(self, val_X):
        """Return class probabilities from the "non-novae" model at its best iteration."""
        best_iteration = self.nonnovae_model.best_iteration_
        return self.nonnovae_model.predict_proba(val_X, num_iteration=best_iteration)

    def final_fit(self, X, y, objid, novae_oof_preds, nonnovae_oof_preds,
                  novae_obj, nonnovae_obj, val_X, val_y, val_objid,
                  val_novae_oof, val_nonnovae_oof, val_novae_obj, val_nonnovae_obj,
                  eval_metric, weights):
        """Fit the final model on features plus group-model probabilities.

        Args:
            X, y, objid: training features, labels, and object ids
                (``objid`` must be mergeable on ``"object_id"``).
            novae_oof_preds, nonnovae_oof_preds: group-model probabilities
                for the training rows.
            novae_obj, nonnovae_obj: object ids matching those probability rows.
            val_X, val_y, val_objid, val_novae_oof, val_nonnovae_oof,
            val_novae_obj, val_nonnovae_obj: the same inputs for validation.
            eval_metric: forwarded to ``LGBMClassifier.fit``.
            weights: mapping from label to sample weight.
        """
        X_full = self._attach_probs(
            X,
            objid,
            get_prob_matrix(novae_oof_preds, nonnovae_oof_preds,
                            novae_obj, nonnovae_obj))
        val_X_full = self._attach_probs(
            val_X,
            val_objid,
            get_prob_matrix(val_novae_oof, val_nonnovae_oof,
                            val_novae_obj, val_nonnovae_obj))
        self.final_model = self._fit_model(
            self.final_params,
            X_full,
            y,
            [(X_full, y), (val_X_full, val_y)],
            eval_metric,
            sample_weight=y.map(weights))

    def final_predict(self, val_X):
        """Return class probabilities from the final model at its best iteration."""
        best_iteration = self.final_model.best_iteration_
        return self.final_model.predict_proba(val_X, num_iteration=best_iteration)

    def predict_test(self, X, objid):
        """Run all stages on unseen rows and return final class probabilities.

        Args:
            X: feature DataFrame.
            objid: object ids for the rows of ``X``, in the same order;
                must support ``.iloc`` and be mergeable on ``"object_id"``
                (presumably a Series named ``object_id`` -- confirm with caller).

        Returns:
            The output of ``final_predict`` on ``X`` extended with the
            merged group-model probabilities.
        """
        binary_prediction = np.asarray(self.binary_predict(X))
        # The prediction array is positional, so select rows by position
        # (iloc); label-based selection breaks on a non-default index.
        novae_pos = np.flatnonzero(binary_prediction == 1.0)
        nonnovae_pos = np.flatnonzero(binary_prediction == 0.0)

        novae_preds = self.novae_predict(X.iloc[novae_pos, :])
        nonnovae_preds = self.nonnovae_predict(X.iloc[nonnovae_pos, :])

        novae_obj = objid.iloc[novae_pos].values
        nonnovae_obj = objid.iloc[nonnovae_pos].values

        prob_matrix = get_prob_matrix(
            novae_preds, nonnovae_preds, novae_obj, nonnovae_obj)
        X_full = self._attach_probs(X, objid, prob_matrix)
        return self.final_predict(X_full)