In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

np.random.seed(0)
import datetime as dt
from datetime import datetime

from xgboost import XGBClassifier
import catboost as ctb

import eli5
import xgbfir

from sklearn.metrics import fbeta_score, precision_score, recall_score, make_scorer
from sklearn.model_selection import StratifiedKFold, cross_val_score

from itertools  import combinations

import seaborn as sns
sns.set(rc={'figure.figsize':(15, 5)})
import matplotlib.pyplot as plt


# Load the training and test tables from HDF5 and report their shapes.
df_train = pd.read_hdf("../input/train_churn_pred.h5") 
df_test = pd.read_hdf("../input/test_churn_pred.h5") 
print(df_train.shape, df_test.shape)


# Untouched copies of the raw frames. They are kept because later code still
# reads them (ctrain for the target column, ctest for the submission ids).
ctrain = df_train.copy()  # helper copy; not removed because it is used further on
ctest = df_test.copy()



# Create numeric features from the date columns: the number of days elapsed
# since 2014-01-01.

# NOTE: every object-dtype column of the training frame is treated as a date.
obj_feats = df_train.select_dtypes("object").columns
reference_date = pd.Timestamp("2014-01-01")

for frame in (df_train, df_test):
    for oname in obj_feats:
        # errors="coerce" turns unparseable entries into NaT (NaN day number).
        # The deprecated errors="ignore" handed back the raw strings instead,
        # which made the date subtraction below raise a TypeError.
        parsed = pd.to_datetime(frame[oname], dayfirst=False, errors="coerce")
        frame[f'{oname}_dt'] = parsed

        # Vectorised day difference; NaT propagates as NaN.
        day_number = (parsed - reference_date).dt.days
        # -16071 is 1970-01-01 (the Unix epoch) relative to the reference date,
        # presumably a placeholder date; it is mapped to 0.
        frame[f'{oname}_daynr'] = day_number.replace([-16071], 0)

# The raw string columns and the intermediate datetime columns are no longer
# needed once the numeric day numbers exist.
drop_feats = df_train.select_dtypes(include=["object", 'datetime']).columns

df_train.drop(drop_feats, inplace=True, axis=1)
df_test.drop(drop_feats, inplace=True, axis=1)
print("Kolumny drop_feats usuniete")



# Extra indicator columns showing the direction of change between months 6 and 8 - removed, because they are (probably) subsumed by the fe() function.

# def indicator1(row):
#         result = -2
#         if np.isnan(row[feats[0]]) or np.isnan(row[feats[2]]):
#             result = -1
#         elif row[feats[0]] > row[feats[2]]:
#             result = 1
#         else:
#             result = 0
#         return result

# metrics = {"_".join( x.split("_")[:-1] ) for x in df_train.columns if ("_6" in x) & ("date" not in x)}

# for metric in metrics:
#     feats = ["{}_{}".format(metric, x) for x in [6, 7, 8]]
#     print(feats)
#     df_train[f'{metric}_ind'] = df_train.apply(indicator1, axis=1)
#     df_test[f'{metric}_ind'] = df_test.apply(indicator1, axis=1)

# print('Koniec')

# ind_list = [x for x in df_train.columns if ("_ind" in x)]



# Outlier handling: collect the numeric columns to process, skipping the row
# id and the target column.

black_list = ["id", "churn_probability"]
numeric_columns = df_train.select_dtypes("number").columns
feats = [column for column in numeric_columns if column not in black_list]

def cap_outliers(array, k=3):
    """Cap values lying more than ``k`` standard deviations from the mean.

    Values below ``mean - k*std`` are raised to that limit and values above
    ``mean + k*std`` are lowered to it. Missing values are left as they are,
    because comparisons with NaN are false.

    The input is NOT modified: the capping is done on a copy. This matters
    because the function is used as a ``DataFrame.apply`` callback, and pandas
    does not support callbacks that mutate the object they are given.

    Parameters
    ----------
    array : pandas.Series or numpy.ndarray
        One column of numeric values.
    k : float, default 3
        Width of the accepted band, in standard deviations.

    Returns
    -------
    Same type as ``array``
        A capped copy of the input.
    """
    # Compute the statistics once instead of once per limit.
    center = array.mean()
    spread = k * array.std()
    lower_limit = center - spread
    upper_limit = center + spread

    capped = array.copy()
    capped[capped < lower_limit] = lower_limit
    capped[capped > upper_limit] = upper_limit
    return capped

%%time
# Cap outliers column by column, using the default k of cap_outliers.
df_train_ot = df_train[feats].apply(cap_outliers, axis=0)
# NOTE(review): the test set is capped with limits computed from the test set
# itself, not with the limits derived from the training set - confirm that
# this is intended.
df_test_ot = df_test[feats].apply(cap_outliers, axis=0)

# plt.figure(figsize=(15,8))
# plt.xticks(rotation=45)
# sns.boxplot(data = df_train_ot[['total_ic_mou_8', 'loc_ic_mou_8', 'loc_ic_t2m_mou_8', 'total_og_mou_8', 'count_rech_2g_8']])

# Re-attach the id and target columns (taken from the untouched copy) to the
# capped training features; pd.concat(axis=1) aligns the rows on the index.
ytrain = ctrain[['id', "churn_probability"]]
df_train= pd.concat([df_train_ot, ytrain], axis=1)


def prepare_submit(ctrain, ctest, df_train, df_test, model, feats, out_filename, treshold=0.2):
    """Fit ``model`` on the training features and write a submission CSV.

    Parameters
    ----------
    ctrain : pandas.DataFrame
        Frame providing the target column ``"churn_probability"``. Its rows
        must be in the same order as the rows of ``df_train``.
    ctest : pandas.DataFrame
        Frame providing the ``"id"`` column of the test rows. It is modified
        in place: a ``"churn_probability"`` column with the 0/1 predictions
        is added (or overwritten).
    df_train, df_test : pandas.DataFrame
        Feature frames; only the columns named in ``feats`` are used and
        missing values are replaced by -1.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``.
    feats : list of str
        Names of the feature columns.
    out_filename : str
        File name without extension; the file is written to
        ``../output/<out_filename>.csv``.
    treshold : float, default 0.2
        Probability above which a row is labelled 1. The misspelt name is
        kept so existing keyword calls continue to work.

    Returns
    -------
    None
    """
    import os  # local import: the file's import block does not provide os

    out_path = "../output/{}.csv".format(out_filename)
    # Create the output directory before fitting, so a missing folder is not
    # discovered only after the (potentially long) training run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    X_train = df_train[feats].fillna(-1).values
    y_train = ctrain["churn_probability"].values
    X_test = df_test[feats].fillna(-1).values

    model.fit(X_train, y_train)
    # Column 1 of predict_proba is the probability of the positive class.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    y_pred = (y_pred_proba > treshold).astype("int")

    ctest["churn_probability"] = y_pred
    ctest[["id", "churn_probability"]].to_csv(out_path, index=False)

#     fbeta_15 = make_scorer(fbeta_score, beta=1.5)
#     print(f'CTB: {cross_val_score(model, X_train, y_train, scoring=fbeta_15, cv=3, n_jobs=-1).mean()}')

def fe(df):
    """Add month-over-month features for every ``*mou_6/7/8`` metric family.

    A metric family is found from each column of ``df`` whose name contains
    ``"mou_6"``; the ``_7`` and ``_8`` siblings must exist as well. For every
    family the following columns are added to ``df`` in place:

    * ``mean_<metric>``, ``sum_<metric>``, ``min_<metric>``, ``max_<metric>``:
      row-wise statistics over the three months;
    * ``<a> - <b>`` and ``<a> / <b>`` for every month pair (8, 7), (8, 6),
      (7, 6);
    * ``mean_<m> - <a>`` and ``mean_<m> / <a>``: the column-wise mean of month
      ``m`` (over all rows of ``df``) minus / divided by the row value of
      month ``a``.

    Infinite values (from division by zero) are replaced by 3.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the monthly metric columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns added.
    """
    # Discover the metric families from the frame being transformed rather
    # than from a module-level frame; sorting makes the column order
    # reproducible between runs (set order depends on string hashing).
    metrics = sorted(
        {"_".join(x.split("_")[:-1]) for x in df.columns if "mou_6" in x}
    )

    for metric in metrics:
        metric_months = ["{}_{}".format(metric, x) for x in [8, 7, 6]]
        monthly = df[metric_months]

        mean_by_month = monthly.mean(axis=0).to_dict()
        df[f"mean_{metric}"] = monthly.mean(axis=1)
        df[f"sum_{metric}"] = monthly.sum(axis=1)
        df[f"min_{metric}"] = monthly.min(axis=1)
        df[f"max_{metric}"] = monthly.max(axis=1)

        for later, earlier in combinations(metric_months, 2):
            df[f"{later} - {earlier}"] = df[later] - df[earlier]
            df[f"{later} / {earlier}"] = df[later] / df[earlier]

            # The value now matches the column label: both columns relate a
            # monthly mean to the row value of `later`. Previously the
            # `mean_{earlier} ... {later}` columns were computed from
            # df[earlier], which contradicted their name and merely
            # duplicated other columns.
            df[f"mean_{later} - {later}"] = mean_by_month[later] - df[later]
            df[f"mean_{earlier} - {later}"] = mean_by_month[earlier] - df[later]

            df[f"mean_{later} / {later}"] = mean_by_month[later] / df[later]
            df[f"mean_{earlier} / {later}"] = mean_by_month[earlier] / df[later]

    # One pass over the finished frame instead of a full-frame scan per metric.
    df.replace([np.inf, -np.inf], 3, inplace=True)

    return df

# Hand-maintained list of selected feature names. The `random_*` entries are
# filtered out below. Every name must stay on a single physical line: a line
# break inside a quoted name is a syntax error.
feats_386 = ['mean_loc_og_t2c_mou_8 - loc_og_t2c_mou_8', 'mean_std_og_mou_8 - std_og_mou_8', 'mean_total_ic_mou_8 / total_ic_mou_8', 'roam_og_mou_8 / roam_og_mou_7', 'std_og_t2f_mou_6', 'mean_std_og_t2m_mou', 'max_rech_amt_8', 'spl_og_mou_8 / spl_og_mou_7', 'min_roam_og_mou', 'std_ic_t2m_mou_8 - std_ic_t2m_mou_7', 'loc_og_t2c_mou_8 - loc_og_t2c_mou_6', 'arpu_2g_7', 'mean_isd_og_mou', 'max_rech_data_7', 'loc_ic_mou_7 / loc_ic_mou_6', 'min_loc_ic_mou', 'sum_spl_og_mou', 'loc_ic_t2f_mou_8 / loc_ic_t2f_mou_6', 'max_rech_amt_7', 'mean_total_og_mou_7 - total_og_mou_7', 'sum_std_og_t2t_mou', 'mean_std_ic_t2t_mou_8 / std_ic_t2t_mou_8', 'count_rech_3g_7', 'last_day_rch_amt_7', 'total_ic_mou_8 - total_ic_mou_7', 'mean_std_og_mou_7 - std_og_mou_8', 'total_og_mou_8 / total_og_mou_6', 'mean_loc_ic_mou_7 / loc_ic_mou_8', 'sum_loc_og_t2m_mou', 'total_ic_mou_8 / total_ic_mou_6', 'mean_loc_ic_mou_6 / loc_ic_mou_7', 'max_std_ic_t2m_mou', 'mean_loc_og_t2f_mou_8 / loc_og_t2f_mou_8', 'arpu_3g_6', 'sum_total_og_mou', 'offnet_mou_8 - offnet_mou_7', 'mean_loc_og_t2t_mou_7 / loc_og_t2t_mou_8', 'sum_loc_ic_mou', 'mean_onnet_mou_7 - onnet_mou_7', 'min_roam_ic_mou', 'onnet_mou_6', 'total_og_mou_7 / total_og_mou_6', 'arpu_2g_6', 'std_og_t2t_mou_7', 'std_ic_t2m_mou_8 - std_ic_t2m_mou_6', 'max_total_og_mou', 'mean_total_ic_mou_7 / total_ic_mou_8', 'std_og_t2m_mou_8 - std_og_t2m_mou_6', 'spl_ic_mou_8', 'mean_loc_ic_t2m_mou_6 - loc_ic_t2m_mou_8', 'std_og_mou_7 - std_og_mou_6', 'mean_loc_ic_t2f_mou_7 / loc_ic_t2f_mou_7', 'spl_ic_mou_7 / spl_ic_mou_6', 'onnet_mou_8 - onnet_mou_6', 'loc_og_t2f_mou_8 / loc_og_t2f_mou_7', 'onnet_mou_7', 'jun_vbc_3g', 'std_ic_t2t_mou_7', 'mean_loc_ic_t2m_mou', 'mean_onnet_mou_6 - onnet_mou_8', 'mean_roam_ic_mou_8 / roam_ic_mou_8', 'mean_std_ic_t2t_mou_6 / std_ic_t2t_mou_7', 'mean_std_og_mou_6 / std_og_mou_7', 'min_onnet_mou', 'fb_user_6', 'mean_total_ic_mou_7 / total_ic_mou_7', 'loc_og_t2c_mou_7 - loc_og_t2c_mou_6', 'mean_loc_og_mou_7 - loc_og_mou_7',
             'spl_ic_mou_8 - spl_ic_mou_7', 'mean_loc_og_t2t_mou_7 / loc_og_t2t_mou_7', 'offnet_mou_7', 'mean_total_og_mou_6 - total_og_mou_8', 'std_og_t2m_mou_8 / std_og_t2m_mou_6', 'mean_offnet_mou_6 - offnet_mou_7', 'std_og_t2m_mou_7 - std_og_t2m_mou_6', 'loc_og_t2t_mou_7 - loc_og_t2t_mou_6', 'mean_total_ic_mou_6 / total_ic_mou_7', 'loc_og_t2t_mou_6', 'onnet_mou_8 - onnet_mou_7', 'mean_loc_ic_t2m_mou_6 / loc_ic_t2m_mou_8', 'random_cats', 'mean_roam_ic_mou_7 / roam_ic_mou_7', 'std_og_t2f_mou_7 - std_og_t2f_mou_6', 'mean_std_og_mou', 'max_isd_og_mou', 'max_offnet_mou', 'sum_offnet_mou', 'arpu_8', 'loc_ic_t2m_mou_6', 'mean_onnet_mou_8 - onnet_mou_8', 'max_std_og_mou', 'roam_og_mou_6', 'mean_loc_ic_mou_7 / loc_ic_mou_7', 'max_loc_ic_t2t_mou', 'max_spl_og_mou', 'max_spl_ic_mou', 'total_ic_mou_8 / total_ic_mou_7', 'roam_ic_mou_8 - roam_ic_mou_6', 'last_day_rch_amt_8', 'offnet_mou_8 / offnet_mou_6', 'loc_ic_t2t_mou_8 / loc_ic_t2t_mou_6', 'mean_total_og_mou_8 - total_og_mou_8', 'total_rech_data_8', 'mean_total_ic_mou_8 - total_ic_mou_8', 'onnet_mou_8 / onnet_mou_7', 'isd_og_mou_8', 'mean_loc_og_t2c_mou_7 - loc_og_t2c_mou_7', 'std_ic_t2t_mou_8 - std_ic_t2t_mou_6', 'mean_roam_ic_mou_8 - roam_ic_mou_8', 'vol_2g_mb_7', 'max_rech_amt_6', 'min_loc_og_t2t_mou', 'mean_roam_ic_mou_7 - roam_ic_mou_8', 'loc_og_t2t_mou_8 - loc_og_t2t_mou_7', 'spl_og_mou_7', 'spl_ic_mou_6', 'sachet_2g_6', 'mean_roam_og_mou_7 / roam_og_mou_7', 'mean_loc_ic_t2t_mou_8 - loc_ic_t2t_mou_8', 'roam_ic_mou_8', 'mean_roam_og_mou', 'std_ic_mou_8 - std_ic_mou_6', 'loc_og_t2f_mou_8 - loc_og_t2f_mou_6', 'jul_vbc_3g', 'sum_roam_og_mou', 'max_loc_ic_t2f_mou', 'loc_ic_mou_8 - loc_ic_mou_6', 'mean_loc_ic_t2m_mou_7 / loc_ic_t2m_mou_8', 'mean_loc_og_t2m_mou_7 - loc_og_t2m_mou_8', 'offnet_mou_7 - offnet_mou_6', 'mean_std_ic_t2t_mou_6 - std_ic_t2t_mou_8', 'sum_loc_og_mou', 'mean_loc_ic_t2t_mou', 'onnet_mou_7 / onnet_mou_6', 'min_isd_ic_mou', 'loc_ic_t2t_mou_8 - loc_ic_t2t_mou_7', 'sum_onnet_mou', 'spl_ic_mou_7 - spl_ic_mou_6', 'std_og_t2o_mou',
             'random_normal', 'mean_roam_og_mou_8 - roam_og_mou_8', 'max_onnet_mou', 'fb_user_7', 'mean_loc_ic_mou_8 - loc_ic_mou_8', 'roam_ic_mou_7 - roam_ic_mou_6', 'mean_std_og_t2m_mou_6 / std_og_t2m_mou_8', 'mean_loc_ic_t2m_mou_6 - loc_ic_t2m_mou_7', 'std_og_mou_8 - std_og_mou_6', 'mean_std_og_t2t_mou_7 - std_og_t2t_mou_7', 'mean_std_ic_t2m_mou_7 / std_ic_t2m_mou_8', 'mean_loc_og_t2t_mou_8 - loc_og_t2t_mou_8', 'mean_loc_og_t2c_mou_6 - loc_og_t2c_mou_7', 'std_og_mou_6', 'mean_loc_og_t2m_mou_8 - loc_og_t2m_mou_8', 'std_ic_t2m_mou_7 / std_ic_t2m_mou_6', 'mean_roam_ic_mou', 'loc_ic_t2t_mou_8 - loc_ic_t2t_mou_6', 'loc_ic_t2m_mou_7 / loc_ic_t2m_mou_6', 'mean_std_ic_t2t_mou_7 - std_ic_t2t_mou_8', 'vol_3g_mb_8', 'og_others_6', 'mean_roam_og_mou_6 / roam_og_mou_8', 'mean_roam_og_mou_8 / roam_og_mou_8', 'roam_og_mou_8', 'loc_og_t2c_mou_7 / loc_og_t2c_mou_6', 'mean_loc_og_mou_7 - loc_og_mou_8', 'spl_ic_mou_8 / spl_ic_mou_6', 'loc_og_t2o_mou', 'spl_og_mou_8 - spl_og_mou_7', 'mean_std_ic_t2t_mou_7 - std_ic_t2t_mou_7', 'mean_std_og_t2t_mou_6 - std_og_t2t_mou_7', 'loc_og_t2m_mou_8 - loc_og_t2m_mou_6', 'spl_og_mou_7 - spl_og_mou_6', 'fb_user_8', 'mean_std_og_mou_7 / std_og_mou_8', 'mean_loc_og_t2t_mou_6 / loc_og_t2t_mou_7', 'roam_ic_mou_8 / roam_ic_mou_6', 'loc_og_t2f_mou_7 - loc_og_t2f_mou_6', 'mean_isd_og_mou_8 / isd_og_mou_8', 'min_std_og_t2f_mou', 'mean_loc_ic_mou_6 - loc_ic_mou_7', 'onnet_mou_8', 'loc_og_mou_8 / loc_og_mou_7', 'total_rech_amt_8', 'mean_std_ic_t2f_mou', 'mean_std_og_t2t_mou_8 / std_og_t2t_mou_8', 'mean_spl_ic_mou_8 - spl_ic_mou_8', 'loc_ic_mou_8 - loc_ic_mou_7', 'total_ic_mou_7 / total_ic_mou_6', 'total_og_mou_8 / total_og_mou_7', 'mean_isd_ic_mou_6 - isd_ic_mou_7', 'mean_roam_og_mou_7 / roam_og_mou_8', 'mean_total_ic_mou_6 / total_ic_mou_8', 'std_ic_t2f_mou_8 / std_ic_t2f_mou_7', 'vol_3g_mb_7', 'min_isd_og_mou', 'total_rech_amt_6', 'mean_loc_ic_mou_7 - loc_ic_mou_8', 'total_og_mou_8 - total_og_mou_7', 'mean_loc_ic_t2m_mou_6 / loc_ic_t2m_mou_7', 'min_loc_og_t2c_mou',
             'mean_loc_ic_t2t_mou_7 - loc_ic_t2t_mou_7', 'arpu_3g_8', 'sum_isd_og_mou', 'roam_og_mou_7 / roam_og_mou_6', 'mean_std_ic_t2m_mou_8 - std_ic_t2m_mou_8', 'loc_ic_t2t_mou_6', 'loc_ic_mou_6', 'max_loc_og_t2t_mou', 'min_std_og_t2m_mou', 'max_rech_data_6', 'loc_ic_t2f_mou_8 - loc_ic_t2f_mou_6', 'loc_ic_t2m_mou_8 - loc_ic_t2m_mou_6', 'loc_ic_mou_8', 'mean_isd_og_mou_7 / isd_og_mou_8', 'mean_loc_ic_mou_6 / loc_ic_mou_8', 'loc_ic_t2t_mou_8 / loc_ic_t2t_mou_7', 'onnet_mou_7 - onnet_mou_6', 'std_og_mou_8 / std_og_mou_7', 'mean_std_og_t2m_mou_8 / std_og_t2m_mou_8', 'mean_onnet_mou_8 / onnet_mou_8', 'total_rech_num_8', 'ic_others_6', 'mean_loc_ic_t2m_mou_8 / loc_ic_t2m_mou_8', 'loc_ic_t2f_mou_6', 'mean_loc_ic_t2f_mou_8 - loc_ic_t2f_mou_8', 'max_std_ic_mou', 'arpu_6', 'sum_isd_ic_mou', 'mean_offnet_mou_8 / offnet_mou_8', 'max_roam_og_mou', 'mean_onnet_mou_7 / onnet_mou_8', 'loc_ic_t2m_mou_8', 'loc_ic_mou_8 / loc_ic_mou_6', 'std_ic_t2t_mou_6', 'sum_std_ic_mou', 'roam_og_mou_8 - roam_og_mou_7', 'loc_og_t2m_mou_8 / loc_og_t2m_mou_6', 'mean_roam_og_mou_7 - roam_og_mou_8', 'mean_onnet_mou_6 - onnet_mou_7', 'loc_ic_t2m_mou_8 / loc_ic_t2m_mou_6', 'sum_roam_ic_mou', 'max_std_ic_t2t_mou', 'max_loc_ic_t2m_mou', 'ic_others_7', 'max_rech_data_8', 'aug_vbc_3g', 'loc_og_t2f_mou_8 - loc_og_t2f_mou_7', 'mean_offnet_mou_6 / offnet_mou_8', 'mean_spl_og_mou', 'loc_og_t2c_mou_8 - loc_og_t2c_mou_7', 'total_ic_mou_6', 'mean_spl_ic_mou_7 / spl_ic_mou_7', 'mean_loc_ic_t2f_mou_8 / loc_ic_t2f_mou_8', 'loc_og_t2m_mou_8', 'std_ic_t2m_mou_7 - std_ic_t2m_mou_6', 'arpu_3g_7', 'total_og_mou_8', 'mean_std_ic_mou_7 - std_ic_mou_7', 'min_offnet_mou', 'loc_ic_t2t_mou_7 / loc_ic_t2t_mou_6', 'std_og_t2f_mou_8 - std_og_t2f_mou_7', 'mean_spl_ic_mou', 'loc_ic_t2m_mou_7', 'std_og_mou_7', 'std_ic_mou_8 - std_ic_mou_7', 'std_og_t2t_mou_6', 'roam_og_mou_7', 'offnet_mou_8', 'last_day_rch_amt_6', 'min_loc_ic_t2t_mou', 'loc_ic_t2f_mou_7', 'mean_onnet_mou_6 / onnet_mou_8', 'isd_ic_mou_6', 'total_rech_num_7',
             'mean_loc_ic_t2m_mou_7 / loc_ic_t2m_mou_7', 'sum_loc_ic_t2f_mou', 'mean_spl_og_mou_6 - spl_og_mou_7', 'sum_loc_og_t2t_mou', 'loc_ic_mou_7 - loc_ic_mou_6', 'max_loc_og_t2m_mou', 'mean_spl_og_mou_7 - spl_og_mou_7', 'av_rech_amt_data_8', 'std_ic_mou_6', 'mean_offnet_mou_7 - offnet_mou_7', 'max_loc_og_mou', 'night_pck_user_7', 'mean_std_ic_mou', 'total_ic_mou_7 - total_ic_mou_6', 'random_exponential', 'loc_ic_t2m_mou_8 / loc_ic_t2m_mou_7', 'mean_loc_ic_t2f_mou', 'mean_isd_ic_mou', 'mean_loc_og_t2f_mou_6 / loc_og_t2f_mou_8', 'mean_offnet_mou_8 - offnet_mou_8', 'mean_std_ic_t2t_mou_6 / std_ic_t2t_mou_8', 'sum_loc_ic_t2m_mou', 'total_og_mou_6', 'loc_ic_t2t_mou_7', 'mean_total_og_mou', 'max_loc_ic_mou', 'mean_loc_og_mou_6 - loc_og_mou_7', 'loc_ic_mou_8 / loc_ic_mou_7', 'roam_ic_mou_7 / roam_ic_mou_6', 'roam_ic_mou_7', 'std_og_t2t_mou_7 - std_og_t2t_mou_6', 'loc_og_mou_7 - loc_og_mou_6', 'mean_total_ic_mou', 'std_ic_t2t_mou_7 - std_ic_t2t_mou_6', 'mean_loc_og_mou_6 - loc_og_mou_8', 'loc_ic_t2f_mou_7 - loc_ic_t2f_mou_6', 'min_spl_og_mou', 'mean_std_ic_mou_6 - std_ic_mou_8', 'loc_og_t2t_mou_8', 'loc_ic_mou_7', 'loc_og_mou_7 / loc_og_mou_6', 'std_og_t2t_mou_8 - std_og_t2t_mou_6', 'count_rech_2g_6', 'std_og_t2m_mou_8', 'roam_og_mou_8 - roam_og_mou_6', 'isd_og_mou_8 - isd_og_mou_7', 'max_loc_og_t2f_mou', 'total_ic_mou_8 - total_ic_mou_6', 'mean_std_og_t2m_mou_6 - std_og_t2m_mou_7', 'mean_std_ic_t2m_mou_7 - std_ic_t2m_mou_8', 'total_ic_mou_8', 'loc_og_mou_7', 'mean_roam_ic_mou_7 - roam_ic_mou_7', 'std_ic_mou_7 / std_ic_mou_6', 'total_rech_num_6', 'total_rech_amt_7', 'mean_loc_og_t2c_mou_8 / loc_og_t2c_mou_8', 'mean_offnet_mou', 'mean_std_og_mou_6 - std_og_mou_8', 'spl_ic_mou_7', 'av_rech_amt_data_7', 'roam_ic_mou_8 / roam_ic_mou_7', 'mean_total_ic_mou_6 - total_ic_mou_7', 'night_pck_user_8', 'std_og_mou_8 - std_og_mou_7', 'mean_loc_og_t2c_mou', 'arpu_7', 'std_ic_t2m_mou_6', 'min_std_ic_t2m_mou', 'roam_ic_mou_8 - roam_ic_mou_7', 'mean_onnet_mou', 'max_total_ic_mou',
             'mean_loc_ic_mou', 'std_og_t2m_mou_8 - std_og_t2m_mou_7', 'spl_ic_mou_8 / spl_ic_mou_7', 'loc_ic_t2m_mou_7 - loc_ic_t2m_mou_6', 'mean_loc_ic_t2m_mou_8 - loc_ic_t2m_mou_8', 'mean_loc_ic_t2f_mou_6 - loc_ic_t2f_mou_8', 'mean_loc_ic_mou_8 / loc_ic_mou_8', 'max_loc_og_t2c_mou', 'mean_loc_ic_t2f_mou_6 - loc_ic_t2f_mou_7', 'mean_loc_ic_t2t_mou_8 / loc_ic_t2t_mou_8', 'mean_loc_ic_t2t_mou_6 - loc_ic_t2t_mou_8', 'std_ic_t2m_mou_8 / std_ic_t2m_mou_7', 'loc_og_t2m_mou_8 - loc_og_t2m_mou_7', 'loc_og_t2m_mou_7 / loc_og_t2m_mou_6', 'roam_og_mou_8 / roam_og_mou_6', 'loc_ic_t2m_mou_8 - loc_ic_t2m_mou_7', 'vol_2g_mb_6', 'total_og_mou_8 - total_og_mou_6', 'mean_loc_og_t2t_mou_7 - loc_og_t2t_mou_8', 'aon', 'ic_others_8', 'std_og_t2t_mou_8 - std_og_t2t_mou_7', 'roam_og_mou_7 - roam_og_mou_6', 'mean_offnet_mou_6 - offnet_mou_8', 'mean_loc_og_t2m_mou', 'std_og_t2t_mou_7 / std_og_t2t_mou_6', 'loc_ic_t2t_mou_8', 'loc_ic_t2f_mou_8', 'mean_roam_ic_mou_7 / roam_ic_mou_8', 'loc_og_mou_8 - loc_og_mou_6', 'std_ic_t2f_mou_7 - std_ic_t2f_mou_6', 'vol_3g_mb_6', 'mean_loc_og_t2f_mou', 'sum_total_ic_mou', 'offnet_mou_8 / offnet_mou_7', 'mean_std_og_t2f_mou_8 - std_og_t2f_mou_8', 'mean_std_og_mou_6 - std_og_mou_7', 'onnet_mou_8 / onnet_mou_6']

# Drop the `random_*` columns, then append the day-number date features.
feats_383 =  [x for x in feats_386 if "random" not in x]
feats_383 =  feats_383 + ['date_of_last_rech_6_daynr', 'date_of_last_rech_7_daynr', 'date_of_last_rech_8_daynr', 'date_of_last_rech_data_6_daynr', 'date_of_last_rech_data_7_daynr', 'date_of_last_rech_data_8_daynr']
# feats_383 =  feats_383 + ind_list
print(len(feats_383))


# Engineer features on copies of the outlier-capped frames (train first,
# then test), so the capped frames themselves stay unchanged.
df_train_fe, df_test_fe = (fe(frame.copy()) for frame in (df_train_ot, df_test_ot))


# Alternative model kept for reference:
# model = ctb.CatBoostClassifier(
#     max_depth=7, n_estimators=200, random_state=0, 
#     learning_rate=0.085, verbose=False)

# XGBoost hyper-parameters collected in one place.
xgb_params = dict(
    learning_rate=0.01,
    colsample_bytree=0.4,
    subsample=0.8,
    objective='binary:logistic',
    n_estimators=1000,
    reg_alpha=0.3,
    max_depth=7,
    gamma=10,
    random_state=0,
)
model = XGBClassifier(**xgb_params)

# Fit on the engineered training features and write the submission file.
prepare_submit(
    ctrain, ctest, df_train_fe, df_test_fe, model, feats_383,
    out_filename="chrm_30F_xgb_ot_inf3", treshold=0.2,
)