In [1]:
import pandas as pd 
import numpy as np
from sklearn.ensemble import StackingClassifier
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold

# Data Preparation


In [2]:
# Read the training and test tables from the parent directory, in that order.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("../train.csv", "../test.csv")
)


In [3]:
def transform(df):
    """One-hot encode the object-dtype columns of ``df``.

    Returns a 2-tuple: the dummy-encoded frame built from only the
    object-dtype columns, and the Index holding the names of those
    original columns.
    """
    categorical = df.select_dtypes(include=['object'])
    categorical_names = categorical.columns
    # The source column name is used as the prefix of every dummy column.
    encoded = pd.get_dummies(categorical, prefix=categorical_names)
    return encoded, categorical_names

def connectData(df, transformed, columns, all_dummy_columns):
    """Replace the raw categorical columns of ``df`` with their one-hot encoding.

    Parameters
    ----------
    df : DataFrame
        Source frame; it is not modified.
    transformed : DataFrame
        Dummy-encoded columns, joined to ``df`` on the row index.
    columns : list-like
        Labels dropped from ``df`` before the dummies are appended.
    all_dummy_columns : list-like or set
        Every dummy label the result must contain. Labels missing from
        ``transformed`` are added as all-``False`` columns.

    Returns
    -------
    DataFrame
        ``df`` without ``columns``, followed by the dummy columns.
    """
    # A set iterates in hash order, which for strings differs between
    # interpreter sessions; sort it so the output column order is
    # reproducible. Ordered inputs (list, Index, ...) keep their own order.
    if isinstance(all_dummy_columns, (set, frozenset)):
        dummy_order = sorted(all_dummy_columns)
    else:
        dummy_order = all_dummy_columns
    aligned_dummies = transformed.reindex(columns=dummy_order, fill_value=False)
    return pd.concat([df.drop(columns, axis=1), aligned_dummies], axis=1)

def split(title, df):
    """Separate the column ``title`` from the rest of ``df``.

    Returns ``(df without that column, the column itself)``; ``df`` is
    left unchanged.
    """
    features = df.drop(columns=title)
    target = df[title]
    return features, target

In [4]:
# Columns excluded from one-hot encoding and from the final feature matrices.
ID_COLUMNS = ["id", "CustomerId", "Surname"]

# One-hot encode the object-dtype columns of each table separately.
train_transformed_data, train_deserted_columns = transform(train_df.drop(ID_COLUMNS, axis=1))
test_transformed_data, test_deserted_columns = transform(test_df.drop(ID_COLUMNS, axis=1))

# Use the union of both tables' dummy labels so train and test end up with the
# same columns. The union is sorted because a bare set iterates in hash order,
# which for strings changes between kernel restarts and would make the feature
# column order non-reproducible.
all_dummy_columns = sorted(set(train_transformed_data.columns).union(test_transformed_data.columns))

train_converted_df = connectData(train_df, train_transformed_data, train_deserted_columns, all_dummy_columns)
test_converted_df = connectData(test_df, test_transformed_data, test_deserted_columns, all_dummy_columns)

train_x, train_y = split("Exited", train_converted_df.drop(ID_COLUMNS, axis=1))
test_x = test_converted_df.drop(ID_COLUMNS, axis=1)

# Fail early if the two feature matrices do not line up column-for-column.
assert list(train_x.columns) == list(test_x.columns), "train/test feature columns differ"

# Find The Best Params

In [5]:
from sklearn.model_selection import GridSearchCV
def findParams(param_try, estimator, x=None, y=None, cv=5):
    """Grid-search ``estimator`` over ``param_try`` and return the best parameters.

    Parameters
    ----------
    param_try : dict or list of dicts
        Parameter grid, forwarded to ``GridSearchCV`` as ``param_grid``.
    estimator : estimator object
        Unfitted estimator to tune.
    x, y : optional
        Training features and target. When omitted, the notebook-level
        ``train_x`` / ``train_y`` are used, as before.
    cv : optional
        Cross-validation setting forwarded to ``GridSearchCV`` (default 5).

    Returns
    -------
    dict
        ``best_params_`` of the fitted search.
    """
    # Fall back to the notebook globals only when the caller supplies no data.
    if x is None:
        x = train_x
    if y is None:
        y = train_y
    # No ``scoring`` is passed, so the estimator's own ``score`` method ranks
    # the candidates.
    grid_search = GridSearchCV(estimator=estimator, param_grid=param_try, cv=cv)
    grid_search.fit(x, y)
    return grid_search.best_params_

In [6]:
# Tune the forest depth. The forest is seeded so the cross-validated scores,
# and therefore the selected max_depth, are the same on every run.
rfParams = findParams({
    'max_depth': [20, 30],
}, estimator=RandomForestClassifier(random_state=42))



In [7]:
# Tune tree depth and ensemble size for XGBoost. The search estimator is
# configured with device='gpu'; only the best parameter values are kept.
xgParams = findParams(
    param_try=dict(max_depth=[3, 5, 7], n_estimators=[50, 100, 200]),
    estimator=xgb.XGBClassifier(device='gpu'),
)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [8]:
# Final estimator of the stack: a perceptron with two hidden layers (70 and 50
# units). It is seeded so repeated runs produce the same fit.
# NOTE(review): scikit-learn documents validation_fraction as only used when
# early_stopping=True, which is left at its default here, so the value below
# currently has no effect. Confirm whether early stopping was intended.
mlp = MLPClassifier(
    hidden_layer_sizes=(70, 50),
    activation='relu',
    solver='adam',
    max_iter=1500,
    validation_fraction=.05,
    random_state=42,
)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Base learners use the grid-searched parameters and the same seed as the CV
# splitter, so the whole stack is reproducible.
stacking_model = StackingClassifier(
    estimators=[
        ("xg", xgb.XGBClassifier(**xgParams, random_state=42)),
        ("rf", RandomForestClassifier(**rfParams, random_state=42)),
    ],
    final_estimator=mlp,
    cv=skf,
)

In [9]:
# Read the interval table from the analysis folder and flatten it into a
# single one-dimensional array.
# NOTE(review): interval_list is not referenced by any later cell visible
# here; confirm whether it is still needed.
constraint_interval = pd.read_csv("../analysis/interval_found.csv")
interval_list = constraint_interval.to_numpy().flatten()

In [10]:
# Fit the stacked ensemble on the full training set.
stacking_model.fit(X=train_x, y=train_y)

In [11]:
# predict_proba yields one column per class; keep the column at index 1.
test_probabilities = stacking_model.predict_proba(test_x)
machine_predicting_ans = test_probabilities[:, 1]

In [None]:
# Load the sample submission, which supplies the ids for the output file.
submitting = pd.read_csv(filepath_or_buffer="../sample_submission.csv")

In [None]:
# Attach the ids and predicted probabilities to a copy of the test features,
# then left-join the (Exited, id) pairs onto the sample submission by id.
submit_test = test_x.assign(id=test_df['id'], Exited=machine_predicting_ans)
final_df = (
    submitting
    .drop(columns="Exited")
    .merge(submit_test[["Exited", "id"]], how='left', on='id')
)

In [None]:
# Write the submission file without the row index.
final_df.to_csv(path_or_buf="deeperperceptron122.csv", index=False)