In [31]:
import pandas as pd 
import numpy as np
from sklearn.ensemble import StackingClassifier, StackingRegressor
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import StratifiedKFold

# Data Preparation


In [32]:
# Read the train and test tables from the parent directory in one pass.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("../train.csv", "../test.csv")
)


In [33]:
def transform(df):
    """One-hot encode the object-dtype columns of ``df``.

    Returns a tuple ``(dummies, object_columns)``: the frame produced by
    ``pd.get_dummies`` for the object-dtype columns (each dummy prefixed with
    its source column name) and the index of those source column names.
    """
    categorical = df.select_dtypes(include=['object'])
    categorical_names = categorical.columns
    encoded = pd.get_dummies(categorical, prefix=categorical_names)
    return encoded, categorical_names

def connectData(df, transformed, columns, all_dummy_columns):
    """Replace ``columns`` in ``df`` with the aligned dummy columns.

    Parameters
    ----------
    df : DataFrame that still contains the columns listed in ``columns``.
    transformed : DataFrame of dummy columns built from those columns.
    columns : labels to drop from ``df``.
    all_dummy_columns : every dummy label the result must contain. Labels
        missing from ``transformed`` are added and filled with ``False``.

    Returns
    -------
    DataFrame with the remaining columns of ``df`` followed by the dummy
    columns. An ordered input (list, Index, ...) keeps its order; a set is
    sorted first.
    """
    if isinstance(all_dummy_columns, (set, frozenset)):
        # Set iteration order follows string hashing, which can differ between
        # interpreter runs; sorting makes the feature order reproducible.
        dummy_order = sorted(all_dummy_columns, key=str)
    else:
        dummy_order = list(all_dummy_columns)

    remaining = df.drop(columns, axis=1)
    aligned = transformed.reindex(columns=dummy_order, fill_value=False)
    return pd.concat([remaining, aligned], axis=1)

def split(title, df):
    """Separate the column ``title`` from the rest of ``df``.

    Returns ``(features, target)``: ``df`` without the ``title`` column, and
    the ``title`` column itself.
    """
    features = df.drop(columns=title)
    target = df[title]
    return features, target

In [34]:
# Columns excluded from the feature matrix (dropped before encoding and again
# before modelling, because connectData starts from the untouched frames).
ID_COLUMNS = ["id", "CustomerId", "Surname"]

train_transformed_data, train_deserted_columns = transform(train_df.drop(ID_COLUMNS, axis=1))
test_transformed_data, test_deserted_columns = transform(test_df.drop(ID_COLUMNS, axis=1))

# Union of the dummy labels seen in either split, sorted so the feature order
# is the same on every run instead of following set iteration order.
all_dummy_columns = sorted(
    set(train_transformed_data.columns).union(test_transformed_data.columns)
)

train_converted_df = connectData(train_df, train_transformed_data, train_deserted_columns, all_dummy_columns)
test_converted_df = connectData(test_df, test_transformed_data, test_deserted_columns, all_dummy_columns)

train_x, train_y = split("Exited", train_converted_df.drop(ID_COLUMNS, axis=1))
test_x = test_converted_df.drop(ID_COLUMNS, axis=1)

# The fitted model is later applied to test_x, so both frames must expose the
# same features in the same order.
assert list(train_x.columns) == list(test_x.columns), "train/test feature columns differ"

# Find The Best Params

In [35]:
from sklearn.model_selection import GridSearchCV
def findParams(param_try, estimator, x=None, y=None, cv=5, scoring=None):
    """Grid-search ``estimator`` over ``param_try`` and return the best parameters.

    Parameters
    ----------
    param_try : parameter grid passed to ``GridSearchCV`` as ``param_grid``.
    estimator : the estimator to tune.
    x, y : training features and target. When left as ``None`` the
        notebook-level ``train_x`` / ``train_y`` are used, so existing
        two-argument calls behave as before.
    cv : cross-validation setting forwarded to ``GridSearchCV`` (default 5).
    scoring : scoring setting forwarded to ``GridSearchCV``; ``None`` keeps the
        estimator's default scorer.

    Returns
    -------
    The ``best_params_`` attribute of the fitted search.
    """
    features = train_x if x is None else x
    target = train_y if y is None else y
    grid_search = GridSearchCV(
        estimator=estimator, param_grid=param_try, cv=cv, scoring=scoring
    )
    grid_search.fit(features, target)
    return grid_search.best_params_

In [36]:
# Tune the forest depth. The forest is seeded (same seed as the StratifiedKFold
# used for stacking) so repeated searches select the same parameters.
rfParams = findParams({
    'max_depth': [7, 10, 20],
}, estimator=RandomForestClassifier(random_state=42))



In [37]:
# Candidate tree depths and boosting-round counts for the XGBoost search.
xg_param_grid = {
    "max_depth": [3, 5, 7],
    "n_estimators": [10, 45, 50, 100],
}
# NOTE(review): the search estimator is created with device='gpu', while the
# XGBClassifier placed in the stacking model is built without it — confirm
# that this difference is intended.
xgParams = findParams(xg_param_grid, xgb.XGBClassifier(device='gpu'))

In [38]:
# Meta-learner fitted on the base models' cross-validated predictions.
# validation_fraction is only used when early_stopping is enabled, so it is
# switched on here; random_state makes weight init and the hold-out split
# repeatable.
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    max_iter=1200,
    early_stopping=True,
    validation_fraction=.2,
    random_state=42,
)
skf = StratifiedKFold(n_splits=7, shuffle=True, random_state=42)
stacking_model = StackingClassifier(
    estimators=[
        ("xg", xgb.XGBClassifier(**xgParams)),
        # The dict merge lets a tuned random_state (if one is ever added to
        # the grid) override the default seed instead of raising a TypeError.
        ("rf", RandomForestClassifier(**{"random_state": 42, **rfParams})),
    ],
    final_estimator=mlp,
    cv=skf,
)

In [39]:
# Load the interval table and flatten all of its cells into one 1-D array.
# NOTE(review): interval_list is not referenced by any other visible cell —
# confirm it is still needed.
constraint_interval = pd.read_csv(filepath_or_buffer="../analysis/interval_found.csv")
interval_list = constraint_interval.to_numpy().flatten()

In [40]:
# Fit the stacked ensemble on the prepared training features and target.
stacking_model.fit(X=train_x, y=train_y)

In [41]:
# predict_proba returns one column per class; keep the column at index 1.
# NOTE(review): presumably that is the probability of Exited == 1 — confirm
# against stacking_model.classes_.
class_probabilities = stacking_model.predict_proba(test_x)
machine_predicting_ans = class_probabilities[:, 1]

In [42]:
# Template whose "id" column defines the rows of the final submission.
submitting = pd.read_csv(filepath_or_buffer="../sample_submission.csv")

In [None]:
# Attach each test row's id and predicted probability, then join them onto
# the submission template by id.
submit_test = test_x.copy()
submit_test['id'] = test_df['id']
submit_test['Exited'] = machine_predicting_ans
final_df = submitting.drop("Exited", axis=1).merge(
    submit_test[["Exited", "id"]],
    on='id',
    how='left',
    # Each id must appear at most once among the predictions; otherwise the
    # left join would duplicate submission rows.
    validate="many_to_one",
)

# A left join leaves NaN for ids with no prediction; fail here rather than
# writing an incomplete submission file.
assert final_df["Exited"].notna().all(), "some submission ids have no prediction"

In [None]:
# Write the submission without the DataFrame index.
# NOTE(review): "pedicted_data" looks like a typo of "predicted_data" —
# confirm the directory name on disk before changing the path.
final_df.to_csv(path_or_buf="../../pedicted_data/shallowLearner.csv", index=False)