# Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier
from sklearn.metrics import roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline
# Seed NumPy's global random state so NumPy-driven randomness repeats between runs.
np.random.seed(0)

# Loading Data and Split

In [None]:
# Read the competition's train and test CSVs, in that order, through a single
# read expression so the loading call is written once.
trainData, testData = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-sep-2021/train.csv',
        '../input/tabular-playground-series-sep-2021/test.csv',
    )
)

# Last expression of the cell: preview the first rows of the training frame.
trainData.head()

In [None]:
# Target column the models are fitted against.
y = trainData['claim']

# Copy of the training frame with one extra feature: the number of missing
# cells in each row (counted over every column of trainData). trainData itself
# is not modified.
trainDataCopy = trainData.assign(n_missing=trainData.isnull().sum(axis=1))

# Feature matrix: everything except the target and the row identifier.
X = trainDataCopy.drop(columns=['claim', 'id'])

# Fixed-seed 70/30 hold-out split.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=0,
)


In [None]:
def score_dataset(X_train, X_valid, y_train, y_valid, est, lrate, model_type):
    """Fit an XGBoost model on the training split and return its validation ROC AUC.

    Parameters
    ----------
    X_train, y_train
        Features and targets passed to the model's ``fit``.
    X_valid, y_valid
        Held-out features to score and the true labels they are compared against.
    est
        Forwarded to the model as ``n_estimators``.
    lrate
        Forwarded to the model as ``learning_rate``.
    model_type
        ``'classifier'`` selects ``XGBClassifier``; any other value selects
        ``XGBRegressor``.

    Returns
    -------
    The value returned by ``roc_auc_score(y_valid, scores)``.
    """
    is_classifier = model_type == 'classifier'
    model_cls = XGBClassifier if is_classifier else XGBRegressor
    model = model_cls(n_estimators=est, learning_rate=lrate, n_jobs=4, tree_method='gpu_hist')

    model.fit(X_train, y_train)

    if is_classifier:
        # ROC AUC is a ranking metric: it needs continuous scores. The
        # classifier's predict() yields hard class labels, which throws the
        # ranking away, so score with the second predict_proba column instead.
        # Assumes a binary target, where that column is the positive class.
        preds = model.predict_proba(X_valid)[:, 1]
    else:
        # Regressor outputs are already continuous and can be ranked directly.
        preds = model.predict(X_valid)

    return roc_auc_score(y_valid, preds)

# Imputer

In [None]:
# Mean imputation: the fill values are learned from the training split only
# (fit_transform) and then reused unchanged on the validation split (transform).
imputer = SimpleImputer(strategy='mean')
train_filled = imputer.fit_transform(X_train)
valid_filled = imputer.transform(X_valid)

# Wrap the imputed values back into DataFrames and restore the column labels.
# Note: the new frames get a fresh default row index, not the one from
# X_train / X_valid.
imputed_X_train = pd.DataFrame(train_filled, columns=X_train.columns)
imputed_X_valid = pd.DataFrame(valid_filled, columns=X_valid.columns)



# Validation

In [None]:
# Hold-out ROC AUC for the regressor configuration: 2300 trees, learning rate 0.02.
valid_auc = score_dataset(
    imputed_X_train,
    imputed_X_valid,
    y_train,
    y_valid,
    est=2300,
    lrate=0.02,
    model_type='regressor',
)
print(valid_auc)

# Submission file

In [None]:
# Build the test features the same way as the training features: drop the row
# identifier and append the per-row count of missing cells.
testDataCopy = testData.copy().drop('id', axis=1)
testDataCopy['n_missing'] = testData.isnull().sum(axis=1)

# Fill gaps in the test set with the TRAINING-split column means. The model
# below is fitted on features imputed with those same means, so re-fitting the
# imputer on the test data would feed it differently preprocessed inputs.
# Assumes testDataCopy has the same feature columns, in the same order, as X_train.
imputer = SimpleImputer(strategy='mean').fit(X_train)
imputed_X_test = pd.DataFrame(imputer.transform(testDataCopy), columns=testDataCopy.columns)

# Final model for the submission.
# NOTE(review): this uses 2500 trees while the validation cell scored 2300, and
# it is fitted on the 70% training split only - confirm both are intentional.
model = XGBRegressor(n_estimators=2500, learning_rate=0.02, n_jobs=4, tree_method='gpu_hist')
model.fit(imputed_X_train, y_train)
preds = model.predict(imputed_X_test)

# One prediction per test id, written out with the id as the index column.
series = pd.Series(preds, index=testData['id'].astype('int'), name='claim')
print(series)
series.to_csv('output.csv')