This notebook tries to predict the target with a simple XGBClassifier model, without any feature engineering.

# Import libraries and load data

In [None]:
# Import libraries
import pandas as pd
import numpy as np

import xgboost as xgb

from sklearn.model_selection import StratifiedKFold, cross_val_score

In [None]:
# Display options
# Render every float shown by pandas with exactly three decimal places.
pd.options.display.float_format = lambda value: '%.3f' % value

To reduce the DataFrame's memory footprint, use the helper from [function memory usage optimization dataframe](https://www.kaggle.com/ellavs/function-memory-usage-optimization-dataframe/):

In [None]:
def optimize_memory_usage(df, print_size = True):
    """Downcast the numeric columns of ``df`` to narrower dtypes.

    Integer columns (int16/int32/int64) are converted to the narrowest of
    int8, int16, int32 or int64 whose range contains the column's minimum
    and maximum (bounds inclusive). Float columns (float16/float32/float64)
    are stored as float32 when their minimum and maximum lie within the
    float32 range, otherwise as float64. Columns of any other dtype are left
    untouched.

    Note that the float32 conversion is chosen from the value range only, so
    float64 columns may lose precision.

    Args:
        df: DataFrame to optimize. Its columns are reassigned in place.
        print_size: When true, print the memory usage (in Mb) before and
            after the conversion together with the relative saving.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Source dtypes that are candidates for conversion.
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Integer target dtypes, narrowest first, so the first fit is the smallest.
    int_targets = (np.int8, np.int16, np.int32, np.int64)
    float32_limits = np.finfo(np.float32)

    # Memory usage size before optimize (Mb).
    before_size = df.memory_usage().sum() / 1024**2
    for column in df.columns:
        column_type = df[column].dtypes
        if column_type not in numerics:
            continue
        column_min = df[column].min()
        column_max = df[column].max()
        if str(column_type).startswith('int'):
            for target in int_targets:
                limits = np.iinfo(target)
                # Inclusive comparison: a value equal to the dtype's own
                # min/max is still representable (e.g. 127 fits in int8).
                if limits.min <= column_min and column_max <= limits.max:
                    df[column] = df[column].astype(target)
                    break
            # No break happens when min/max are NaN (empty column); the
            # column then keeps its dtype.
        else:
            if float32_limits.min <= column_min and column_max <= float32_limits.max:
                df[column] = df[column].astype(np.float32)
            else:
                # Out-of-range, infinite or all-NaN columns fall back to float64.
                df[column] = df[column].astype(np.float64)
    # Memory usage size after optimize (Mb).
    after_size = df.memory_usage().sum() / 1024**2
    if print_size:
        # Guard the ratio so a frame reporting zero bytes prints 0.0% instead of nan.
        saved_percent = 100 * (before_size - after_size) / before_size if before_size else 0.0
        print('Memory usage size: before {:5.4f} Mb - after {:5.4f} Mb ({:.1f}%).'.format(before_size, after_size, saved_percent))
    return df

In [None]:
def import_data_from_csv(file_path):
    """Load a CSV file into a DataFrame and optimize its memory usage.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame after it has been passed through
        ``optimize_memory_usage``.
    """
    # `keep_date_col` is not passed any more: it only matters when
    # `parse_dates` combines several columns into one (not the case here) and
    # it is deprecated since pandas 2.2.
    df = pd.read_csv(file_path, parse_dates = True)
    return optimize_memory_usage(df)

In [None]:
# Load the training data: read the CSV and run it through the memory
# optimization done inside import_data_from_csv.
train_df = import_data_from_csv('../input/tabular-playground-series-nov-2021/train.csv')

In [None]:
# Load the test data the same way as the training data.
test_df = import_data_from_csv('../input/tabular-playground-series-nov-2021/test.csv')

In [None]:
# Display the (rows, columns) shape of both frames.
train_df.shape, test_df.shape

In [None]:
# Peek at the first rows of the training data.
train_df.head()

In [None]:
# Summary statistics of the training columns.
train_df.describe()

In [None]:
# Build the list of model input columns.
# 'target' is the label (used as y below); 'id' is written to the submission
# next to the prediction, so presumably it is a row identifier.
features_black_list = ['id', 'target']
# Keep every other training column, in its original order.
features_list = [column for column in train_df.columns if column not in features_black_list]

In [None]:
# Make X and y as plain NumPy arrays: X holds the selected feature columns,
# y holds the 'target' column.
X_train = train_df[features_list].values
y_train = train_df['target'].values

# Try XGBClassifier

In [None]:
# 3-fold stratified cross-validation splitter (all other settings left at
# their defaults).
cv = StratifiedKFold(n_splits = 3)
# Baseline classifier: 100 trees of depth at most 5, everything else default.
model = xgb.XGBClassifier(max_depth = 5, n_estimators = 100)

In [None]:
# Score the model with ROC AUC on each of the cross-validation folds.
scores = cross_val_score(model, X_train, y_train, cv = cv, scoring='roc_auc')
# Display the mean and standard deviation of the fold scores.
np.mean(scores), np.std(scores)

In [None]:
# Fit the model on the full training data; this fitted model is the one used
# for the test predictions below.
model.fit(X_train, y_train)

# Save result

In [None]:
# Select the same feature columns, in the same order, as used for training.
X_test = test_df[features_list].values
# The model is validated above with ROC AUC, which scores how well the
# predictions rank the rows. Hard 0/1 labels discard that ranking, so store
# the predicted probability of the positive class (second column of
# predict_proba) instead.
test_df['target'] = model.predict_proba(X_test)[:, 1]

In [None]:
# Write the submission file: one row per test 'id' with its predicted
# 'target', without the DataFrame index.
test_df[['id', 'target']].to_csv('Tabular_Playground_Series_Nov_21_simple_xgb.csv', index = False)