### Tabular Playground Series - Feb 2022
This notebook demonstrates a simple and fast solution based on CatBoost.<br>
The whole job took me about 2 hours (including grid search) and about 45 lines of code.

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool

In [None]:
# Read the competition's train and test tables, then preview the first training rows.
train_raw, test_raw = map(
    pd.read_csv,
    (
        '../input/tabular-playground-series-feb-2022/train.csv',
        '../input/tabular-playground-series-feb-2022/test.csv',
    ),
)
train_raw.head(3)

#### Data preparation
Scaling the features by 1e4, rounding and converting to int16 to optimize memory usage

In [None]:
def prepare(df_raw):
    """Return a compact int16 copy of the feature columns of ``df_raw``.

    The ``row_id`` column is dropped, every remaining value is multiplied by
    1e4, rounded to the nearest integer and stored as int16.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Numeric frame containing a ``row_id`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame without ``row_id``; all columns have dtype int16.

    Raises
    ------
    KeyError
        If ``df_raw`` has no ``row_id`` column.
    ValueError
        If a scaled value does not fit into int16.
    """
    # `drop` and the multiplication both return new frames, so no explicit
    # copy is needed to protect the caller's data.
    scaled = (df_raw.drop(columns=['row_id']) * 1e4).round()
    # Round before casting: a bare astype() truncates toward zero, so a value
    # such as 28.999999 (floating-point noise) would otherwise become 28.

    # A float -> int16 cast does not check the range; fail loudly instead of
    # letting out-of-range values wrap around silently.
    limits = np.iinfo(np.int16)
    out_of_range = (scaled < limits.min) | (scaled > limits.max)
    if out_of_range.to_numpy().any():
        raise ValueError('scaled feature values do not fit into int16')

    return scaled.astype('int16')
# Single shared encoder instance: it is fitted on the training target and later
# used to map predicted class indices back to the original labels.
Encoder = LabelEncoder()

Label encoding

In [None]:
# Compact the training features and append the integer-encoded target as the
# last column; the test features go through the same preparation.
train = prepare(train_raw.drop(columns='target')).assign(
    target=Encoder.fit_transform(train_raw['target'])
)
test = prepare(test_raw)
train.sample(3)

In [None]:
# Stratified 80/20 hold-out split, then wrap each part in a CatBoost Pool
# (features = every column except 'target', label = 'target').
Train, Valid = train_test_split(
    train,
    test_size=0.2,
    random_state=17,
    stratify=train['target'],
)
Train_pool, Valid_pool = (
    Pool(part.drop(columns='target'), label=part['target'])
    for part in (Train, Valid)
)

In [None]:
# Training configuration for the multiclass CatBoost model.
params = dict(
    iterations=3000,
    depth=10,
    random_seed=17,
    thread_count=-1,
    verbose=500,
    loss_function="MultiClass",
    eval_metric='Accuracy',
    task_type='GPU',
)

# Train on the training pool while monitoring the hold-out pool,
# then report the score on the hold-out pool.
model = CatBoostClassifier(**params)
model.fit(Train_pool, eval_set=Valid_pool)
model.score(Valid_pool)

#### Most important features

In [None]:
# Importance of each feature, evaluated on the validation pool.
FE = model.get_feature_importance(data=Valid_pool, thread_count=-1, verbose=False)

# Label the importances with the feature names (every column of Valid except
# the last one, which holds the target) and order them from high to low.
FEG = pd.DataFrame(FE, index=Valid.columns[:-1]).sort_values(by=0, ascending=False)

# Plot only the features whose importance exceeds 0.5.
FEG.loc[FEG[0] > 0.5].plot.bar(figsize=(15, 5), rot=60)

#### Fit on the full training set

In [None]:
# Build a pool from all labelled rows and a label-free pool for the test set,
# then refit the same model object on the full training data.
full_features = train.drop(columns='target')
full_labels = train['target'].to_numpy().ravel()
Full_pool = Pool(full_features, label=full_labels)
Test_pool = Pool(test)
model.fit(Full_pool)

#### Prediction and submission

In [None]:
# Predict encoded class indices for the test set.
pred = model.predict(Test_pool)

# For MultiClass models the prediction comes back as an (n, 1) column, while
# LabelEncoder.inverse_transform expects a 1-D array; flatten it explicitly
# (the previous `pred[:]` slice was a no-op and left the column shape intact).
predictions = Encoder.inverse_transform(pred.ravel())

# Fill the sample submission with the decoded labels. Item assignment is used
# because attribute assignment only works when the column already exists.
sub = pd.read_csv('../input/tabular-playground-series-feb-2022/sample_submission.csv')
sub['target'] = predictions
sub.to_csv('submission.csv', index = False)