In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier, Pool
from tqdm import tqdm
import optuna

from utility import make_BMI

In [2]:
data = pd.concat(
    [pd.read_csv('../data/train.csv', index_col='id'),
     pd.read_csv('../data/ObesityDataSet.csv')]
)
data.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Male,24.443011,1.699998,81.66995,yes,yes,2.0,2.983297,Sometimes,no,2.763573,no,0.0,0.976473,Sometimes,Public_Transportation,Overweight_Level_II
1,Female,18.0,1.56,57.0,yes,yes,2.0,3.0,Frequently,no,2.0,no,1.0,1.0,no,Automobile,Normal_Weight
2,Female,18.0,1.71146,50.165754,yes,yes,1.880534,1.411685,Sometimes,no,1.910378,no,0.866045,1.673584,no,Public_Transportation,Insufficient_Weight
3,Female,20.952737,1.71073,131.274851,yes,yes,3.0,3.0,Sometimes,no,1.674061,no,1.467863,0.780199,Sometimes,Public_Transportation,Obesity_Type_III
4,Male,31.641081,1.914186,93.798055,yes,yes,2.679664,1.971472,Sometimes,no,1.979848,no,1.967973,0.931721,Sometimes,Public_Transportation,Overweight_Level_II


In [3]:
data['BMI'] = make_BMI(data)

In [4]:
data, target = data.drop('NObeyesdad', axis=1), data['NObeyesdad']

numerical = data.dtypes[data.dtypes == 'float64'].index
categorical = data.dtypes[data.dtypes == 'object'].index

In [5]:
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_all = CatBoostClassifier()
clf_all.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d18dc940>

![График обучения](https://s435vlx.storage.yandex.net/rdisk/18916441f29f2f6033ef8cff40dfd64901e9c22fd4ad1d97bbd0744fc0e7b859/65e4deab/LyPBJX7DrmJ2vc9Oq453nVIbhoMIw0C91VAOFX4rTva0M3ZXSqYKnQY4vmFsTPl3BbCqProe-H-h5m5qFscm3A==?uid=285904750&filename=final1.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26372&hid=85d6e14c90f6e6167082902dba99db79&media_type=image&tknv=v2&etag=86d3df4132b9cfe576704f0749782c1d&rtoken=TS5pG1TXIFmz&force_default=yes&ycrid=na-fb779bb95099a277510414eef4c4dc25-downloader5e&ts=612c786a4c0c0&s=a87cf83b957e2b4666ed3ae3a38f2795d208bde50b6395df2b0ed9ccaa2513c4&pb=U2FsdGVkX1-_N5lKEJlcIYYaPTaANcYaHpuPjlferNCB2Kj7XW0O9g_Qp8dPEfb9TU-Tyg3_cEiqOzNVJrwDsj6gatR46-ev-6BpGPiWLGE)

In [6]:
clf_all.score(X_test, y_test)

0.9142982072584172

In [7]:
obesity = target.apply(lambda x: 1 if x in ['Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III'] else 0)

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    obesity,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_obesity = CatBoostClassifier()
clf_obesity.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238c98a7a60>

![График обучения](https://s989sas.storage.yandex.net/rdisk/16e2150044d00bacaa63b14911b2803dce79368dd90b5fdc895b78db219efe7e/65e4debd/LyPBJX7DrmJ2vc9Oq453ndSSP8YdSLmLF5UBdN1-Zzzok5A7UC1DUIGJrChIl5GkNtxR2829stGwI0n7SFPsdA==?uid=285904750&filename=final2.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26427&hid=7c2bcce8de4aac0fa73f70695873d2e6&media_type=image&tknv=v2&etag=b2390ccb9c04b5ca2eae2592a2cc7e89&rtoken=sAUnsVGiKECs&force_default=yes&ycrid=na-ac367b2946f5aa369f73836cdafc0118-downloader4f&ts=612c787b76940&s=c44566a69ed717a683fc55a9878e42c33c17ef38a83b203c2cf52a3da3e0f33f&pb=U2FsdGVkX1-EBdlYFEnb-9kHa0NU0X6vdA-yw530i7nZ-NVCDM9CD08wwO9JYondYvEH3P4ebU_6f0X5K0ez4A_Jz2RfVbPF6HPhLY3u4eI)

In [8]:
clf_obesity.score(X_test, y_test)

0.9777000437254044

In [9]:
overweight = target.apply(lambda x: 1 if x in ['Overweight_Level_I', 'Overweight_Level_II'] else 0)

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    overweight,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_overweight = CatBoostClassifier()
clf_overweight.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d18ddab0>

![График обучения](https://s435vlx.storage.yandex.net/rdisk/9e4ea06d7e281f9b915a933bf5f7b07b8e334a7e7a3925f990a0bddf260f40fe/65e4df25/LyPBJX7DrmJ2vc9Oq453nbIJX82g2ywv3kxhuk87FxjkFdPjHxGGLFKIzroPGn4bcsqha2sHh3hUCv7xGsinZQ==?uid=285904750&filename=final3.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=25822&hid=0b946e5c2c464151f546bd412461a55e&media_type=image&tknv=v2&etag=3539c2c745b0b3365ff65b61803513cd&rtoken=1mQoCxcaBELK&force_default=yes&ycrid=na-014c4b602a0de19555c115ddd97f4d09-downloader5e&ts=612c78dea5340&s=6b106464698ea5c59b16df3809c3a36723079974aed6e42483d4c6623cd65d2e&pb=U2FsdGVkX18MXCFVkCkYuxbYR_J-Z2A9Oj04WpdFi03i6fKXwcHQAhnirkb5MdIqeBGkWlAOglZud8STcvtoemss7HiUz_JU6FcicbZNL-s)

In [10]:
clf_overweight.score(X_test, y_test)

0.9564932225623087

In [11]:
normal_and_less = target.apply(lambda x: 1 if x in ['Normal_Weight', 'Insufficient_Weight'] else 0)

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    normal_and_less,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_normless = CatBoostClassifier()
clf_normless.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d1917c40>

![График обучения](https://s183vlx.storage.yandex.net/rdisk/c02b238005b46d72011adf850f7d56cb69a7d9381fd1aeac7f816fb988983feb/65e4df4d/LyPBJX7DrmJ2vc9Oq453nfzYxcg40NL16W-q9PoEPtEocG4cy9dhjbhbZaWsqR61tiIgTauDPGEcCAXyAkqV2A==?uid=285904750&filename=final4.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26147&hid=b8c56a96b382f605d7046496dedfd567&media_type=image&tknv=v2&etag=b89b1ac6eb01ccc1a1f9e95fcce20436&rtoken=xKIJxZ0Bu3p3&force_default=yes&ycrid=na-c0e92cd6ebada0f6d2812386b30800d9-downloader5e&ts=612c7904cad40&s=0c59f82d65ab87b36fb6534e18d21d339bf3a785bf21fe45358b4b33485e0b97&pb=U2FsdGVkX1915jOmbhpTC3DwzRh3Wr1VeXdfxVb5tHXgN2HGZ86c-QhK7wJ45Hzark2RP9lNYcjDw2-HP4dYEhwCppsZP40SUCXHWuEgUEE)

In [12]:
clf_normless.score(X_test, y_test)

0.9770441626585046

In [13]:
data_norm_against_less, target_norm_against_less = data[target.isin(['Normal_Weight', 'Insufficient_Weight'])], target[target.isin(['Normal_Weight', 'Insufficient_Weight'])]

X_train, X_test, y_train, y_test = train_test_split(data_norm_against_less,
                                                    target_norm_against_less,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_norm_against_less = CatBoostClassifier()
clf_norm_against_less.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d1915b70>

![График обучения](https://s962sas.storage.yandex.net/rdisk/365ba697957c3c428802652af01d18e8ad7c219ae4479fb5d6f3e370d67efae4/65e4dfb0/LyPBJX7DrmJ2vc9Oq453nfeEbSJi4GgO_jwBvlTac_go5zUqX2jiEHRVz91gjTU9Yy3hK9L27EClfW4BW5ZLHg==?uid=285904750&filename=final5.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=25489&hid=ed98df54375bcf052478f224e724c8de&media_type=image&tknv=v2&etag=1a78161b28eeda5ef7ab0b56af2dbeed&rtoken=xMPtmjyu076o&force_default=yes&ycrid=na-0024d5a8c1430aef58379b4610eb3e87-downloader5e&ts=612c796334c00&s=610cae221dc0ac04a6c1b86bfd264480f420ee5c90ff2e5ea8c2294292aaad85&pb=U2FsdGVkX18PYAoME8FlY79stEehu_FgIsB_a-sl6d26hKNLc4KJ-vYp2IlzQwwU8GLlCem2gP7jZSXVPIsgkjLsSCaRjq3jFdlZT0SgIvo)

In [14]:
clf_norm_against_less.score(X_test, y_test)

0.9578264395782644

In [15]:
data_norm_against_overweight1, target_norm_against_overweight1 = data[target.isin(['Normal_Weight', 'Overweight_Level_I'])], target[target.isin(['Normal_Weight', 'Overweight_Level_I'])]

X_train, X_test, y_train, y_test = train_test_split(data_norm_against_overweight1,
                                                    target_norm_against_overweight1,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_norm_against_overweight1 = CatBoostClassifier()
clf_norm_against_overweight1.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d1916c80>

![График обучения](https://s60klg.storage.yandex.net/rdisk/b6a28c7554665160450eeff70cf7985460ef5ee16775935ec255175d624bbb90/65e4e046/LyPBJX7DrmJ2vc9Oq453nWTzS0fR5bAcAsQbiaXSZOg-wPuUT7b0_02sIVvKDR20NWIquKhpYcueBOgnDrywVA==?uid=285904750&filename=final6.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26219&hid=5068c41a6bb3742863f43281e8fb6932&media_type=image&tknv=v2&etag=b7f05685f8a9faad8c6d552c2b319437&rtoken=uqlVB7cMJ309&force_default=yes&ycrid=na-f2c2ebe58fc246af7b5ece8b175f3cb4-downloader4f&ts=612c79f241d80&s=7879017f573579a3d3dae42abffb0d962e811d88b6183da966dde8e8b2254a79&pb=U2FsdGVkX1-DfgyOW171s-DwyO7K_pjWev83MJ_U08AY2tu2AvA_rIbzmDjbn7SFNwWpm92ST6oDJ5oxgGmG7QBDTWZ1ieYFIJ-WjhOUXNI)

In [16]:
clf_norm_against_overweight1.score(X_test, y_test)

0.9376026272577996

In [17]:
data_overweight1_against_overweight2, target_overweight1_against_overweight2 = data[target.isin(['Overweight_Level_I', 'Overweight_Level_II'])], target[target.isin(['Overweight_Level_I', 'Overweight_Level_II'])]

X_train, X_test, y_train, y_test = train_test_split(data_overweight1_against_overweight2,
                                                    target_overweight1_against_overweight2,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_overweight1_against_overweight2 = CatBoostClassifier()
clf_overweight1_against_overweight2.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d19174f0>

![График обучения](https://s425vlx.storage.yandex.net/rdisk/ce9dd5ccc685cd391f4951111c37f50725be009170545a61f6c226fade0f6ec0/65e4e060/LyPBJX7DrmJ2vc9Oq453nZLz-dIitEW68US4DcKH0cZG2U3X6JjOobXbD3yISyi7vAvZKt9r1EaLBJd7eZMUkA==?uid=285904750&filename=final7.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26712&hid=19250d52756c686d0c83fcbc7b84a787&media_type=image&tknv=v2&etag=98859db0aede7478cdb105fb59bb8de3&rtoken=Ol73W9r62U7R&force_default=yes&ycrid=na-df78d4044c4e54ee0d918269c4bb22c5-downloader4f&ts=612c7a0b0d800&s=aa88725e922d07c6118eba9b647ace3092a0458491f43ef36337129d918344a0&pb=U2FsdGVkX193UVveyP1fQCjEZeRYOjz7yXaYsy47513JwIhZTeBLK6PwFWgFSuKSc_mgS8-J8EzUoU43O864YR4qKWSENRBw2V0kX9g34xM)

In [18]:
clf_overweight1_against_overweight2.score(X_test, y_test)

0.8960216998191681

In [19]:
data_obesity1_against_obesity2, target_obesity1_against_obesity2 = data[target.isin(['Obesity_Type_I', 'Obesity_Type_II'])], target[target.isin(['Obesity_Type_I', 'Obesity_Type_II'])]

X_train, X_test, y_train, y_test = train_test_split(data_obesity1_against_obesity2,
                                                    target_obesity1_against_obesity2,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_obesity1_against_obesity2 = CatBoostClassifier()
clf_obesity1_against_obesity2.fit(X_train, y_train, cat_features=list(categorical), eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d1916d40>

![График обучения](https://s114vla.storage.yandex.net/rdisk/c1a489c1d83cea5364b0a9de2a32171c08440143a0ddae25ccd8ff717636de98/65e4e089/LyPBJX7DrmJ2vc9Oq453nYaciR9wzoL58HEl12cnBeMQy6c8XW1n7RhYnTt1YQzBrIZF4Nul9iRQJLPgCo2CJA==?uid=285904750&filename=final8.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=26314&hid=8baea560c84a2a3055af2074a0bc47c6&media_type=image&tknv=v2&etag=defb8cb60daf32f4e7a4e651de39c726&rtoken=4beaZmtHMuyN&force_default=yes&ycrid=na-b04a65e4e9d79dc674e7f399daca2cc1-downloader4f&ts=612c7a3227440&s=899a106430483bb704295e78aa854c03fdadf9d71d7debc0c1dd30f4793acd71&pb=U2FsdGVkX18kZMk8WQq7DWbyMWQOpHORIBQS3W8fx1ftpg8YPPwl1coRCXcXwh5gpsAtOwyF-PAROBDbkHeSdeUzO1X4LMQX_uOx4JiZRhU)

In [20]:
clf_obesity1_against_obesity2.score(X_test, y_test)

0.972834067547724

In [21]:
models = [
    clf_all,
    clf_obesity,
    clf_overweight,
    clf_normless,
    clf_norm_against_less,
    clf_norm_against_overweight1,
    clf_overweight1_against_overweight2,
    clf_obesity1_against_obesity2    
]

In [22]:
clf_all.predict_proba(data)[:, :-1]

array([[1.95775494e-05, 2.05606137e-04, 1.40186083e-02, 2.01217320e-04,
        3.54175372e-06, 4.85827420e-02],
       [2.43320016e-02, 9.68697574e-01, 1.29277556e-04, 3.51764410e-06,
        6.44802190e-06, 4.06636457e-03],
       [9.22977422e-01, 8.67345285e-03, 1.06819118e-02, 6.83631487e-05,
        2.11983757e-05, 5.48778511e-02],
       ...,
       [6.91595181e-08, 4.79809622e-07, 3.65962109e-04, 4.67989333e-04,
        9.99155580e-01, 1.45550901e-06],
       [1.29932114e-07, 1.04109394e-06, 7.73608309e-04, 9.23903304e-04,
        9.98277418e-01, 3.69369533e-06],
       [1.28907657e-07, 1.05200975e-06, 7.64365021e-04, 7.60427315e-04,
        9.98442915e-01, 4.07549782e-06]])

In [23]:
preds = clf_all.predict_proba(data)[:, :-1]

for model in models[1:]:
    preds = np.hstack((preds, model.predict_proba(data)))
    
preds.shape

(22869, 20)

In [24]:
X_train, X_test, y_train, y_test = train_test_split(preds,
                                                    target,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_final = CatBoostClassifier()
clf_final.fit(X_train, y_train, eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d18de440>

![График обучения](https://s593vla.storage.yandex.net/rdisk/6d901dc7e0d3bf688284670374a061b9b507025281d213e827c7764f5b59f24e/65e4e0ae/LyPBJX7DrmJ2vc9Oq453nQj0HrxXkgPXSzAtQVHQbg63q5ZI8IoH4i6g80GJqCn7pvBX-uC_8Mtqw97rFHwE2w==?uid=285904750&filename=final9.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=27094&hid=9ddc2814733a3460c4a42b3d305cc91a&media_type=image&tknv=v2&etag=5eedff74398afcb20d49e02aeb6e5079&rtoken=XHM5juZ1hTF2&force_default=yes&ycrid=na-7f23e9b16ea3eccbfca667f0b6d27d9b-downloader4f&ts=612c7a5570780&s=7ab8758ed8b06c29e3ca03c5eff93f1db542eb1da766c63cca227053b0c61ab5&pb=U2FsdGVkX199yHz6cN2tJvi79bIhD4YMeUfq8d6G4If-iqEyZkD8-UmyEP_D1ZdVxhXMME27dBt_AEBEgZStYHXuG_iP0N2Et53B0Rh-Ql0)

In [25]:
clf_final.score(X_test, y_test)

0.9361609094884128

In [26]:
def objective(trial):
    X_train, X_valid, y_train, y_valid = train_test_split(
        preds,
        target,
        test_size=0.20,
        shuffle=True
    )

    param = {
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
        #"used_ram_limit": "3gb",
    }

    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    
    clf = CatBoostClassifier(**param, iterations=1200, logging_level='Silent')
    train = Pool(X_train, y_train)
    valid = Pool(X_valid, y_valid)

    clf.fit(train, eval_set=valid, verbose=False, early_stopping_rounds=100)

    accuracy = accuracy_score(y_valid, clf.predict(X_valid))
    return accuracy

In [27]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50, timeout=600)

[I 2024-03-03 19:06:50,250] A new study created in memory with name: no-name-ebd36fc5-7ec5-4794-9b7c-dbe645e89469
[I 2024-03-03 19:12:43,070] Trial 0 finished with value: 0.9420638390905116 and parameters: {'colsample_bylevel': 0.0836194970976684, 'depth': 11, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'learning_rate': 0.005424462669758736, 'min_data_in_leaf': 40, 'subsample': 0.6952250029289982}. Best is trial 0 with value: 0.9420638390905116.
[I 2024-03-03 19:12:56,002] Trial 1 finished with value: 0.954744206383909 and parameters: {'colsample_bylevel': 0.09321369980768827, 'depth': 6, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'learning_rate': 0.056927854434741736, 'min_data_in_leaf': 81, 'subsample': 0.4941500292316874}. Best is trial 1 with value: 0.954744206383909.
[I 2024-03-03 19:13:02,091] Trial 2 finished with value: 0.9464363795365107 and parameters: {'colsample_bylevel': 0.047932667439327635, 'depth': 7, 'boosting_type': 'Plain', 'bootstrap_typ

In [28]:
print('Best hyperparameters:', study.best_params)
print('Best Accuracy:', study.best_value)

Best hyperparameters: {'colsample_bylevel': 0.09321369980768827, 'depth': 6, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'learning_rate': 0.056927854434741736, 'min_data_in_leaf': 81, 'subsample': 0.4941500292316874}
Best Accuracy: 0.954744206383909


In [29]:
best_params = study.best_params

In [30]:
X_train, X_test, y_train, y_test = train_test_split(preds,
                                                    target,
                                                    test_size=0.20,
                                                    random_state=3,
                                                    shuffle=True)

clf_final1 = CatBoostClassifier(**best_params)
clf_final1.fit(X_train, y_train, eval_set=(X_test, y_test), silent=True, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x238d1915e70>

![График обучения](https://s961sas.storage.yandex.net/rdisk/d8628a91648dadf13af10e154cc89735418d40830a25ba364bd3a10723094897/65e4e0c4/LyPBJX7DrmJ2vc9Oq453ndkbCTOzIKCkzu7bPLone6ZyYg3ycax9nvCByhQF_NgfJAaNqsMXGN5d146RHtz4eA==?uid=285904750&filename=final10.png&disposition=inline&hash=&limit=0&content_type=image%2Fpng&owner_uid=285904750&fsize=27101&hid=f229d785d5d29ed24f8ce7d9fdd0093b&media_type=image&tknv=v2&etag=277423c8836d7c393f81a2d4b334c75a&rtoken=Cyant86jK1Hq&force_default=yes&ycrid=na-c0ca26ec2a82b8698050e9f75102bf27-downloader4f&ts=612c7a6a6b900&s=f2b24332f3b6ff34b598e6d68228eb86e8a09565e3c6c8fd20642910155d5228&pb=U2FsdGVkX1-57_SWZ9OO2RgcoyrNU-RAyFzQF7RlUu1ezkU-wa7zcGsdqtvTJnj3h2rHLuyYxlyTfn0x-potCaiJaUmL40N74TnK2HnU4_0)

In [31]:
clf_final1.score(X_test, y_test)

0.9341932662877132

In [32]:
test = pd.read_csv('../data/test.csv', index_col='id')

test['BMI'] = make_BMI(test)

preds = clf_all.predict_proba(test)[:, :-1]

for model in models[1:]:
    preds = np.hstack((preds, model.predict_proba(test)))

test['pred4'] = clf_final.predict(preds)
test['pred5'] = clf_final1.predict(preds)

test['pred4']

id
20758        Obesity_Type_II
20759     Overweight_Level_I
20760       Obesity_Type_III
20761         Obesity_Type_I
20762       Obesity_Type_III
                ...         
34593    Overweight_Level_II
34594          Normal_Weight
34595    Insufficient_Weight
34596          Normal_Weight
34597        Obesity_Type_II
Name: pred4, Length: 13840, dtype: object

In [33]:
pd.Series(test['pred4'], index=test.index).to_csv('../preds/pred4.csv')
pd.Series(test['pred5'], index=test.index).to_csv('../preds/pred5.csv')