In [1]:
!pip install -U -q lightautoml

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 2.19.1 requires aiohttp, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.2.1 which is incompatible.
dask-expr 1.1.1 requires pandas>=2, but you have pandas 1.5.3 which is incompatible.
featuretools 1.31.0 requires pandas>=2.0.0, but you have pandas 1.5.3 which is incompatible.
fitter 1.7.0 requires joblib<2.0.0,>=1.3.1, but you have joblib 1.2.0 which is incompatible.
libpysal 4.9.2 requires packaging>=22, but you have packaging 21.3 which is incompatible.
libpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.
mizani 0.11.4 requires pandas>=2.1.0, but you have pandas 1.5.3 which is incompatible.
momepy 0.7

# Configs and Imports

In [2]:
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
import pandas as pd
import numpy as np
import torch
import pickle

In [3]:
TARGET_NAME = 'Target'
TEST_SIZE = 0.1
N_THREADS = 4
TIMEOUT = 3600 * 10
N_FOLDS = 5
SEED = 27

In [4]:
np.random.seed(SEED)
torch.set_num_threads(N_THREADS)

# Loading and Preprocessing the Datasets

In [5]:
train = pd.read_csv('/kaggle/input/playground-series-s4e6/train.csv', index_col='id')
test = pd.read_csv('/kaggle/input/playground-series-s4e6/test.csv', index_col='id')

train.shape, test.shape

((76518, 37), (51012, 36))

In [6]:
original = pd.read_csv('/kaggle/input/students-dropout-and-academic-success/data.csv', delimiter=';')
original = original.rename(columns={'Daytime/evening attendance\t': 'Daytime/evening attendance'})

train = pd.concat([train, original], axis=0, ignore_index=True)

train.shape

(80942, 37)

# Training

In [7]:
task = Task(
    name='multiclass',
    greater_is_better=True
)

In [8]:
automl = TabularAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    reader_params={
        'n_jobs': N_THREADS,
        'cv':  N_FOLDS,
        'random_state': SEED
    },
    tuning_params = {'max_tuning_time': 3600},
)

oof_preds = automl.fit_predict(
    train,
    roles={'target': TARGET_NAME},
    verbose=1
)

[09:47:00] Stdout logging level is INFO.
[09:47:00] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer
[09:47:00] Task: multiclass

[09:47:00] Start automl preset with listed constraints:
[09:47:00] - time: 36000.00 seconds
[09:47:00] - CPU: 4 cores
[09:47:00] - memory: 16 GB

[09:47:00] [1mTrain data shape: (80942, 37)[0m

[09:47:12] Layer [1m1[0m train process start. Time left 35988.11 secs
[09:47:19] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[09:47:36] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.5364926138276152[0m
[09:47:36] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[09:47:36] Time left 35964.23 secs

[09:47:58] [1mSelector_LightGBM[0m fitting and predicting completed
[09:48:07] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
[09:52:26] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m0.4365162506892628[0m
[09:52:26] [1mLvl_0_Pipe_1_Mod_0_Li

In [9]:
with open('lightautoml_oof_preds.pkl', 'wb') as f:
    pickle.dump(oof_preds.data, f)

# Post-Processing and Inference

In [10]:
preds = automl.predict(test).data

In [11]:
class_mapping = automl.reader.class_mapping
class_mapping = {v: k for k, v in class_mapping.items()}
class_mapping

{0: 'Graduate', 1: 'Dropout', 2: 'Enrolled'}

In [12]:
submission = pd.read_csv('/kaggle/input/playground-series-s4e6/sample_submission.csv', index_col='id')

submission[TARGET_NAME] = preds.argmax(axis=1)
submission[TARGET_NAME] = submission[TARGET_NAME].map(class_mapping)

submission.to_csv('submission.csv')
submission.head()

Unnamed: 0_level_0,Target
id,Unnamed: 1_level_1
76518,Dropout
76519,Graduate
76520,Graduate
76521,Graduate
76522,Enrolled
