In [None]:
## installing packages
!python3 -m pip install -q "mxnet<2.0.0"
!python3 -m pip install -q autogluon
!python3 -m pip install -q graphviz --upgrade

In [None]:
# imports
import gc
import os
import shutil
import pandas as pd
import numpy as np
from pathlib import Path

In [None]:
## define configuration
# Input files: the competition train/test tables plus an earlier submission
# file whose values are reused as pseudo-labels.
PATH_TRAIN, PATH_TEST, PATH_PSEUDOLABEL = (
    '../input/tabular-playground-series-aug-2021/train.csv',
    '../input/tabular-playground-series-aug-2021/test.csv',
    '../input/pseudolabel/submission_weighted_ensemble_3 (1).csv',
)

# Time budget in seconds (author's note: a trial run with 300 gave a public
# score of ~10).
time_limit = 20_000

In [None]:
# Read the three CSV inputs, using each file's "id" column as the row index.
train, test, pseudolabel = (
    pd.read_csv(csv_path, index_col="id")
    for csv_path in (PATH_TRAIN, PATH_TEST, PATH_PSEUDOLABEL)
)

In [None]:
# Attach the pseudo-label columns to the test rows, matching test's "id"
# index level against the "id" index of `pseudolabel`.
# NOTE(review): presumably the pseudo-label file carries a `loss` column — confirm.
test_concat = test.join(pseudolabel, on='id')

In [None]:
# Randomly pick 30% of the pseudo-labelled test rows to add to the training data.
# The fixed random_state makes the draw repeatable across kernel restarts, so
# the same rows are selected on every run; change the seed for a different subset.
test_30 = test_concat.sample(frac=0.3, random_state=42)
test_30.shape

In [None]:
# Build the augmented frame: the training rows followed by the sampled
# pseudo-labelled test rows (row-wise concatenation).
temp = [train, test_30]
merged_df = pd.concat(temp, axis=0)
merged_df.shape

In [None]:
# Rebind `train` to the augmented frame. This is an alias, not a copy:
# `train` and `merged_df` now name the same DataFrame object.
train = merged_df

In [None]:
# Separate the label from the features.
# Both results are derived from `merged_df` with a non-mutating drop, so
# `merged_df` keeps its `loss` column and this cell can be re-run safely.
target_loss = merged_df['loss']
train = merged_df.drop(columns=['loss'])

In [None]:
## imports
from autogluon.tabular import TabularPredictor, TabularDataset

In [None]:
## train the AutoGluon regressor on the `loss` target
model_ag = TabularPredictor(problem_type='regression', label='target')

# The label is supplied as a `target` column on a new frame (`assign` returns
# a copy), so `train` itself is never modified -- even if the long-running fit
# is interrupted or raises part-way through.
# The trailing semicolon keeps the notebook from echoing fit's return value.
model_ag.fit(
    train_data=train.assign(target=target_loss),
    time_limit=time_limit,
    presets='best_quality',
    num_stack_levels=3,
    num_bag_folds=5,
    num_bag_sets=1,
);

In [None]:
## check the leaderboard of the fitted predictor
model_ag.leaderboard()

In [None]:
# Wrap the full test frame in a TabularDataset and predict for every row.
test_data = TabularDataset(test)
preds_autogluon = model_ag.predict(test_data)

In [None]:
## create submission
# Two columns: the prediction index as `id` and the predicted values as `loss`.
submission = pd.DataFrame(
    dict(id=preds_autogluon.index, loss=preds_autogluon)
)

submission.head()

In [None]:
## save submission
# The output file name embeds the time limit used for this run.
PATH_AUTOGLUON_SUBMISSION = f'submission_autogluon_pseudo{time_limit}.csv'
PATH_AUTOGLUON_SUBMISSION

In [None]:
# Write the submission to CSV; index=False keeps the DataFrame index out of
# the file, leaving only the `id` and `loss` columns.
submission.to_csv(PATH_AUTOGLUON_SUBMISSION, index=False)

In [None]:
## clean up: delete the saved-model directory and run garbage collection
# A missing directory is not an error here (the cell may be re-run, or no
# models may have been written), so only FileNotFoundError is tolerated;
# other failures such as permission problems still surface.
try:
    shutil.rmtree('AutogluonModels')
except FileNotFoundError:
    pass

gc.collect()