In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

!pip -q install --upgrade autogluon

from autogluon.tabular import TabularDataset , TabularPredictor

In [None]:
# Read the training CSV into a TabularDataset and display it.
train_data = TabularDataset(
    '../input/tabular-playground-series-jul-2021/train.csv'
)

train_data

In [None]:
# Show summary statistics for the training data.
train_data.describe()

In [None]:
# Pairwise correlation of the numeric columns only. `date_time` has not been
# parsed yet at this point, and since pandas 2.0 `corr()` no longer drops
# non-numeric columns by default, so they must be excluded explicitly.
train_data.corr(numeric_only=True)

In [None]:
# Heatmap of the numeric-column correlations. `numeric_only=True` keeps the
# unparsed `date_time` column from breaking `corr()` on pandas >= 2.0.
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.heatmap(train_data.corr(numeric_only=True));

In [None]:
# List the column names of the training data.
train_data.columns

In [None]:
# Parse the timestamp column once and derive the calendar features from it.
train_timestamps = pd.to_datetime(train_data['date_time'])

train_data['date_time'] = train_timestamps
train_data['dayofweek'] = train_timestamps.dt.dayofweek
train_data['hourofday'] = train_timestamps.dt.hour  # daily seasonality
# Raw int64 value of the timestamp, used as a monotonically increasing trend feature.
train_data['time'] = train_timestamps.astype(np.int64)  # yearly trend

train_data

In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.core.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path

class MultilabelPredictor():
    """Predict several label columns of a table using one TabularPredictor per label.

    Parameters
    ----------
    labels : list of str
        Names of the columns to predict. At least two are required.
    path : str
        Output directory; it is passed through `setup_outputdir` and used as the parent
        directory for this object's pickle file and for every per-label predictor.
    problem_types : list of str, optional
        Problem type of each label, in the same order as `labels`. When None, every
        TabularPredictor is created with `problem_type=None`.
    eval_metrics : list, optional
        Evaluation metric of each label, in the same order as `labels`. When None, every
        TabularPredictor is created with `eval_metric=None` and the metric each predictor
        reports after fitting is recorded in `self.eval_metrics`.
    consider_labels_correlation : bool, default True
        If True, the predictor for a label is trained with the labels that come before it
        in `labels` as extra feature columns, and at prediction time those columns are
        filled with the earlier predictors' outputs. If False, all other label columns are
        dropped before fitting each predictor.
    **kwargs
        Forwarded unchanged to every TabularPredictor constructor.

    Raises
    ------
    ValueError
        If fewer than two labels are given.
    """

    # File name (inside `self.path`) under which the MultilabelPredictor itself is pickled.
    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path, problem_types=None, eval_metrics=None, consider_labels_correlation=True, **kwargs):
        if len(labels) < 2:
            raise ValueError("MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor for predicting one label (column).")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = {labels[i]: eval_metrics[i] for i in range(len(labels))}
        problem_type = None
        eval_metric = None
        for i, label in enumerate(labels):
            # os.path.join works whether or not self.path ends with a path separator.
            path_i = os.path.join(self.path, "Predictor_" + label)
            if problem_types is not None:
                problem_type = problem_types[i]
            if eval_metrics is not None:
                # self.eval_metrics is keyed by label name, not by position.
                eval_metric = self.eval_metrics[label]
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric, path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """Fit one TabularPredictor per label, then save this object to disk.

        Parameters
        ----------
        train_data : str or DataFrame
            Training table containing all label columns; a str is loaded via TabularDataset.
        tuning_data : str or DataFrame, optional
            Optional validation table, handled the same way as `train_data`.
        **kwargs
            Forwarded to every `TabularPredictor.fit` call (so e.g. a `time_limit`
            applies to each label separately).
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if tuning_data is not None and isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        # Keep untouched copies; each label gets its own column-filtered view below.
        train_data_og = train_data.copy()
        if tuning_data is not None:
            tuning_data_og = tuning_data.copy()
        # Only record the predictors' metrics when the caller did not specify any.
        save_metrics = len(self.eval_metrics) == 0
        for i, label in enumerate(self.labels):
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                # Keep earlier labels as features; drop only the labels that come later.
                # Must use self.labels: a global `labels` may not exist or may differ.
                labels_to_drop = [self.labels[j] for j in range(i + 1, len(self.labels))]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            # Keep only the path so the fitted predictor is not held in memory.
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """Return a table with one predicted column per label; kwargs go to each `predict`."""
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """Return a dict mapping each label to its `predict_proba` output; kwargs are forwarded."""
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """Evaluate every per-label predictor on `data`.

        Returns a dict mapping each label to the result of that predictor's `evaluate`.
        `data` may be a file path or a table; a table is copied before use.
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                # Later predictors are then evaluated on predicted (not true) values of
                # this label, matching what they will see at prediction time.
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. """
        for label in self.labels:
            # Replace in-memory predictors by their paths before pickling this object.
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=os.path.join(self.path, self.multi_predictor_file), object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor. """
        # NOTE(review): load_pkl presumably unpickles the file -- only load from trusted paths.
        path = os.path.expanduser(path)
        # os.path.join adds the separator only when needed and tolerates an empty path.
        return load_pkl.load(path=os.path.join(path, cls.multi_predictor_file))

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label. """
        predictor = self.predictors[label]
        if isinstance(predictor, str):
            # Stored as a path (after fit/save): load the predictor from disk on demand.
            return TabularPredictor.load(path=predictor)
        return predictor

    def _get_data(self, data):
        """Load `data` when it is a file path; otherwise return a copy the caller's table is safe from."""
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        """Run every per-label predictor in `self.labels` order.

        Each label's prediction is written into the working copy of `data`, so later
        predictors can read it as a feature column. Returns `data[self.labels]` when
        `as_proba` is False, otherwise a dict of per-label `predict_proba` outputs.
        """
        data = self._get_data(data)
        predproba_dict = {}
        for label in self.labels:
            print(f"Predicting with TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            data[label] = predictor.predict(data, **kwargs)
        if not as_proba:
            return data[self.labels]
        return predproba_dict


In [None]:

# Columns to predict; MultilabelPredictor builds one TabularPredictor per entry.
labels = [
    'target_carbon_monoxide',
    'target_benzene',
    'target_nitrogen_oxides',
]
problem_types = ['regression'] * len(labels)  # type of each prediction problem
save_path = './saved_models/'
time_limit = 600  # forwarded to the fit() call of every per-label predictor

multi_predictor = MultilabelPredictor(
    labels=labels,
    problem_types=problem_types,
    path=save_path,
)
multi_predictor.fit(train_data, time_limit=time_limit)

In [None]:
# Load the test CSV and derive the same time features that were added to the training data.
test_data = TabularDataset(
    '../input/tabular-playground-series-jul-2021/test.csv'
)
test_timestamps = pd.to_datetime(test_data['date_time'])

test_data['date_time'] = test_timestamps
test_data['dayofweek'] = test_timestamps.dt.dayofweek
test_data['hourofday'] = test_timestamps.dt.hour  # daily seasonality
# Raw int64 value of the timestamp, used as a monotonically increasing trend feature.
test_data['time'] = test_timestamps.astype(np.int64)  # yearly trend

test_data

In [None]:
# Predict all label columns for the test rows; the result has one column per label.
preds = multi_predictor.predict(test_data)
preds

In [None]:
# Use the sample submission file as the template for the output table.
sub_df = pd.read_csv('../input/tabular-playground-series-jul-2021/sample_submission.csv')

# Assigning a DataFrame aligns rows on the index, so a row-count mismatch would
# otherwise show up only as silent NaNs in the submission.
assert len(preds) == len(sub_df), "prediction count does not match sample submission rows"
sub_df[labels] = preds[labels]
sub_df

In [None]:
# Write the submission file without the DataFrame index column.
sub_df.to_csv('./submission.csv', index=False)