In [None]:
pip install lightautoml -q

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">Preprocessing</p>

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# Global plot defaults: 10x6 inch figures and seaborn text scaled by 1.3.
sns.set(rc={'figure.figsize': (10, 6)})
sns.set(font_scale=1.3)
# NOTE: a bare `sns.color_palette("light:#5A9", as_cmap=True)` statement used to
# sit here; its return value was discarded, so it had no effect and was removed.
# Applied last, so the fivethirtyeight style takes precedence wherever it
# overlaps with the seaborn settings above.
plt.style.use('fivethirtyeight')

from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.tasks import Task

from sklearn.metrics import log_loss

import warnings
# Silence every warning for the rest of the session (no category or module
# restriction is given, so the filter applies to all of them).
warnings.filterwarnings('ignore')

In [None]:
# Single place to repoint the notebook at another copy of the competition data.
DATA_DIR = '../input/tabular-playground-series-jun-2021'

# train: rows used for fitting (the 'target' column is read from it later on);
# test: rows to predict; sub: submission template that gets filled in at the end.
train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')
sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [None]:
# Quick look at the first rows of the training frame.
train.head()

In [None]:
# Checking for missing values

In [None]:
# Print the frame summary (column dtypes and non-null counts) for the training data.
train.info()

In [None]:
# Summary statistics of the training features ('id' excluded), transposed so
# that each feature is a row and each statistic a column.
train_stats = train.drop(columns=['id']).describe().T

# Highlight the table: in-cell bars for the mean, red gradient for the standard
# deviation, blue gradient for the median. The styled table is the cell output.
(
    train_stats.style
    .bar(subset=['mean'], color='#03a326')
    .background_gradient(subset=['std'], cmap='Reds')
    .background_gradient(subset=['50%'], cmap='Blues')
)

In [None]:
# Summary statistics of the test features ('id' excluded), transposed so that
# each feature is a row and each statistic a column.
test_stats = test.drop(columns=['id']).describe().T

# Highlight the table: in-cell bars for the mean, red gradient for the standard
# deviation, blue gradient for the median. The styled table is the cell output.
(
    test_stats.style
    .bar(subset=['mean'], color='#fcf45d')
    .background_gradient(subset=['std'], cmap='Reds')
    .background_gradient(subset=['50%'], cmap='Blues')
)

In [None]:
# Turn the string class labels into zero-based integer codes: drop the first six
# characters, parse the rest as an integer and subtract one.
# (Assumes labels look like 'Class_<k>' -- TODO confirm against train.csv.)
# The dtype guard keeps this cell idempotent: once the column is numeric the
# `.str` accessor would raise, so a second run is a no-op instead of a crash.
if not pd.api.types.is_numeric_dtype(train['target']):
    train['target'] = train['target'].str.slice(start=6).astype(int) - 1

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">Plots</p>

In [None]:
# Rows per class, most frequent class first. Computed once and reused for the
# bar order instead of calling value_counts() a second time.
contagem = train['target'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=train, x='target', order=contagem.index, ax=ax)

# Label every bar with its height (the class count), just above the bar top.
for bar in ax.patches:
    ax.text(bar.get_x() - .1 + bar.get_width() / 3, bar.get_height() + 2,
            bar.get_height(), fontsize=16)

ax.set_xlabel('Number of Class', fontsize=15)
ax.set_ylabel('Count', fontsize=15);

In [None]:
# Same bar chart of class frequencies, this time annotated with each class's
# share of all rows instead of the raw count.
plt.figure(figsize=(12, 6))
class_order = train['target'].value_counts().index
ax = sns.countplot(data=train, x='target', order=class_order)

# Bar heights are the per-class counts; their sum is the denominator.
totals = [patch.get_height() for patch in ax.patches]
total = sum(totals)

# Place the percentage (two decimals) at the top-left corner of every bar.
for patch in ax.patches:
    share = round((patch.get_height() / total) * 100, 2)
    ax.text(patch.get_x() - .03, patch.get_height(), str(share) + '%', fontsize=15)

ax.set_xlabel('% of Class', fontsize=15)
ax.set_ylabel('Count', fontsize=15);

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">AutoML</p>

In [None]:
# Problem definition handed to LightAutoML: a multiclass task.
task = Task('multiclass')
# Column roles: the 'target' column is the label and 'id' is listed under 'drop'
# (presumably excluded from the features -- confirm against the LightAutoML docs).
roles = {'target':'target','drop':['id']}

In [None]:
# Algorithms to use, given as a list of lists -- presumably one inner list per
# stacking level (confirm against the LightAutoML `use_algos` documentation).
algo_levels = [['lgb', 'cb', 'cb_tuned'], ['linear_l2', 'cb']]

automl = TabularUtilizedAutoML(
    task=task,
    timeout=10800,  # overall time budget; presumably seconds (= 3 hours) -- confirm
    cpu_limit=-1,
    general_params={'use_algos': algo_levels},
    reader_params={'n_jobs': 12},
)

# Fit on the training frame; the predictions returned for the training rows
# are kept in `pred`.
pred = automl.fit_predict(train, roles=roles)

In [None]:
# Predict on the test frame with the fitted AutoML model.
previsao = automl.predict(test)

In [None]:
# Overwrite every column after the first one, by position, with the predicted
# values. This relies on the prediction columns being in the same order as the
# submission template's columns and on the rows matching the template's rows.
sub.iloc[:, 1:] = previsao.data
# Write the filled-in template without the index column.
sub.to_csv('sub1.csv', index=False)
# Show the final submission frame as the cell output.
sub