In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import datetime as dt
import pandas as pd

> # Data Prep

In [None]:
!pip install dataprep

In [None]:
from dataprep.eda import plot, plot_correlation, create_report, plot_missing

In [None]:
# Read the Titanic training CSV from the Kaggle input mount into `df`.
df = pd.read_csv('/kaggle/input/titanic/train.csv')
# Bare last expression: the notebook renders the frame as this cell's output.
df

In [None]:
# Call dataprep's `plot` on the whole frame (no column argument is given).
plot(df)

In [None]:
# Build dataprep's report for `df`; as the cell's last expression, the
# returned object is what the notebook displays.
create_report(df)

In [None]:
# Same `plot` entry point, narrowed to the single column "Age".
plot(df, "Age")

In [None]:
# Two column names are passed: "Age" together with "Embarked".
# NOTE(review): presumably this renders their joint relationship -- confirm
# against the dataprep documentation.
plot(df, "Age", "Embarked")

> # AutoViz

In [None]:
# Re-read the Titanic training CSV, this time through a path relative to the
# working directory, and rebind `df` to the fresh frame.
df = pd.read_csv('../input/titanic/train.csv')
# Preview only the first rows instead of the whole frame.
df.head()

In [None]:
! pip install Autoviz

In [None]:
! pip install xlrd

In [None]:
from autoviz.AutoViz_Class import AutoViz_Class

# Run AutoViz directly on the in-memory frame: `filename` and `sep` are left
# empty because the data is supplied through `dfte`.
AV = AutoViz_Class()
dftc = AV.AutoViz(
    filename='', sep='', header=0,
    dfte=df,
    depVar='Embarked',
    chart_format='png', lowess=False, verbose=1,
    max_rows_analyzed=300000, max_cols_analyzed=30,
)

> # Pandas Profiling

In [None]:
from pandas_profiling import ProfileReport

In [None]:
# Build a pandas-profiling report over `df`.
report = ProfileReport(df)
# Bare last expression so the notebook renders the report object.
report

# SweetViz

In [None]:
!pip install sweetviz

In [None]:
import sweetviz as sv

In [None]:
# Analyse `df` with SweetViz; the frame is passed as a [frame, 'Data'] pair
# (presumably 'Data' is the display name used in the report -- confirm).
# NOTE(review): the report object is only stored here and never shown in this
# cell; a follow-up show_html()/show_notebook() call is presumably intended.
advert_report = sv.analyze([df, 'Data'])

In [None]:
## define configuration
# Input CSVs of the "House Prices: Advanced Regression Techniques" dataset,
# relative to the notebook's working directory.
PATH_TRAIN = '../input/house-prices-advanced-regression-techniques/train.csv'
PATH_TEST = '../input/house-prices-advanced-regression-techniques/test.csv'

# Submission file names, one per AutoML framework.
PATH_AUTOGLUON_SUBMISSION = 'submission_autogluon.csv'
PATH_EVALML_SUBMISSION = 'submission_evalml.csv'
PATH_FLAML_SUBMISSION = 'submission_flaml.csv'
PATH_H2OAML_SUBMISSION = 'submission_h2oaml.csv'
PATH_LAML_SUBMISSION = 'submission_laml.csv'

# Time budget per framework; the seconds value is derived from the minutes
# value so only MAX_MODEL_RUNTIME_MINS needs editing.
MAX_MODEL_RUNTIME_MINS = 15
MAX_MODEL_RUNTIME_SECS = MAX_MODEL_RUNTIME_MINS * 60

In [None]:
## prepare data
import gc
import os
import shutil
import datatable as dt
from pathlib import Path
import warnings

warnings.filterwarnings('ignore')

# `dt` is datatable from here on (this cell's import rebinds the earlier
# `datetime as dt` alias).
# Read both splits; the training frame is capped at its first 100000 rows.
train = dt.fread(PATH_TRAIN)
train = train[:100000, :]
test = dt.fread(PATH_TEST)

# Set the test ids and the training label aside before those columns go away.
test_ids = test['Id']
target = train['SalePrice'].to_numpy().ravel()

# Drop id/label from train in place, then select the test columns by the
# remaining train column names so both frames share one feature layout.
del train[:, ['Id', 'SalePrice']]
test = test[:, train.names]

> # AutoGluon ML

In [None]:
## install packages
!python3 -m pip install -q "mxnet<2.0.0"
!python3 -m pip install -q autogluon
!python3 -m pip install -q -U graphviz
!python3 -m pip install -q scikit-learn==0.24.2

In [None]:
! pip install delayed

In [None]:
## import packages
from autogluon.tabular import TabularPredictor

In [None]:
## run model
# Train on a labelled pandas copy instead of temporarily adding 'SalePrice' to
# the shared `train` frame: if the fit is interrupted, `train` is left
# untouched, so the label cannot leak into later cells as a feature.
autogluon_train_df = train.to_pandas()
autogluon_train_df['SalePrice'] = target

model_autogluon = TabularPredictor(label='SalePrice')
model_autogluon.fit(train_data=autogluon_train_df, excluded_model_types=['KNN'], time_limit=MAX_MODEL_RUNTIME_SECS)

# The labelled copy is only needed for fitting.
del autogluon_train_df

In [None]:
## generate predictions
# 'SalePrice' is a continuous label, so this is a regression task: use
# `predict`. `predict_proba` returns class probabilities and only applies to
# classification problems.
preds_autogluon = model_autogluon.predict(test.to_pandas())

In [None]:
## create submission
# The House Prices competition expects the columns `Id` and `SalePrice`.
submission = dt.Frame(Id=test_ids, SalePrice=dt.Frame(preds_autogluon))
# Persist the file under the name reserved for this framework in the config cell.
submission.to_csv(PATH_AUTOGLUON_SUBMISSION)
submission.head()

In [None]:
## clear memory
# Delete the 'AutogluonModels' directory from the working directory
# (presumably where AutoGluon saved its artifacts -- confirm). This raises if
# the directory does not exist, e.g. when the cell is run a second time.
shutil.rmtree('AutogluonModels')
# Drop the predictor reference so its memory can be reclaimed.
del model_autogluon

# Force a collection pass; the return value is shown as the cell output.
gc.collect()

> # H2O AutoML

In [None]:
## import packages
import h2o
from h2o.automl import H2OAutoML

In [None]:
## prepare data
h2o.init()

h2o_train = h2o.H2OFrame(train.to_pandas())
h2o_test = h2o.H2OFrame(test.to_pandas())

h2o_train['target'] = h2o.H2OFrame(target).asfactor()

In [None]:
## run model
features = [x for x in h2o_train.columns if x != 'target']

model_h2o = H2OAutoML(stopping_metric='AUC', max_runtime_secs=MAX_MODEL_RUNTIME_SECS)
model_h2o.train(x=features, y='target', training_frame=h2o_train)

In [None]:
## generate predictions
# Score the test frame with the AutoML leader model and convert the result
# with as_data_frame() for use outside H2O.
preds_h2o = model_h2o.leader.predict(h2o_test).as_data_frame()

In [None]:
# Display the prediction frame to inspect the columns it contains.
preds_h2o

In [None]:
## create submission
# The House Prices competition expects the columns `Id` and `SalePrice`; the
# leader's point predictions are in the 'predict' column.
submission = dt.Frame(Id=test_ids, SalePrice=dt.Frame(preds_h2o['predict']))
# Persist the file under the name reserved for this framework in the config cell.
submission.to_csv(PATH_H2OAML_SUBMISSION)
submission.head()

> # LightAutoML

In [None]:
## install packages
!python3 -m pip install -q lightautoml
!python3 -m pip install -q -U torch
!python3 -m pip install -q -U torchvision

In [None]:
## import packages
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [None]:
## run model
# Train on a labelled pandas copy instead of temporarily adding 'target' to
# the shared `train` frame: if the fit is interrupted, `train` keeps its
# feature-only layout for every other cell.
laml_train_df = train.to_pandas()
laml_train_df['target'] = target

# Task('reg') selects regression; the `roles` mapping names the label column.
model_laml = TabularAutoML(task = Task('reg'), timeout = MAX_MODEL_RUNTIME_SECS)
model_laml.fit_predict(train_data=laml_train_df, roles={'target': 'target'})

# The labelled copy is only needed for fitting.
del laml_train_df

In [None]:
## generate predictions
# Predict on the test features; `.data.ravel()` takes the values held by the
# returned object and flattens them to one dimension.
preds_laml = model_laml.predict(test.to_pandas()).data.ravel()

In [None]:
## create submission
# The House Prices competition expects the columns `Id` and `SalePrice`.
submission = dt.Frame(Id=test_ids, SalePrice=preds_laml)
# Persist the file under the name reserved for this framework in the config cell.
submission.to_csv(PATH_LAML_SUBMISSION)
submission.head()

# Work in progress