# Deep AutoViML is a brand-new AutoML library for building deep learning models with TensorFlow and Keras in a single line of code!
## It will automatically do the following:
- Load a wide variety of performant DNN architectures such as deep and wide, deep and cross models, etc.
- Use a hyperparameter tuner named Storm-Tuner to select the best hyperparameters for each of the model architectures
- Select the best model and add pre-processing layers for feature transformation and do selective feature engineering
- For NLP tasks: it selects a BERT or USE model along with text processors
- Train best model and run predictions using the trained model
- You can automatically save the model with its preprocessing layers and load it elsewhere, or serve it using TensorFlow Serving on cloud providers
## For the GitHub repository, visit: [deep_autoviml](https://github.com/AutoViML/deep_autoviml)

In [None]:
import pandas as pd
import numpy as np
from load_kaggle import load_kaggle

In [None]:
# load_kaggle() yields three objects, bound here (in this order) as the
# submission frame, the training frame and the test frame.
subm, train, test = load_kaggle()
# Print the shapes in train / test / submission order on a single line.
print(*(frame.shape for frame in (train, test, subm)))
# Preview the first rows of the training data.
train.head()

In [None]:
# Names of the three columns the model has to predict.
target = [
    "target_carbon_monoxide",
    "target_benzene",
    "target_nitrogen_oxides",
]
# Project label passed to the deep_autoviml fit/predict calls further down.
project_name = "July_TPS"

# Install Deep AutoViML here

In [None]:
!pip install featurewiz --upgrade

In [None]:
!pip install deep_autoviml

In [None]:
import featurewiz as FW

In [None]:
from deep_autoviml import deep_autoviml as deepauto

In [None]:
# Name the raw date-time column and collect the remaining plain feature
# columns of `train`: every column that is neither a target nor the
# date-time column itself.
date_col = 'date_time'
train_rem_cols = [
    col for col in train.columns
    if col != date_col and col not in target
]
# Show how many plain feature columns are left.
len(train_rem_cols)

# Let's add 19 time series features using Featurewiz

In [None]:
# Expand the date-time column of `train` into time-series features with featurewiz.
# The result is indexable: later cells read element 0 as the transformed frame
# and element 1 as the list of generated column names.
ts_outs = FW.FE_create_time_series_features(train, date_col)

In [None]:
### Let us see what features have been created from that one column ##
# Displays the first row of the transformed frame (element 0) together with
# the second element of the featurewiz result.
ts_outs[0].head(1), ts_outs[1]

In [None]:
# Adopt the transformed frame as the new training set and keep the second
# element of the featurewiz result as the time-series column list.
train, ts_cols = ts_outs[0], ts_outs[1]

In [None]:
# Run the same featurewiz expansion on the test set, this time passing in the
# column list obtained from train (presumably so that test receives matching
# features — confirm against the featurewiz documentation).
# Note that ts_cols is rebound to the second element returned by this call.
test, ts_cols = FW.FE_create_time_series_features(test, date_col, ts_cols)
test.shape

In [None]:
# Set the target columns aside as a raw array before `train` is reduced to
# its feature columns; they are re-attached in a later cell.
train_target = train[target].values
train_target.shape

In [None]:
# Keep only the model inputs: the generated time-series columns followed by
# the remaining original feature columns.
# The explicit .copy() makes `train` an independent frame, so adding the
# target columns back to it later does not raise pandas'
# SettingWithCopyWarning.
train = train[ts_cols+train_rem_cols].copy()
train.shape

In [None]:
# Reduce the test set to the time-series columns followed by the remaining
# feature columns (the same column expression that is applied to train).
test = test[ts_cols+train_rem_cols]
test.shape

In [None]:
# Re-attach the target values that were set aside earlier as columns of the
# training frame, then show the resulting shape and the first row.
train[target] = train_target
print(train.shape)
train.head(1)

# Let's run Deep AutoViML with 26 features for 3 targets

In [None]:
######   D E F A U L T   S E T T I N G S   F O R   D E E P   A U T O V I M L   ######
# Author's guidance for these settings:
#   * keras_model_type: always try "fast" first, then "fast1", "fast2", etc.
#   * early_stopping:   start with True, switch to False afterwards.
#   * max_trials:       about 15 trials are recommended for a decent result
#                       (this run uses 10).
#   * tuner:            start with "storm", then try "optuna".
#   * nlp_char_limit:   the character limit that kicks off NLP processing.
#   * cat_feat_cross_flag: feature crossing is left for later (disabled here).
keras_model_type = "fast"

# Options controlling model search and tuning.
model_options = dict(
    nlp_char_limit=50,
    cat_feat_cross_flag=False,
    max_trials=10,
    tuner="storm",
)

# Options controlling Keras training.
keras_options = dict(
    patience=10,
    class_weight=True,
    early_stopping=True,
    lr_scheduler='',
    optimizer='RMS',
)

In [None]:
# Fit deep_autoviml on the full training frame with all three target columns.
# The call returns a pair, bound here as the model and `cat_vocab_dict`;
# both are handed to deepauto.predict afterwards.
model, cat_vocab_dict = deepauto.fit(
    train,
    target,
    keras_model_type=keras_model_type,
    project_name=project_name,
    keras_options=keras_options,
    model_options=model_options,
    save_model_flag=False,
    use_my_model='',
    model_use_case='',
    verbose=1,
)

In [None]:
# Run the multi-target model on the test set, supplying the model and the
# `cat_vocab_dict` returned by fit together with the same model type.
predictions = deepauto.predict(
    model,
    project_name,
    test_dataset=test,
    keras_model_type=keras_model_type,
    cat_vocab_dict=cat_vocab_dict,
)

In [None]:
# The last element of the predict() result is used as the predicted values;
# preview its first five entries.
y_preds = predictions[-1]
y_preds[:5]

In [None]:
# Write the predictions into the target columns of the submission frame and
# preview the result.
subm[target] = y_preds
subm.head()

In [None]:
#subm.to_csv('fast_submission.csv', index=False)

# Since the model did very well on the first 2 targets, but did poorly on the third target, we will build a separate model for the third target variable
You will notice that the rows are not well shuffled with respect to 'target_nitrogen_oxides'. Let us shuffle the dataset and see whether that helps.

In [None]:
# The two target columns that are removed before training the separate
# single-target model.
drop_cols = [
    "target_carbon_monoxide",
    "target_benzene",
]

In [None]:
# Shuffle every row once: frac=1.0 returns all rows in random order, so a
# second shuffle adds nothing. The fixed seed makes the shuffled row order
# repeatable between notebook runs.
# The columns listed in drop_cols are then removed from the shuffled frame.
train2 = train.sample(frac=1.0, random_state=42).drop(columns=drop_cols)
train2.head(2)

In [None]:
# The single column predicted by the second model.
target2 = "target_nitrogen_oxides"
# Author's advice: try "fast", then "fast1", then "fast2", in that order.
keras_model_type = "fast"

In [None]:
# Fit a second deep_autoviml model on the shuffled frame with only the
# single target column; here save_model_flag is switched on.
# The returned pair is bound as the model and its `cat_vocab_dict2`, both of
# which are passed to deepauto.predict afterwards.
model2, cat_vocab_dict2 = deepauto.fit(
    train2,
    target2,
    keras_model_type=keras_model_type,
    project_name=project_name,
    keras_options=keras_options,
    model_options=model_options,
    save_model_flag=True,
    use_my_model='',
    model_use_case='',
    verbose=1,
)

In [None]:
# Run the single-target model on the test set, supplying the model and the
# `cat_vocab_dict2` returned by its fit call.
predictions2 = deepauto.predict(
    model2,
    project_name,
    test_dataset=test,
    keras_model_type=keras_model_type,
    cat_vocab_dict=cat_vocab_dict2,
)

In [None]:
# The last element of the predict() result is used as the predicted values;
# preview its first five entries.
y_preds2 = predictions2[-1]
y_preds2[:5]

In [None]:
# Store the single-target model's predictions in the target2 column of the
# submission frame and preview the result.
subm[target2] = y_preds2
subm.head()

In [None]:
# Write the submission frame to 'submission_combo.csv' without the index column.
subm.to_csv('submission_combo.csv', index=False)