### This notebook is derived from the following notebook by Divya. Our many thanks!!
https://www.kaggle.com/dgladha/lib-classification

# deep_autoviml is an AutoML library for building deep learning models with TensorFlow and Keras in a single line of code.
## For github visit: [deep_autoviml](https://github.com/AutoViML/deep_autoviml)
## Given train and test data, it will automatically perform the following:
- Load a wide variety of performant DNN architectures such as deep and wide, deep and cross models, etc.
- Use a hypertuner named Storm-Tuner to select the best hyperparameters for each of the model architectures
- Select the best model and add pre-processing layers for feature transformation and do selective feature engineering
- For NLP tasks: it selects a BERT or USE model along with text processors
- Train best model and run predictions using the trained model
- You can automatically save the model with its preprocessing layers and load it elsewhere or serve it using tf.serving on Cloud providers

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [None]:
import os

# Location of the dataset on Kaggle, plus a local copy for runs outside Kaggle
# (the local CSV is assumed to hold the same data — confirm before relying on it).
kaggle_datafile = "/kaggle/input/crystal-system-properties-for-liion-batteries/lithium-ion batteries.csv"
datapath = '../Ram/Data_Sets/'
filename = 'LIB.csv'
train_datafile = datapath + filename

# Read the Kaggle file when it exists; otherwise fall back to the local path.
# Previously the fallback required un-commenting a second read_csv call by hand.
datafile = kaggle_datafile if os.path.exists(kaggle_datafile) else train_datafile
df_LIB = pd.read_csv(datafile)
print(df_LIB.shape)
df_LIB.head()

# Pip Install Deep AutoViML library here

In [None]:
!pip install deep_autoviml --upgrade

In [None]:
!pip install chemparse
import chemparse

In [None]:
# Parse every chemical formula with chemparse (each result is fed to json_normalize,
# so it is expected to be a mapping — presumably element symbol -> count).
parsed_formulas = df_LIB.Formula.apply(chemparse.parse_formula)

# Spread the parsed mappings into one column per key; a key missing from a
# formula becomes 0 instead of NaN.
LIB_dict = pd.json_normalize(parsed_formulas).fillna(0)

# Append the new columns to the original frame (index-aligned join).
df_LIB = df_LIB.join(LIB_dict)

In [None]:
# Preview the first five rows of the frame after the formula-derived columns were joined.
df_LIB.head(n=5)

In [None]:
# Remove the identifier column and the raw formula text from the frame.
df_LIB = df_LIB.drop(columns=['Materials Id', 'Formula'])

In [None]:
# Hold out 10% of the rows as the test set; the fixed random_state makes the split repeatable.
train, test = train_test_split(
    df_LIB,
    test_size=0.1,
    random_state=99,
)
print(train.shape, test.shape)

# Let's visualize the dataset using AutoViz

!pip install autoviz
!pip install xlrd
#importing Autoviz class
from autoviz.AutoViz_Class import AutoViz_Class

In [None]:
# Name of the column used as the dependent variable / prediction target.
target = 'Crystal System'

# Run AutoViz on the in-memory frame: the filename is left empty and the
# DataFrame is passed through `dfte`.
av = AutoViz_Class()
df_autoviz = av.AutoViz(
    filename="",
    dfte=df_LIB,
    depVar=target,
)

# We will now use deep_autoviml to see how well it performs on this tiny dataset

In [None]:
from deep_autoviml import deep_autoviml as deepauto

In [None]:
######   Default settings for deep_autoviml   ######
# Guidance carried over from the notebook author:
#   - try keras_model_type "fast" first, then "fast2", "auto", etc.
#   - start with early_stopping set to True and only later change it to False
#   - about 15 max_trials are needed to get something decent
#   - set tuner to "storm" first, then try "optuna"
#   - nlp_char_limit is the character limit that kicks off NLP processing;
#     feature crossing (cat_feat_cross_flag) can be turned on later
keras_model_type = "fast1"
project_name = "LiB"

# Options controlling model search / preprocessing.
model_options = dict(
    nlp_char_limit=50,
    cat_feat_cross_flag=False,
    max_trials=10,
    tuner="storm",
)

# Options controlling the Keras training loop.
keras_options = dict(
    patience=10,
    class_weight=True,
    early_stopping=True,
    lr_scheduler='',
    optimizer='RMS',
)


In [None]:
# Train on the training split. The call returns the model together with
# `cat_vocab_dict`, which is handed to deepauto.predict later on.
model, cat_vocab_dict = deepauto.fit(
    train,
    target,
    keras_model_type=keras_model_type,
    project_name=project_name,
    keras_options=keras_options,
    model_options=model_options,
    save_model_flag=False,
    use_my_model='',
    model_use_case='',
    verbose=1,
)

In [None]:
# Score the held-out test split with the trained model, reusing the same
# model type and the vocabulary dict produced by deepauto.fit.
predictions = deepauto.predict(
    model,
    project_name,
    test_dataset=test,
    keras_model_type=keras_model_type,
    cat_vocab_dict=cat_vocab_dict,
)

In [None]:
# Take the last element of what deepauto.predict returned
# (presumably the predicted class labels — TODO confirm against the deep_autoviml docs).
y_preds = predictions[-1]
# Show the first four predictions as a quick sanity check.
y_preds[:4]

In [None]:
# Ground-truth values of the target column for the held-out test split.
y_test = test[target].values
# Show the first four true values for comparison with the predictions.
y_test[:4]

In [None]:
from deep_autoviml import (
    print_classification_model_stats,
    print_regression_model_stats,
)

# Print classification statistics comparing the held-out true values with the predictions.
print_classification_model_stats(y_test, y_preds)

#### That's how you get 96% balanced accuracy on a very tiny but very difficult dataset!