# Neural Network Training Process

First, set up inline plotting, the imports, and the global variables used throughout the notebook:

In [None]:
# Put these at the top of every notebook:
#  - autoreload re-imports edited modules before each cell executes;
#  - matplotlib inline renders figures in the cell output.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

**Global variables list:**  
NAME: nickname for our model, must match the folder name in DL folder  
INP: Directory where we will read our input file  
DIR: Directory where we'll save model and export our parameters  
cat_vars: List of categorical variables in our model  
cont_vars: List of continuous variables in our model  
QP: Quantization Parameter  
Layers: Number of neurons per hidden layer in our network  
Dropouts: Percentage of dropout rate per hidden layer  
BN_use: Use batch normalization if set to True  

In [None]:
# Imports and Global Variable Declarations
from fastai import *
from fastai.tabular import *

# Ask fastai to place data and models on the GPU.
defaults.device='cuda'
# Model nickname; used as the output sub-folder name and inside checkpoint names.
NAME='blowing'
# Folder that holds the input CSV files (SSE_<QP>.csv).
INP='./DL'
# Folder that receives the exported mapper and layer parameters.
DIR='./DL/{0}'.format(NAME)
# Columns handled as categorical inputs.
cat_vars = ['Height', 'Width']
# Columns handled as continuous inputs; the CSV is read with these columns first,
# then cat_vars, then the label 'y'.
cont_vars = ['top_left', 'top_center', 'top_right', 'left', 'center', 'right', 
             'bottom_left', 'bottom_center', 'bottom_right']
# Quantization parameter: selects the input file and names the exported files.
QP=22
# Width of each hidden layer.
Layers=[22, 20]
# Dropout probability for each hidden layer (same length as Layers).
Dropouts=[0.001, 0.01]
# Whether the tabular model uses batch normalization.
BN_use=True

### Helper Functions:

Helper functions to help us keep the code concise  
get_cv_idx(): return random list of indices from a list given a percentage value "borrowed from FastAIv0.7"  
read_proc(): reads the input file, normalizes the continuous variables, and returns the DataFrame, the fitted Normalize processor, the FastAI TabularDataBunch, and the Learner  
export_mapper(): export means and stds used for normalization "to be used for inference"  
save_model(): saves our model for later use  
export_parameters(): export each layer's weights and biases "to be used for inference"  

In [None]:
# Helper Functions

def read_proc():
    """Load the SSE CSV for the current ``QP`` and build the fastai objects for training.

    Reads the module-level ``INP``, ``QP``, ``cont_vars``, ``cat_vars``, ``Layers``,
    ``Dropouts`` and ``BN_use``.

    Returns:
        A 4-tuple ``(df, normz, data, learn)``: the loaded DataFrame, the ``Normalize``
        processor that was called on it (its ``means``/``stds`` are what
        ``export_mapper`` writes out), the databunch (batch size 1024, random
        validation split with seed 42) and the tabular learner.
    """
    # The CSV has no header row: continuous columns first, then categorical, then the label.
    csv_path = '{0}/SSE_{1}.csv'.format(INP, QP)
    column_names = cont_vars+cat_vars+['y']
    df = pd.read_csv(csv_path, names=column_names)

    # Normalization is applied by hand (not through `procs`) so the fitted
    # processor can be returned to the caller.
    procs = [Categorify]
    normz = Normalize(cat_vars, cont_vars)
    normz(df)

    items = TabularList.from_df(df, cat_names=cat_vars, cont_names=cont_vars, procs=procs)
    labelled = items.split_by_rand_pct(seed=42).label_from_df(cols='y')
    data = labelled.databunch(bs=1024)

    learn = tabular_learner(data, layers=Layers, metrics=accuracy, emb_drop=0.001,
                            ps=Dropouts, use_bn=BN_use, model_dir="./DL/models")
    return df, normz, data, learn

def export_mapper():
    """Write the normalization statistics of the continuous variables to a CSV file.

    Reads the module-level ``normz`` (its ``means`` and ``stds`` mappings),
    ``cont_vars``, ``DIR`` and ``QP``.

    The file ``{DIR}/{QP}/mapper_{QP}.csv`` gets two rows, the means and then the
    standard deviations, with one column per entry of ``cont_vars`` in that order.
    No header or index is written, and every row ends with ``;`` followed by a
    newline. The target folder must already exist.

    Returns:
        None.
    """
    mapper_df = pd.DataFrame(index=['mean', 'std'], columns=cont_vars)
    for column in cont_vars:
        # Assign through a single .loc[row, column] call. The chained form
        # `mapper_df[column].loc[row] = ...` may write to a temporary copy and leave
        # the frame untouched, depending on the pandas version and settings.
        mapper_df.loc['mean', column] = normz.means[column]
        mapper_df.loc['std', column] = normz.stds[column]
    # NOTE(review): `line_terminator` was renamed `lineterminator` in pandas 1.5;
    # kept here because the notebook targets the pandas version used with fastai v1.
    mapper_df.to_csv('{0}/{1}/mapper_{1}.csv'.format(DIR, QP), index=False,
                     header=None, line_terminator=';\n')
    
def save_model():
    """Checkpoint the global ``learn`` under a name that embeds QP, NAME and accuracy.

    The accuracy is the first metric of the last recorded epoch, expressed as a
    percentage and rounded to two decimals. The checkpoint name has the form
    ``QP<QP>_<NAME>_acc<accuracy>``.
    """
    last_epoch_metrics = learn.recorder.metrics[-1]
    acc = last_epoch_metrics[0].numpy() * 100
    checkpoint_name = f'QP{QP}_{NAME}_acc{(acc.round(2))}'
    learn.save(checkpoint_name)

def export_parameters():
    """Dump the parameters of every embedding, linear and batch-norm layer to CSV.

    Walks ``learn.layer_groups[0]`` in order and writes, under ``{DIR}/{QP}/``:

    * ``emb<k>-weight.csv`` for each ``nn.Embedding``;
    * ``lins<k>-weight.csv`` and ``lins<k>-bias.csv`` for each ``nn.Linear``;
    * ``bns<k>-<field>.csv`` for each ``nn.BatchNorm1d``, with ``<field>`` in
      weight, bias, running_mean, running_var.

    ``<k>`` counts the layers of the same kind, starting at 0. Files carry no header
    or index. Weight matrices end each row with a comma and a newline; the other
    files end each row with a comma and a space.
    """
    def _write(tensor, path, terminator):
        # Copy the tensor to host memory and write it as a bare CSV.
        pd.DataFrame(tensor.data.cpu().numpy()).to_csv(
            path, index=False, header=None, line_terminator=terminator)

    n_emb = n_lin = n_bn = 0
    bn_fields = ['weight', 'bias', 'running_mean', 'running_var']
    for module in learn.layer_groups[0]:
        if isinstance(module, nn.Embedding):
            _write(module.weight,
                   '{0}/{1}/emb{2}-weight.csv'.format(DIR, QP, n_emb), ',\n')
            n_emb += 1
        if isinstance(module, nn.Linear):
            _write(module.weight,
                   '{0}/{1}/lins{2}-weight.csv'.format(DIR, QP, n_lin), ',\n')
            _write(module.bias,
                   '{0}/{1}/lins{2}-bias.csv'.format(DIR, QP, n_lin), ", ")
            n_lin += 1
        if isinstance(module, nn.BatchNorm1d):
            for field in bn_fields:
                _write(getattr(module, field),
                       '{0}/{1}/bns{2}-{3}.csv'.format(DIR, QP, n_bn, field), ', ')
            n_bn += 1

In [None]:
# Load the first 501688 rows of the SSE file for the current QP (the file has no header:
# continuous columns, then categorical columns, then the label 'y').
# NOTE(review): this repeats the first lines of read_proc() with an extra row limit;
# the meaning of 501688 is not documented here.
df = pd.read_csv('{0}/SSE_{1}.csv'.format(INP, QP), names=cont_vars+cat_vars+['y'], nrows=501688)
procs = [Categorify]
# Normalization is applied by hand so that `normz` (means/stds) stays available
# for export_mapper().
normz = Normalize(cat_vars, cont_vars)
normz(df)

In [None]:
# Build the databunch with batch size 2048. The validation set is the last 20% of
# the rows (by position) rather than a random split.
defaults.device='cuda'
data = (TabularList.from_df(df, cat_names=cat_vars, cont_names=cont_vars, procs=procs)
#        .split_by_rand_pct(seed=42)
        .split_by_idx(list(range(len(df)-round(len(df)*0.2), len(df))))
       .label_from_df(cols='y')
       .databunch(bs=2048))
# data.train_dl = data.train_dl.new(shuffle=False)

In [None]:
# Create the tabular learner from the global hyper-parameters, then switch it to
# mixed-precision (fp16) training. Checkpoints are stored under ./DL/models.
learn = tabular_learner(data, layers=Layers, metrics=accuracy, emb_drop=0.001, ps=Dropouts, 
                        use_bn=BN_use, model_dir="./DL/models")
learn = learn.to_fp16()

In [None]:
# Set momentum to 1 on three modules of the model.
# NOTE(review): layers[2] and layers[6] are presumably the BatchNorm1d layers of the
# hidden blocks, and the indices depend on Layers/BN_use; confirm by printing learn.model.
# With momentum=1 a batch-norm layer's running statistics are replaced by those of
# the most recent batch.
learn.model.layers[2].momentum=1
learn.model.layers[6].momentum=1
learn.model.bn_cont.momentum=1

In [None]:
# Restore a previously saved checkpoint from the learner's model_dir.
# NOTE(review): a checkpoint with this name is written by a learn.save cell further
# down, so on a fresh run this cell only works if the file already exists on disk.
learn.load('FP16-BS2k-M1-Acc34.94-L2.18');

In [None]:
# Run the learning-rate range test and plot the recorded loss against learning rate.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Train for 1 epoch with the one-cycle policy, maximum learning rate 1e-2.
learn.fit_one_cycle(1, 1e-2)

In [None]:
# Checkpoint the model. The name presumably encodes precision, batch size, BN momentum,
# validation accuracy and loss; the values are typed by hand, not computed.
learn.save('FP16-BS2k-M1-Acc34.27-L2.214')

In [None]:
# Train for 9 more epochs with the one-cycle policy, maximum learning rate 1e-2.
learn.fit_one_cycle(9, 1e-2)

In [None]:
# Checkpoint the model after the 9-epoch run (hand-typed name, see the metrics above).
learn.save('FP16-BS2k-M1-Acc34.9-L2.18')

In [None]:
# Train for 8 more epochs with the one-cycle policy, maximum learning rate 1e-2.
learn.fit_one_cycle(8, 1e-2)

In [None]:
# Checkpoint the model; this is the checkpoint name that the learn.load cells use.
learn.save('FP16-BS2k-M1-Acc34.94-L2.18');

In [None]:
# Re-run the learning-rate range test on the trained model and plot the result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Train for 1 epoch with plain fit (no one-cycle schedule) at learning rate 1e-2.
learn.fit(1, 1e-2)

In [None]:
# Train for 1 epoch with the one-cycle policy at a lower maximum learning rate (1e-3).
learn.fit_one_cycle(1, 1e-3)

In [None]:
# Export everything for the current QP: the normalization statistics, a checkpoint,
# and the per-layer parameters. All three helpers read the global normz / learn / QP.
export_mapper()
save_model()
export_parameters()

In [None]:
# Repeat the whole train-and-export pipeline for the remaining quantization parameters.
# QP is deliberately the loop variable: read_proc(), export_mapper(), save_model() and
# export_parameters() all read the module-level QP.
for QP in [27, 32, 37]:
    # Drop the previous iteration's objects before loading the next dataset.
    # Rebinding to None never fails, whereas `del` raises NameError for any name
    # that has not been bound yet.
    df = normz = data = learn = None
    # read_proc() returns (df, normz, data, learn). The second value must be bound to
    # `normz`, because export_mapper() reads the global `normz`; any other name makes
    # it export the statistics of an earlier dataset.
    df, normz, data, learn = read_proc()
    learn.fit_one_cycle(50, 1e-2)
    export_mapper()
    save_model()
    export_parameters()

In [None]:
# Build a test set: read the exported means/stds, normalize the first 1e5 rows of the
# SSE file with them (row 0 = mean, row 1 = std), and attach the result to the learner.
# NOTE(review): export_mapper() writes to '{DIR}/{QP}/mapper_{QP}.csv' and ends each row
# with ';', so this path and a plain read_csv presumably rely on a hand-copied or
# hand-edited file; confirm.
# NOTE(review): QP here is whatever value the last executed cell left behind (37 after
# the QP loop), while the mapper file is hard-coded to 22.
mapper=pd.read_csv('./DL/mapper_22.csv', names=cont_vars)
test = pd.read_csv('{0}/SSE_{1}.csv'.format(INP, QP), names=cont_vars+cat_vars+['y'], nrows=1e5)
for i in cont_vars:
    test[i]=(test[i]-mapper.iloc[0][i]) / mapper.iloc[1][i]
learn.data.add_test(TabularList.from_df(test, cat_names=cat_vars, cont_names=cont_vars, procs=procs))

In [None]:
# Reload the QP-22 checkpoint into the current learner before running predictions.
learn.load('FP16-BS2k-M1-Acc34.94-L2.18');

In [None]:
# Preview the first rows of the normalized test frame.
test.head()

In [None]:
# Predict a single sample: the second row of `test`.
learn.predict(test.iloc[1])

In [None]:
# Inspect the weight matrix of the first embedding layer.
learn.model.embeds[0].weight

In [None]:
# Run the model over the test set attached with learn.data.add_test.
TEST = learn.get_preds(DatasetType.Test)

In [None]:
# Show the second element of the get_preds result.
# NOTE(review): presumably the targets tensor, which carries no real labels for a
# test set; TEST[0] would be the predictions. Confirm which one was intended.
TEST[1]

## Export To C++

We can export our model to C++ in PyTorch v1 using Torch JIT compiler. More details:  
https://pytorch.org/tutorials/advanced/cpp_export.html

In [None]:
# Put the model in evaluation mode before tracing it for export.
learn.model.eval();

In [None]:
# Reload the checkpoint, asking for the saved tensors to be mapped to the CPU.
# NOTE(review): this presumably does not move the model itself off the GPU; the
# explicit .cpu() call in the next cell does that. Confirm.
learn.load('FP16-BS2k-M1-Acc34.94-L2.18', device='cpu');

In [None]:
# NOTE(review): the example inputs built in the following cells are torch.cuda tensors,
# which do not match a CPU model; this cell looks like a leftover experiment. Confirm
# whether the export is meant to run on CPU or GPU.
learn.model = learn.model.cpu()  # Transfer model to CPU - can't export model to C++ due to HalfTensors

In [None]:
# Inspect the bias of the first module in learn.model.layers (shows its dtype and device).
learn.model.layers[0].bias

In [None]:
# Example continuous input for tracing: 2 samples x 9 values (one per entry of
# cont_vars), created as a half-precision tensor on the GPU.
ex_cont = torch.cuda.HalfTensor([[1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3, 4, 5, 6, 7, 8, 9]])
ex_cont

In [None]:
# Example categorical input for tracing: 2 samples x 2 category codes (one per entry
# of cat_vars), created as an int64 tensor on the GPU.
ex_cat = torch.cuda.LongTensor([[5, 6], [5, 6]])
ex_cat

In [None]:
# Trace the model with the example inputs (categorical first, continuous second) to
# obtain a TorchScript module that can be saved and loaded from C++.
# The example tensors must be on the same device as the model.
XXX = torch.jit.trace(learn.model, (ex_cat, ex_cont))

In [None]:
# Two hand-written samples for checking the traced module: category codes and
# continuous values in the same layout as the tracing examples.
# NOTE(review): the continuous values are presumably already normalized; confirm.
cat = torch.cuda.LongTensor([[7,7], [7,6]])
cont = torch.cuda.HalfTensor(
    [[3.071918, 1.946268, 1.806007, 1.771071, 2.639419, 3.387859, 3.766593, 6.594494, 4.977126],
     [2.073078, 1.368932, 1.566441, 0.901851, 1.519509, 2.410473, 2.532072, 4.585548, 3.595045]])

In [None]:
# Shape of a single sample: 1-D, with no batch dimension.
cat[0].size()

In [None]:
# Shape after unsqueeze(0): a leading batch dimension of size 1 is added.
cat[0].unsqueeze(0).size()

In [None]:
# Run the traced module on one sample; the batch dimension is added by hand.
outpt = XXX(cat[0].unsqueeze(0), cont[0].unsqueeze(0))    # Only one Sample! Has to be unsqueezed(0) first

In [None]:
# Run the traced module on the two-sample batch; this overwrites the single-sample result.
outpt = XXX(cat, cont)    # Batch "at least 2 samples"

In [None]:
# Maximum along dim 1 for every sample: v holds the highest output value, i its index.
v,i = outpt.max(1)

In [None]:
# Show the index of the highest output for each sample in the batch.
i

In [None]:
# Predict the first row of df with the fastai learner.
# NOTE(review): presumably a cross-check against the traced module's output; confirm.
learn.predict(df.iloc[0])

In [None]:
# Serialize the traced module to the working directory; this is the file to load from C++.
XXX.save('FP16-model.pt')