# TabNet

#### Load Modules

In [7]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# .py modules are picked up live instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [8]:
import torch
import torch.nn as nn
import h5py
import numpy as np

In [None]:
from data.data_loader import NISDatabase
from utils.experiments import find_nlike_features, create_onehot_info
from utils.feature_utils import calc_output_dims

In [3]:
import os

# Switch the working directory to the repository checkout so that the relative
# DATA_FOLDER path used further down resolves correctly.
# Set the NIS_REPO_ROOT environment variable to point at a different checkout;
# the original author's location stays the default, so existing setups behave
# exactly as before.
# NOTE(review): the project-local `data` / `utils` imports presumably also rely
# on this directory being current. If so, this cell must run before that import
# cell on a fresh kernel, even though it appears after it in the notebook.
os.chdir(os.environ.get('NIS_REPO_ROOT', '/home/aisinai/work/repos/nis_patient_encoding/'))

#### Set Initial Parameters

In [2]:
# Run everything on the CPU. To target a GPU instead, build the device from a
# CUDA spec such as 'cuda:7'.
device = torch.device(type='cpu')

In [None]:
# Folder (relative to the working directory) that holds the raw data files.
DATA_FOLDER = 'data/raw/'

# Model input features, keyed by column name. Each entry records how the feature
# is represented ('continuous', 'one-hot' or 'embedding') together with a
# 'rep_func' slot that is left unset (None) for every feature here.
INPUT_FEATURES = {
    feature_name: {'type': representation, 'rep_func': None}
    for feature_name, representation in (
        ('AGE', 'continuous'),
        ('FEMALE', 'one-hot'),
        ('HCUP_ED', 'one-hot'),
        ('TRAN_IN', 'one-hot'),
        ('ELECTIVE', 'one-hot'),
        ('ZIPINC_QRTL', 'one-hot'),
        ('DXn', 'embedding'),
        ('ECODEn', 'embedding'),
        ('PRn', 'embedding'),
    )
}

# Default network build specification, split into three sections:
#   'encoding' - layer count, width and activation name
#   'latent'   - width of the latent representation
#   'decoding' - width, activation name and layer count, plus an 'output_dims'
#                slot that starts out as None (presumably filled in later by
#                calc_output_dims - TODO confirm).
DEFAULT_BUILD = dict(
    encoding=dict(
        total_layers=3,
        dimensions=128,
        activation='leaky_relu',
    ),
    latent=dict(dimensions=32),
    decoding=dict(
        dimensions=128,
        activation='leaky_relu',
        total_layers=3,
        output_dims=None,
    ),
)

# Settings for the embedded code families, keyed by family name. Each entry holds:
#   'header_prefix' - bytes prefix associated with the family (presumably matched
#                     against the dataset's column headers - TODO confirm)
#   'num_classes'   - number of classes for the family
#   'dimensions'    - embedding width
EMBEDDING_DICTIONARY = {
    family: {
        'header_prefix': prefix,
        'num_classes': class_count,
        'dimensions': width,
    }
    for family, prefix, class_count, width in (
        ('DXn', b'DX', 12583, 256),
        ('PRn', b'PR', 4445, 64),
        # NOTE(review): ECODEn has the same class count as PRn (4445) - confirm
        # this is intentional and not a copy-paste leftover.
        ('ECODEn', b'ECODE', 4445, 32),
        ('CHRONBn', b'CHRONB', 19, 16),
    )
}

# Names of the one-hot encoded features, as bytes.
ONE_HOT_LIST = [b'ELECTIVE', b'FEMALE', b'HCUP_ED', b'TRAN_IN', b'ZIPINC_QRTL']

# Class count per one-hot feature, keyed by the decoded (str) feature name and
# listed in the same order as ONE_HOT_LIST. Trailing comments give the value range.
ONE_HOTS = {
    'ELECTIVE': {'num_classes': 2},     # 0,1
    'FEMALE': {'num_classes': 2},       # 0,1
    'HCUP_ED': {'num_classes': 5},      # 0-4
    'TRAN_IN': {'num_classes': 3},      # 0-2
    # Values 1-4. NOTE(review): 5 classes for a 1-4 range presumably leaves
    # index 0 unused - confirm.
    'ZIPINC_QRTL': {'num_classes': 5},
}

# Continuous features, keyed by name; AGE is recorded with feature index 0.
CONTINUOUS = {'AGE': {'feature_idx': 0}}

# Bundle the three representation tables under one dict. The values are the
# same objects as the tables above (not copies), so later changes to either
# are visible through both names.
FEATURE_REPRESENTATIONS = {
    'embedding': EMBEDDING_DICTIONARY,
    'one_hots': ONE_HOTS,
    'continuous': CONTINUOUS,
}

In [None]:
# Run-level settings (their consumers are not shown in this part of the notebook).

# Data loading
NUM_WORKERS = 4
BATCH_SIZE = 512

# Optimisation
NUM_EPOCHS = 100
LEARNING_RATE = 0.0005

#### Load and modify data.

In [None]:
# Open the HDF5 file in DATA_FOLDER as an NISDatabase, passing 'TRAIN' and an
# empty dict (presumably the split to load and an options/state mapping -
# TODO confirm against NISDatabase's signature).
db = NISDatabase(f'{DATA_FOLDER}NIS_2012_2014_proto_emb.h5', 'TRAIN', {})

In [None]:
# Perform all relevant feature modifications.
# The statements below are order-dependent: each one feeds on state set up by
# the previous ones.

# Return value is discarded, so this presumably updates the embedding table in
# place using the database headers - TODO confirm.
find_nlike_features(db.headers, FEATURE_REPRESENTATIONS['embedding'])
# Replace the one-hot table with whatever create_onehot_info builds from the
# database, the current one-hot table and the embedding table.
FEATURE_REPRESENTATIONS['one_hots'] = create_onehot_info(db, FEATURE_REPRESENTATIONS['one_hots'], FEATURE_REPRESENTATIONS['embedding'])
# Attach the feature tables to the build spec under the 'features' key.
DEFAULT_BUILD['features'] = FEATURE_REPRESENTATIONS
# Return value is discarded; presumably fills DEFAULT_BUILD's 'output_dims'
# slot in place - TODO confirm.
calc_output_dims(DEFAULT_BUILD)