# 1. Importing Libraries

In [11]:
import pandas as pd
import numpy as np

import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from pytorch_tabnet.tab_model import TabNetClassifier
import imblearn as imb

import os

### Testing TensorFlow GPU Support

In [12]:
# Displays whether the installed TensorFlow build was compiled with CUDA support.
# NOTE(review): this reports build-time support only and does not by itself
# confirm that a GPU is visible at runtime. The classifier imported above comes
# from pytorch_tabnet, so confirm this TensorFlow check is still the one wanted.
tf.test.is_built_with_cuda()

True

# 2. Project Variables

In [13]:
from functions import *

In [14]:
# Task identifiers to train; each one becomes a key of the `metrics` dict.
TASKS_TO_RUN = [
    '2aii',
    '2aiii',
]

# Number of cross-validation folds handed to train_kfold.
NUM_FOLDS = 2

# Directory holding the train/test data, relative to this notebook.
# NOTE(review): not referenced by any visible cell; presumably read by the
# helpers in `functions` -- confirm.
DATA_DIR = '../train-test-data'

# 3. Model Training

In [15]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def train_tabnet(train, test, task):
    """Fit a TabNetClassifier on `train` and score its predictions on `test`.

    Parameters
    ----------
    train, test
        Data passed to `df_to_nparray` together with `task` to obtain the
        feature matrix and label vector for each split.
    task
        Task identifier forwarded unchanged to `df_to_nparray`.

    Returns
    -------
    tuple
        `(-1, accuracy, sensitivity, specificity)`. The leading -1 is a
        constant placeholder (presumably a loss slot expected by the caller --
        confirm against train_kfold). A metric whose denominator is zero
        (e.g. a test split with no 'INCREASED RISK' rows) is reported as 0.0
        instead of raising ZeroDivisionError.
    """
    positive = 'INCREASED RISK'
    negative = 'REDUCED RISK'

    X_train, y_train = df_to_nparray(train, task)
    X_test, y_test = df_to_nparray(test, task)

    model = TabNetClassifier()
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    tp = fp = tn = fn = 0
    for predicted, actual in zip(preds, y_test):
        if predicted == positive and actual == positive:
            tp += 1
        elif predicted == positive and actual == negative:
            fp += 1
        elif predicted == negative and actual == positive:
            fn += 1
        else:
            # Every remaining combination, including any label outside the
            # two expected strings, is counted as a true negative.
            tn += 1

    # Small or imbalanced folds can leave a class absent from the test split,
    # so each denominator is guarded.
    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    sensitivity = _safe_ratio(tp, tp + fn)
    specificity = _safe_ratio(tn, tn + fp)

    return -1, accuracy, sensitivity, specificity

In [10]:
# Cross-validation results, keyed by task identifier.
metrics = {}

# Feature columns handed to train_kfold for every task.
included_cols = ['CHILD_SEX','IDD_SCORE','AGE','HHID_count','HH_AGE','FOOD_EXPENSE_WEEKLY',
                 'NON-FOOD_EXPENSE_WEEKLY','HDD_SCORE','FOOD_INSECURITY','YoungBoys','YoungGirls',
                 'AverageMonthlyIncome','BEN_4PS','AREA_TYPE','FOOD_EXPENSE_WEEKLY_pc',
                 'NON-FOOD_EXPENSE_WEEKLY_pc','AverageMonthlyIncome_pc']

# Train with the TabNet trainer defined in this notebook.
# NOTE(review): the recorded output of this cell is a ValueError from the fold
# splitter reporting that the column list was received as the number of folds,
# so the positional order used here probably does not match train_kfold's
# signature in `functions` -- confirm the signature and pass by keyword.
for task in TASKS_TO_RUN:
    metrics[task] = train_kfold(NUM_FOLDS, task, included_cols, train_tabnet)

ValueError: The number of folds must be of Integral type. ['CHILD_SEX', 'IDD_SCORE', 'AGE', 'HHID_count', 'HH_AGE', 'FOOD_EXPENSE_WEEKLY', 'NON-FOOD_EXPENSE_WEEKLY', 'HDD_SCORE', 'FOOD_INSECURITY', 'YoungBoys', 'YoungGirls', 'AverageMonthlyIncome', 'BEN_4PS', 'AREA_TYPE', 'FOOD_EXPENSE_WEEKLY_pc', 'NON-FOOD_EXPENSE_WEEKLY_pc', 'AverageMonthlyIncome_pc'] of type <class 'list'> was passed.

In [18]:
# Print one summary line per task with the mean of each metric.
for task in TASKS_TO_RUN:
    print(
        f'{task}: '
        + ' '.join(
            f'{name}: {metrics[task][name]["MEAN"]}'
            for name in ('ACCURACY', 'SENSITIVITY', 'SPECIFICITY')
        )
    )

2aii: ACCURACY: 0.706881719827652 SENSITIVITY: 0.7921538461538462 SPECIFICITY: 0.2833333333333333
2aiii: ACCURACY: 0.6767741978168488 SENSITIVITY: 0.819927536231884 SPECIFICITY: 0.15476190476190474


In [8]:
# Displays the full list of accuracy values stored for task '2aii'
# (presumably one entry per fold -- confirm against train_kfold).
# NOTE(review): the recorded output lists 10 values on a 0-100 scale, whereas
# NUM_FOLDS is 2 and the summary cell prints means on a 0-1 scale; the saved
# outputs look stale from an earlier run -- restart the kernel and re-run all.
metrics['2aii']['ACCURACY']['ALL']

[61.29032373428345,
 67.7419364452362,
 74.19354915618896,
 69.9999988079071,
 73.33333492279053,
 80.0000011920929,
 80.0000011920929,
 73.33333492279053,
 83.33333134651184,
 83.33333134651184]

# 4. Model Evaluation

### Note: This section evaluates the models on the testing set. Run it only at the end.