# 1. Importing Libraries

In [4]:
import pandas as pd
import numpy as np

import tensorflow as tf
from sklearn.model_selection import StratifiedKFold

import os

### Testing TensorFlow GPU Support

In [5]:
# Reports whether the installed TensorFlow build was compiled with CUDA support.
# NOTE(review): this alone does not show that a GPU is visible at runtime;
# tf.config.list_physical_devices('GPU') would - confirm which check is intended.
tf.test.is_built_with_cuda()

True

# 2. Project Variables

In [6]:
# Folder holding the '<task>_train.csv' / '<task>_test.csv' files (relative to this notebook).
DATA_DIR: str = '../train-test-data'

# Number of stratified cross-validation splits.
NUM_FOLDS: int = 10

# Task ids to process; each id is both the CSV file prefix and the label column name.
TASKS_TO_RUN: list = ['2aii', '2aiii']

# 3. Model Training

In [7]:
def df_to_dataset(dataframe, label, shuffle=True, batch_size=8):
    """Create a batched ``tf.data.Dataset`` of (features, target) pairs from a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source rows. A copy is taken first, so the caller's frame is not modified.
    label : str
        Name of the column holding the class label. Rows whose value equals
        'INCREASED RISK' get target 1; every other value gets target 0.
    shuffle : bool, default True
        Whether to shuffle the rows, using a buffer as large as the frame.
    batch_size : int, default 8
        Number of rows per batch.

    Returns
    -------
    tf.data.Dataset
        Batches of ``(features, target)``, where ``features`` is a dict keyed by
        the remaining column names.
    """
    dataframe = dataframe.copy()
    dataframe['target'] = np.where(dataframe[label] == 'INCREASED RISK', 1, 0)
    # drop() already returns a new frame, so no further copy is needed.
    dataframe = dataframe.drop(columns=label)

    labels = dataframe.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    # Batch regardless of `shuffle`. Previously batching was nested under the
    # `if`, so shuffle=False returned an unbatched dataset and ignored batch_size.
    ds = ds.batch(batch_size)
    return ds

In [25]:
def train_kfold(task, included_cols):
    """Cross-validate the dense network on the training CSV of one task.

    Reads ``<DATA_DIR>/<task>_train.csv``, splits it into ``NUM_FOLDS`` stratified
    folds (stratified on the ``task`` column, fixed ``random_state=42``) and, for
    every fold, trains a fresh model for 10 epochs on the training part and
    evaluates it on the held-out part.

    Parameters
    ----------
    task : str
        Task id; used both as the CSV file prefix and as the label column name.
    included_cols : list of str
        Columns fed to the network, each as a numeric feature column.

    Returns
    -------
    dict
        ``{'ACCURACY': {...}, 'LOSS': {...}}``. Each inner dict has ``'ALL'``
        (per-fold values), ``'MEAN'`` and ``'STDEV'``. Accuracy is in percent.
    """
    # Read train csv
    train_df = pd.read_csv(os.path.join(DATA_DIR, f'{task}_train.csv'), index_col=0)

    # The feature columns depend only on `included_cols`, so build them once
    # instead of once per fold.
    feature_columns = [tf.feature_column.numeric_column(col) for col in included_cols]

    # Metric arrays
    acc_per_fold = []
    loss_per_fold = []

    kfold = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
    # split() only uses X for its length; y is passed as a 1-D Series.
    for train_idx, val_idx in kfold.split(train_df, train_df[task]):
        train = train_df.iloc[train_idx]
        val = train_df.iloc[val_idx]

        # Generating a tensorflow dataset
        train_ds = df_to_dataset(train, task)
        val_ds = df_to_dataset(val, task)

        # Building model (a new, untrained network for every fold)
        model = tf.keras.Sequential([
            tf.keras.layers.DenseFeatures(feature_columns),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])

        # The last layer has no activation, hence from_logits=True.
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                      metrics=['accuracy'])

        # Fitting Model
        model.fit(train_ds, epochs=10, verbose=1)

        # Evaluate Model: scores is [loss, accuracy]
        scores = model.evaluate(val_ds, verbose=0)
        acc_per_fold.append(scores[1] * 100)
        loss_per_fold.append(scores[0])

    metrics = {
        'ACCURACY': {
            'ALL': acc_per_fold,
            'MEAN': np.mean(acc_per_fold),
            'STDEV': np.std(acc_per_fold)
        },
        # Previously collected but never returned.
        'LOSS': {
            'ALL': loss_per_fold,
            'MEAN': np.mean(loss_per_fold),
            'STDEV': np.std(loss_per_fold)
        }
    }
    return metrics

In [26]:
# Columns fed to the network as numeric features; the same set is used for every task.
included_cols = [
    'CHILD_SEX', 'IDD_SCORE', 'AGE', 'HHID_count', 'HH_AGE',
    'FOOD_EXPENSE_WEEKLY', 'NON-FOOD_EXPENSE_WEEKLY', 'HDD_SCORE',
    'FOOD_INSECURITY', 'YoungBoys', 'YoungGirls', 'AverageMonthlyIncome',
    'BEN_4PS', 'AREA_TYPE', 'FOOD_EXPENSE_WEEKLY_pc',
    'NON-FOOD_EXPENSE_WEEKLY_pc', 'AverageMonthlyIncome_pc',
]

# Cross-validation results, keyed by task id.
metrics = {}
for task in TASKS_TO_RUN:
    metric = train_kfold(task, included_cols)
    metrics[task] = metric

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# Mean cross-validated accuracy (in percent) for each task
for task in TASKS_TO_RUN:
    mean_accuracy = metrics[task]["ACCURACY"]["MEAN"]
    print(f'{task}: {mean_accuracy}')

2aii: 71.97849422693253
2aiii: 74.55913960933685


# 4. Model Evaluation

### Note: This section evaluates the models on the testing set. Run it only at the end.

In [28]:
def train_and_test(task, included_cols, models):
    """Train on the full training CSV of one task and score on its test CSV.

    Parameters
    ----------
    task : str
        Task id; used both as the CSV file prefix and as the label column name.
    included_cols : list of str
        Columns fed to the network, each as a numeric feature column.
    models : dict
        Updated in place: the model built here is stored under ``models[task]``.

    Returns
    -------
    dict
        ``{'ACCURACY': <test accuracy in percent>}``.
    """
    train_path = os.path.join(DATA_DIR, f'{task}_train.csv')
    test_path = os.path.join(DATA_DIR, f'{task}_test.csv')
    train_df = pd.read_csv(train_path, index_col=0)
    test_df = pd.read_csv(test_path, index_col=0)

    # One numeric feature column per requested input column
    feature_columns = [tf.feature_column.numeric_column(name) for name in included_cols]

    # Wrap both splits as tensorflow datasets
    train_ds = df_to_dataset(train_df, task)
    test_ds = df_to_dataset(test_df, task)

    # Assemble the network and register it before training starts
    network = tf.keras.Sequential([
        tf.keras.layers.DenseFeatures(feature_columns),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    models[task] = network

    # The output layer emits raw logits, so the loss is told to expect them
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Train, then score on the test split; evaluate() returns [loss, accuracy]
    network.fit(train_ds, epochs=10, verbose=1)
    test_scores = network.evaluate(test_ds, verbose=0)

    return {'ACCURACY': test_scores[1] * 100}

In [29]:
# Columns fed to the network as numeric features; the same set is used for every task.
included_cols = [
    'CHILD_SEX', 'IDD_SCORE', 'AGE', 'HHID_count', 'HH_AGE',
    'FOOD_EXPENSE_WEEKLY', 'NON-FOOD_EXPENSE_WEEKLY', 'HDD_SCORE',
    'FOOD_INSECURITY', 'YoungBoys', 'YoungGirls', 'AverageMonthlyIncome',
    'BEN_4PS', 'AREA_TYPE', 'FOOD_EXPENSE_WEEKLY_pc',
    'NON-FOOD_EXPENSE_WEEKLY_pc', 'AverageMonthlyIncome_pc',
]

# Trained models (filled in by train_and_test) and test-set metrics, keyed by task id.
models = {}
metrics = {}
for task in TASKS_TO_RUN:
    metric = train_and_test(task, included_cols, models)
    metrics[task] = metric

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [30]:
# Test-set accuracy (in percent) for each task
for task in TASKS_TO_RUN:
    test_accuracy = metrics[task]["ACCURACY"]
    print(f'{task}: {test_accuracy}')

2aii: 69.63696479797363
2aiii: 78.54785323143005
