In [7]:
import pandas as pd
import numpy as np

import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE 

import os
import shutil

In [8]:
tf.test.is_built_with_cuda()

True

In [9]:
from functions import *

In [10]:
DATA_DIR = '../train-test-data'
features = ['CHILD_SEX','IDD_SCORE','AGE','HHID_count','HH_AGE','FOOD_EXPENSE_WEEKLY',
                 'NON-FOOD_EXPENSE_WEEKLY','HDD_SCORE','FOOD_INSECURITY','YoungBoys','YoungGirls',
                 'AverageMonthlyIncome','BEN_4PS','AREA_TYPE','FOOD_EXPENSE_WEEKLY_pc',
                 'NON-FOOD_EXPENSE_WEEKLY_pc','AverageMonthlyIncome_pc']
train_df = pd.read_csv(os.path.join(DATA_DIR, '2aii_train.csv'), index_col = 0)
label = '2aii'

OUT_DIR = '../preliminary-image-data/2aii'

In [11]:
def train_images(train, test, label):
    dir = os.path.join(OUT_DIR)
    if not os.path.isdir(dir):
        os.mkdir(dir)
        
    dir = os.path.join(OUT_DIR, 'train')
    if not os.path.isdir(dir):
        os.mkdir(dir)
        
    dir = os.path.join(OUT_DIR, 'test')
    if not os.path.isdir(dir):
        os.mkdir(dir)
    sm = SMOTE(random_state = 42)
    X_train = train[features]
    y_train = train['2aii']

    X_train_oversampled, y_train_oversampled = sm.fit_resample(X_train, y_train)
    sm_train = pd.DataFrame(X_train_oversampled, columns=X_train.columns)
    sm_train['2aii'] = y_train_oversampled
    
    
    def sigmoid(x):
        return 1/(1+np.exp(-x))

    # Normalizing Variables
    normalize = ['IDD_SCORE','AGE','HHID_count','HH_AGE','FOOD_EXPENSE_WEEKLY',
                'NON-FOOD_EXPENSE_WEEKLY','HDD_SCORE','FOOD_INSECURITY','YoungBoys','YoungGirls',
                'AverageMonthlyIncome','FOOD_EXPENSE_WEEKLY_pc','NON-FOOD_EXPENSE_WEEKLY_pc',
                 'AverageMonthlyIncome_pc']
    train_normal = sm_train.copy()
    test_normal = test.copy()
    for f in normalize:
        train_normal[f] = sigmoid((train_normal[f]-train_normal[f].mean())/train_normal[f].std())
        test_normal[f] = sigmoid((test_normal[f]-test_normal[f].mean())/test_normal[f].std())

    train_normal['BEN_4PS'] = train_normal['BEN_4PS']-1 
    test_normal['BEN_4PS'] = test_normal['BEN_4PS']-1 

    train_normal['label'] = np.where(train_normal['2aii']=="INCREASED RISK", 1, 0)
    test_normal['label'] = np.where(test_normal['2aii']=="INCREASED RISK", 1, 0)
    
    
    IMG_SIZE = (160, 160)
    
    # Converting to Image
    n = len(features)
    w, h = IMG_SIZE
    nw = 5
    nh = (n+nw-1)//nw

    for index, row in train_normal.iterrows():
        img = Image.new("RGB", IMG_SIZE)
        for i in range(0,nh):
            for j in range(0,nw):
                idx = i*nw+j
                if idx >= n: break 
                val = int(sigmoid(row[features[idx]])*255)

                r = ImageDraw.Draw(img)
                x = i*(h//nh)
                y = j*(w//nw)
                r.rectangle([(y,x), (y+w//nw, x+h//nh)], fill=(val,val,val))
        
        dir = os.path.join(OUT_DIR,'train', str(row['label']))
        if not os.path.isdir(dir):
            os.mkdir(dir)
        img.save(os.path.join(dir, f'{index}.png'))
    
    for index, row in test_normal.iterrows():
        img = Image.new("RGB", IMG_SIZE)
        for i in range(0,nh):
            for j in range(0,nw):
                idx = i*nw+j
                if idx >= n: break 
                val = int(sigmoid(row[features[idx]])*255)

                r = ImageDraw.Draw(img)
                x = i*(h//nh)
                y = j*(w//nw)
                r.rectangle([(y,x), (y+w//nw, x+h//nh)], fill=(val,val,val))

        dir = os.path.join(OUT_DIR,'test', str(row['label']))
        if not os.path.isdir(dir):
            os.mkdir(dir)
        img.save(os.path.join(dir, f'{index}.png'))
    
    # Converting to TF Dataset
    train_ds = tf.keras.utils.image_dataset_from_directory(os.path.join(OUT_DIR, 'train'),
                                                           shuffle=True,
                                                           batch_size=8,
                                                           image_size=IMG_SIZE)
    test_ds = tf.keras.utils.image_dataset_from_directory(os.path.join(OUT_DIR, 'test'),
                                                           shuffle=True,
                                                           batch_size=8,
                                                           image_size=IMG_SIZE)
    
    
    # Building Model
    preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
    rescale = tf.keras.layers.Rescaling(1./127.5, offset=-1)
    IMG_SHAPE = IMG_SIZE + (3,)
    base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                                   include_top=False,
                                                   weights='imagenet')


    image_batch, label_batch = next(iter(train_ds))
    feature_batch = base_model(image_batch)
    print(feature_batch.shape)

    global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
    feature_batch_average = global_average_layer(feature_batch)
    print(feature_batch_average.shape)

    prediction_layer = tf.keras.layers.Dense(1, activation='sigmoid')
    prediction_batch = prediction_layer(feature_batch_average)
    print(prediction_batch.shape)

    inputs = tf.keras.Input(shape=(160, 160, 3))
    x = preprocess_input(inputs)
    x = base_model(x, training=False)
    x = global_average_layer(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = prediction_layer(x)
    model = tf.keras.Model(inputs, outputs)
    
    base_learning_rate = 0.00001
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy', 
                           tf.keras.metrics.TruePositives(),
                           tf.keras.metrics.TrueNegatives(),
                           tf.keras.metrics.FalsePositives(),
                           tf.keras.metrics.FalseNegatives()])
    
    history = model.fit(train_ds, epochs=10)
    output = np.asarray([x[0] for x in model.predict(test_ds)])
    predicted = np.where(output >= 0.5, 1, 0)

    shutil.rmtree(OUT_DIR)
    return predicted

In [12]:
metrics = train_kfold(train_df, label, 10, train_images)
metrics

Found 450 files belonging to 2 classes.
Found 31 files belonging to 2 classes.
(8, 5, 5, 1280)
(8, 1280)
(8, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 452 files belonging to 2 classes.
Found 31 files belonging to 2 classes.
(8, 5, 5, 1280)
(8, 1280)
(8, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 452 files belonging to 2 classes.
Found 31 files belonging to 2 classes.
(8, 5, 5, 1280)
(8, 1280)
(8, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 452 files belonging to 2 classes.
Found 30 files belonging to 2 classes.
(8, 5, 5, 1280)
(8, 1280)
(8, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 452 files belonging to 2 classes.
Found 30 files belonging to 2 classes.
(8, 5, 5, 1280)
(8, 1280)
(8,

{'ACCURACY': {'ALL': [0.5806451612903226,
   0.5806451612903226,
   0.6451612903225806,
   0.7333333333333333,
   0.8,
   0.43333333333333335,
   0.6333333333333333,
   0.6666666666666666,
   0.7333333333333333,
   0.6666666666666666],
  'MEAN': 0.6473118279569893,
  'STDEV': 0.09681838895249781},
 'SENSITIVITY': {'ALL': [0.6538461538461539,
   0.72,
   0.76,
   0.76,
   0.92,
   0.4,
   0.64,
   0.76,
   0.8,
   0.8],
  'MEAN': 0.7213846153846154,
  'STDEV': 0.13067408303229588},
 'SPECIFICITY': {'ALL': [0.2,
   0.0,
   0.16666666666666666,
   0.6,
   0.2,
   0.6,
   0.6,
   0.2,
   0.4,
   0.0],
  'MEAN': 0.29666666666666663,
  'STDEV': 0.22531459488161584}}