# Pain Data Preparation
This notebook prepares the pain dataset so that a convolutional neural network can be trained on it successfully. Data augmentation techniques such as greyscaling, histogram equalization, etc. are employed.

In [1]:
# Relevant imports
import os
import sys
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import math
import tensorflow as tf

# Resolve the parent of the current working directory and put it on sys.path (once), so that the
# project-local `Scripts` package imported below can be found.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from Scripts import Data_Loader_Functions as DL
from Scripts import Image_Processor as IP

In [2]:
# Define folder paths (all located below the directory resolved in `module_path`)
RAW_DATA = os.path.join(module_path, "Data", "Raw Data", "Pain")
# Target of the preprocessing step and source of the augmentation steps further down. Those cells
# reference this constant, so it has to exist for the notebook to run on a fresh kernel.
# NOTE(review): the folder name "Preprocessed Data" is an assumption - confirm it against the
# project's actual directory layout.
PREPROCESSED_DATA = os.path.join(module_path, "Data", "Preprocessed Data", "Pain")
AUGMENTED_DATA = os.path.join(module_path, "Data", "Augmented Data", "Pain")
AUGMENTED_DATA_TWOSTEP = os.path.join(module_path, "Data", "Augmented Data", "Pain Two-Step Augmentation")
AUGMENTED_DATA_FLEXIBLE = os.path.join(module_path, "Data", "Augmented Data", "Flexible Augmentation")

In [3]:
# Set Seed
# Seeds NumPy's global random state. The np.random.choice calls and the pandas `.sample()` calls
# in this notebook pass no explicit random_state, so they draw from this global state.
SEED = 123
np.random.seed(SEED)

## Explore Data

In [None]:
# Count how many raw frames carry a pain label (> 0) and how many do not (== 0)
img_paths = np.array(DL.get_image_paths(RAW_DATA))
labels = np.array(DL.get_labels(img_paths))
pain_scores = labels[:, 4].astype(int)  # label column 4 is the value compared against 0
pain_labels = labels[pain_scores > 0]
no_pain_labels = labels[pain_scores == 0]
print("Pain Labels:", len(pain_labels))
print("No Pain Labels:", len(no_pain_labels))

In [None]:
# Count the entries in each group folder, ignoring '.DS_Store' entries
group_1_dir = os.path.join(RAW_DATA, "group_1")
group_2_dir = os.path.join(RAW_DATA, "group_2")
g1_img_paths = list(filter(lambda entry: entry != '.DS_Store', os.listdir(group_1_dir)))
g2_img_paths = list(filter(lambda entry: entry != '.DS_Store', os.listdir(group_2_dir)))
print("Group 1:", len(g1_img_paths))
print("Group 2:", len(g2_img_paths))

In [None]:
# Build one label table per group and count the distinct sessions of every person
label_columns = ['Person', 'Session', 'Culture', 'Frame', 'Pain']
g1_img_paths = np.array(DL.get_image_paths(os.path.join(RAW_DATA, "group_1")))
g2_img_paths = np.array(DL.get_image_paths(os.path.join(RAW_DATA, "group_2")))
g1_labels = np.array(DL.get_labels(g1_img_paths))
g2_labels = np.array(DL.get_labels(g2_img_paths))
df_1 = pd.DataFrame(g1_labels, columns=label_columns).astype(int)
df_2 = pd.DataFrame(g2_labels, columns=label_columns).astype(int)
df_1['Group'] = 1
df_2['Group'] = 2
df = pd.concat([df_1, df_2])
sess_num = (
    df.groupby(['Person', 'Group'])['Session']
    .nunique()
    .to_frame()
    .sort_values(['Group', 'Person'])
)
sess_num

In [None]:
# Mean number of distinct sessions per person, reported separately for each group
sessions_per_person_g1 = df_1.groupby('Person')['Session'].nunique()
sessions_per_person_g2 = df_2.groupby('Person')['Session'].nunique()
print("Average Sessions Group 1: {0:.2f}".format(sessions_per_person_g1.mean()))
print("Average Sessions Group 2: {0:.2f}".format(sessions_per_person_g2.mean()))

In [None]:
# Pain / No Pain per group
# Values are printed in the order the label announces: pain frames, no-pain frames, and the ratio
# of pain to no-pain frames (nan when a group has no no-pain frames).
for group_id in (1, 2):
    group_pain = df.loc[df['Group'] == group_id, 'Pain']
    n_pain = int((group_pain > 0).sum())
    n_no_pain = int((group_pain == 0).sum())
    pain_ratio = n_pain / n_no_pain if n_no_pain else float('nan')
    print("Group {} Pain/No Pain/Ratio: {} | {} | {:.2f}".format(group_id, n_pain, n_no_pain, pain_ratio))

## Process Images
We will now process the images. Preprocessing includes converting to greyscale, and histogram equalization.

In [None]:
# Preprocess images: hand every ".jpg" below RAW_DATA to the project's bulk image processor, with
# PREPROCESSED_DATA as the second (target) argument.
# Requires the PREPROCESSED_DATA folder constant to be defined alongside the other folder paths.
IP.bulk_process_images(RAW_DATA, PREPROCESSED_DATA, ".jpg")

In [None]:
# Flip images and copy originals into augmented data folder.
# The helper is called twice on the same inputs: once in "flip" mode and once in "original" mode.
# NOTE(review): label_threshold=-1 presumably selects every frame regardless of its pain label -
# confirm in Scripts/Image_Processor.
IP.bulk_augment_images(PREPROCESSED_DATA, AUGMENTED_DATA_TWOSTEP, ".jpg", "flip", "pain", label_threshold=-1)
IP.bulk_augment_images(PREPROCESSED_DATA, AUGMENTED_DATA_TWOSTEP, ".jpg", "original", "pain", label_threshold=-1)

In [None]:
# Rotate Originals and flipped images, and ensure that naming conventions stay consistent.
# Source and target folder are identical here, so the "rotate_crop" results are written next to
# the "_flipped.jpg" / "_original.jpg" files they are derived from.
# NOTE(review): the final rename call is given "_rotated" and "_straight"; presumably it tags the
# non-rotated files as "_straight" - confirm the exact semantics in Scripts/Image_Processor.
IP.bulk_augment_images(AUGMENTED_DATA_TWOSTEP, AUGMENTED_DATA_TWOSTEP, "_flipped.jpg", "rotate_crop", "pain", label_threshold=-1)
IP.bulk_augment_images(AUGMENTED_DATA_TWOSTEP, AUGMENTED_DATA_TWOSTEP, "_original.jpg", "rotate_crop", "pain", label_threshold=-1)
IP.bulk_rename_files(AUGMENTED_DATA_TWOSTEP, AUGMENTED_DATA_TWOSTEP, "_rotated", "_straight")

In [None]:
# Crop images to same maximum width and height (10-degree rotation in previous step cropped rotated images
# down to (215, 215), so this is chosen as a max width/height)
# NOTE(review): the equivalent call in the "Flexible Data Augmentation" section passes ".jpg" as a
# fourth argument; confirm that omitting it here relies on a matching default.
IP.bulk_crop_images(AUGMENTED_DATA_TWOSTEP, AUGMENTED_DATA_TWOSTEP, (215, 215))

In [None]:
# Downsample augmented data, one call per group folder.
# NOTE(review): the helper receives only a folder path, so it presumably changes the files on disk
# in place - confirm in Scripts/Data_Loader_Functions before re-running.
DL.downsample_data(os.path.join(AUGMENTED_DATA_TWOSTEP, "group_1"))
DL.downsample_data(os.path.join(AUGMENTED_DATA_TWOSTEP, "group_2"))

# Flexible Data Augmentation
Code snippets that make it possible to move doubly augmented data around quickly. The main purpose is to ensure that the same image, in original or augmented form, is not used for training and testing at the same time.

### Step 1: Double-augment images

In [None]:
# Full double-augmentation pipeline for the "flexible" data folder. It runs the same sequence as
# the two-step cells above (flip/copy, rotate, rename, crop), but targets AUGMENTED_DATA_FLEXIBLE.
# Requires the PREPROCESSED_DATA folder constant to be defined alongside the other folder paths.

# Mirror folder structure
print("Mirror Folder Structure")
DL.mirror_folder_structure(RAW_DATA, AUGMENTED_DATA_FLEXIBLE)

# Flip images and copy originals into augmented data folder
print("Flip Images")
IP.bulk_augment_images(PREPROCESSED_DATA, AUGMENTED_DATA_FLEXIBLE, ".jpg", "flip", "pain", label_threshold=-1)
IP.bulk_augment_images(PREPROCESSED_DATA, AUGMENTED_DATA_FLEXIBLE, ".jpg", "original", "pain", label_threshold=-1)

# Rotate Originals and flipped images, and ensure that naming conventions stay consistent
print("Rotate Images")
IP.bulk_augment_images(AUGMENTED_DATA_FLEXIBLE, AUGMENTED_DATA_FLEXIBLE, "_flipped.jpg", "rotate_crop", "pain", label_threshold=-1)
IP.bulk_augment_images(AUGMENTED_DATA_FLEXIBLE, AUGMENTED_DATA_FLEXIBLE, "_original.jpg", "rotate_crop", "pain", label_threshold=-1)
IP.bulk_rename_files(AUGMENTED_DATA_FLEXIBLE, AUGMENTED_DATA_FLEXIBLE, "_rotated", "_straight")

# Crop images to same maximum width and height (10-degree rotation in previous step cropped rotated images
# down to (215, 215), so this is chosen as a max width/height)
print("Crop Images")
IP.bulk_crop_images(AUGMENTED_DATA_FLEXIBLE, AUGMENTED_DATA_FLEXIBLE, (215, 215), ".jpg")

### Step 2: Reset Folder Structure

In [4]:
# Moving all images into the "raw" subfolder, so that a new allocation can start from a clean state.
# NOTE(review): this presumably moves files on disk - confirm in Scripts/Data_Loader_Functions.
DL.reset_to_raw(AUGMENTED_DATA_FLEXIBLE)

In [5]:
# Deleting all empty folders below the flexible augmentation folder (run after the reset above,
# which leaves the former group folders without images).
DL.delete_empty_folders(AUGMENTED_DATA_FLEXIBLE)

### Step 3: Load DataFrame

In [6]:
# Get all image paths and corresponding labels into a dataframe
int_columns = ['Person', 'Session', 'Culture', 'Frame', 'Pain']
str_columns = ['Trans_1', 'Trans_2', 'img_path']

img_paths = np.array(DL.get_image_paths(AUGMENTED_DATA_FLEXIBLE))
labels = np.array(DL.get_labels(img_paths))
df = pd.DataFrame(labels, columns=int_columns + ['Trans_1', 'Trans_2'])
df[int_columns] = df[int_columns].astype(int)
df['img_path'] = img_paths
df[str_columns] = df[str_columns].astype(str)
df = df.sort_values(['Person', 'Session', 'Frame', 'Trans_1', 'Trans_2'], ascending=[True, True, True, False, False]).reset_index(drop=True)

# temp_id identifies one source frame across all of its augmented variants; the split cells use it
# to keep variants of the same frame on one side of the train/test split. The parts are joined with
# an explicit separator because plain concatenation is ambiguous: person 43 / session 1 / frame 23
# and person 43 / session 12 / frame 3 would both become "43123".
df['temp_id'] = df['Person'].astype(str) + '_' + df['Session'].astype(str) + '_' + df['Frame'].astype(str)

#### Step 3.1: Remove Subject 101 from the data
Subject 101 only has negative examples "0" and will therefore show "0%" on metrics like "Recall" or "Precision", skewing output graphs.

In [7]:
# Sum the pain labels of subject 101; a total of 0 shows that this subject has no pain frames
subject = 101
subject_pain_total = df.loc[df['Person'] == subject, 'Pain'].sum()
print("# Pain Labels Subject {} : ".format(subject), subject_pain_total)

# Pain Labels Subject 101 :  0


In [8]:
# Drop every row that belongs to subject 101
df = df[~df['Person'].eq(101)]

### Step 4: Redistribute Data for Training
Do one of the subsection steps.

In [9]:
# Distribution checking
def print_distribution(df_train, df_test):
    """Print the number of train and test rows per subject, followed by the overall totals.

    Subjects are matched by their 'Person' value. A subject that occurs in only one of the two
    frames is still reported, with a count of 0 on the other side, instead of being paired with a
    different subject or dropped.

    Args:
        df_train: DataFrame with a 'Person' column holding the training rows.
        df_test: DataFrame with a 'Person' column holding the test rows.
    """
    def _share(part, whole):
        # Report 0% instead of raising ZeroDivisionError when there are no rows at all
        return part / whole if whole else 0.0

    train_counts = df_train.groupby('Person').size()
    test_counts = df_test.groupby('Person').size()
    subjects = sorted(set(train_counts.index).union(test_counts.index))

    print("\033[1mTrain\t\t\t\t   |Test\033[0m")
    for subject in subjects:
        n_train = int(train_counts.get(subject, 0))
        n_test = int(test_counts.get(subject, 0))
        n_subject = n_train + n_test
        print("Subject {} Train:\t{}\t{:.0%}|{:.0%}  Subject {} Test:\t{}"
              .format(subject, n_train, _share(n_train, n_subject), _share(n_test, n_subject),
                      subject, n_test))
    print("-" * 68)
    n_total = len(df_train) + len(df_test)
    print("Total Original Train:\t{}\t{:.0%}|{:.0%}  Total Original Test:\t{}"
          .format(len(df_train), _share(len(df_train), n_total),
                  _share(len(df_test), n_total), len(df_test)))

def print_pain_distribution(df_train, df_test):
    """Print a pain / no-pain summary of a train/test split.

    Reports the relative size of both frames, their 'Pain' == 0 and 'Pain' > 0 row counts, the
    totals, and how many training rows have a 'temp_id' that also occurs in the test frame.

    Args:
        df_train: DataFrame with 'Pain' and 'temp_id' columns (training rows).
        df_test: DataFrame with 'Pain' and 'temp_id' columns (test rows).
    """
    n_train, n_test = len(df_train), len(df_test)
    n_all = n_test + n_train

    train_no_pain = len(df_train.loc[df_train['Pain'] == 0])
    test_no_pain = len(df_test.loc[df_test['Pain'] == 0])
    train_pain = len(df_train.loc[df_train['Pain'] > 0])
    test_pain = len(df_test.loc[df_test['Pain'] > 0])
    # Training rows whose frame id is also present in the test frame
    shared_ids = df_train['temp_id'].isin(df_test['temp_id']).sum()

    print("Train:          {:.0%} |".format(n_train / n_all),
          "Test:          {:.0%}".format(n_test / n_all))
    print("Train No Pain: {} |".format(train_no_pain),
          "Test No Pain: {}".format(test_no_pain))
    print("Train Pain:    {} |".format(train_pain),
          "Test Pain:    {}".format(test_pain))
    print("Train Total:  {} |".format(n_train), "Test Total:   {}".format(n_test))
    print()
    print("Total:        {}".format(n_all))
    print("----------------------------------------")
    print("Duplicates:", shared_ids)

In [10]:
# Assign every subject to one of two groups: the listed subjects form group 1, all others group 2
group_1 = [42, 47, 49, 66, 95, 97, 103, 106, 108, 121, 123, 124]
in_group_1 = df['Person'].isin(group_1)
df_1 = df[in_group_1]
df_2 = df[~in_group_1]

In [11]:
# Show which subjects ended up in the second group (every subject not listed in group_1)
df_2['Person'].unique()

array([ 43,  48,  52,  59,  64,  80,  92,  96, 107, 109, 115, 120])

#### Step 4.1: Redistribute Naively
In this step, we will just downsample the data and put it into two groups, without accounting for potential duplicates in test and train data (e.g. "original" in train, and "flipped" in test).

In [None]:
# Balance group 1: keep every pain frame and draw an equally large random sample of no-pain frames
df_1_pain_1 = df_1.loc[df_1['Pain'] > 0]
df_1_pain_0 = df_1.loc[df_1['Pain'] == 0].sample(n=len(df_1_pain_1))
df_1_downsampled = pd.concat([df_1_pain_0, df_1_pain_1])

In [None]:
# Balance group 2: keep every pain frame and draw an equally large random sample of no-pain frames
df_2_pain_1 = df_2.loc[df_2['Pain'] > 0]
df_2_pain_0 = df_2.loc[df_2['Pain'] == 0].sample(n=len(df_2_pain_1))
df_2_downsampled = pd.concat([df_2_pain_0, df_2_pain_1])

#### Step 4.2: Redistribute - No Mutation Duplicates
In this step the data is split so that the same image in a mutated form is not in train and test data.

In [None]:
# Balance group 1: all pain frames plus a random no-pain sample of the same size
df_1_pain_1 = df_1.loc[df_1['Pain'] > 0]
n_pain_frames_g1 = len(df_1_pain_1)
df_1_pain_0 = df_1.loc[df_1['Pain'] == 0].sample(n_pain_frames_g1)
df_1_downsampled = pd.concat([df_1_pain_0, df_1_pain_1])

In [None]:
# Split the pain frames of group 2 into train and test (60 / 40) by frame id, so that all
# augmented variants of one frame land on the same side of the split
ratio = 0.6

temp_ids_pain = df_2.loc[df_2['Pain'] > 0, 'temp_id'].unique()
n_pain_train_ids = int(ratio * len(temp_ids_pain))
temp_ids_pain_train = np.random.choice(temp_ids_pain, n_pain_train_ids, replace=False)
temp_ids_pain_test = temp_ids_pain[~np.isin(temp_ids_pain, temp_ids_pain_train)]
df_2_pain_train = df_2.loc[df_2['temp_id'].isin(temp_ids_pain_train)]
df_2_pain_test = df_2.loc[df_2['temp_id'].isin(temp_ids_pain_test)]

In [None]:
# Split the no-pain frames of group 2 into train and test by frame id, drawing as many rows per
# side as the pain split holds; test ids are drawn only from ids not already used for training
temp_ids_no_pain = df_2.loc[df_2['Pain'] == 0, 'temp_id'].unique()
n_pain_train_rows = len(df_2_pain_train)
n_pain_test_rows = len(df_2_pain_test)
temp_ids_no_pain_train = np.random.choice(temp_ids_no_pain, n_pain_train_rows, replace=False)
unused_no_pain_ids = temp_ids_no_pain[~np.isin(temp_ids_no_pain, temp_ids_no_pain_train)]
temp_ids_no_pain_test = np.random.choice(unused_no_pain_ids, n_pain_test_rows, replace=False)
df_2_pain_0_train = df_2.loc[df_2['temp_id'].isin(temp_ids_no_pain_train)].sample(n_pain_train_rows)
df_2_pain_0_test = df_2.loc[df_2['temp_id'].isin(temp_ids_no_pain_test)].sample(n_pain_test_rows)

In [None]:
# Combine the pain and no-pain halves of each split
df_2_train = pd.concat([df_2_pain_train, df_2_pain_0_train])
df_2_test = pd.concat([df_2_pain_test, df_2_pain_0_test])

In [None]:
# Verify that everything went well: the first summary should report "Duplicates: 0" (no frame id
# shared between train and test), the second shows the per-subject train/test sizes
print_pain_distribution(df_2_train, df_2_test)
print()
print_distribution(df_2_train, df_2_test)

#### Step 4.3: Redistribute - No Test Augmentation
In this step we redistribute the data so that only the train dataset is balanced. The test dataset maintains its original composition.

In [12]:
# Balance group 1 by pairing all pain frames with an equally sized random draw of no-pain frames
g1_has_pain = df_1['Pain'] > 0
df_1_pain_1 = df_1[g1_has_pain]
df_1_pain_0 = df_1[df_1['Pain'] == 0].sample(len(df_1_pain_1))
df_1_downsampled = pd.concat([df_1_pain_0, df_1_pain_1])

In [13]:
# Keep only the un-augmented frames of group 2 (first transformation 'original', second 'straight')
df_2_originals = df_2[df_2['Trans_1'].eq('original') & df_2['Trans_2'].eq('straight')]

In [14]:
# Split original images into train and test, on a per person basis, 60/40
ratio = 0.6

# Collect the per-person pieces and concatenate once at the end. Growing a DataFrame inside the
# loop re-copies all previous rows on every iteration, and seeding it with an empty frame turns
# the column dtypes into object.
train_parts, test_parts = [], []
for _, person_rows in df_2_originals.groupby('Person'):
    person_train = person_rows.sample(frac=ratio)
    train_parts.append(person_train)
    # Everything of this person that was not sampled for training becomes test data
    test_parts.append(person_rows.drop(person_train.index))

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
df_2_originals_train = pd.concat(train_parts) if train_parts else df_2_originals.iloc[0:0]
df_2_originals_test = pd.concat(test_parts) if test_parts else df_2_originals.iloc[0:0]

In [15]:
# Verify that everything went well in this first stage. Expected output is a 60/40 split for every subject,
# as well as significantly more 'no pain' than 'pain' frames (the originals are not balanced yet)
print_pain_distribution(df_2_originals_train, df_2_originals_test)
print("\n--------------------------------------------------------------------\n")
print_distribution(df_2_originals_train, df_2_originals_test)

Train:          60% | Test:          40%
Train No Pain: 9861 | Test No Pain: 6601
Train Pain:    1866 | Test Pain:    1217
Train Total:  11727 | Test Total:   7818

Total:        19545
----------------------------------------
Duplicates: 0

--------------------------------------------------------------------

[1mTrain				   |Test[0m
Subject 43 Train:	672	60%|40%  Subject 43 Test:	448
Subject 48 Train:	529	60%|40%  Subject 48 Test:	353
Subject 52 Train:	1565	60%|40%  Subject 52 Test:	1044
Subject 59 Train:	464	60%|40%  Subject 59 Test:	309
Subject 64 Train:	929	60%|40%  Subject 64 Test:	620
Subject 80 Train:	1178	60%|40%  Subject 80 Test:	786
Subject 92 Train:	901	60%|40%  Subject 92 Test:	601
Subject 96 Train:	1412	60%|40%  Subject 96 Test:	941
Subject 107 Train:	1225	60%|40%  Subject 107 Test:	816
Subject 109 Train:	1142	60%|40%  Subject 109 Test:	761
Subject 115 Train:	770	60%|40%  Subject 115 Test:	513
Subject 120 Train:	940	60%|40%  Subject 120 Test:	626
-------------------------

In [16]:
# Build the training set: take every variant (original and augmented) of the frames chosen for
# training, then balance it by sampling as many no-pain rows as there are pain rows
df_2_train_ids = df_2_originals_train['temp_id'].unique()
df_2_train = df_2.loc[df_2['temp_id'].isin(df_2_train_ids)]
df_2_train_pain = df_2_train.loc[df_2_train['Pain'] > 0]
df_2_train_no_pain = df_2_train.loc[df_2_train['Pain'] == 0].sample(n=len(df_2_train_pain))
df_2_train = pd.concat([df_2_train_pain, df_2_train_no_pain])

In [17]:
# The test set is the held-out part of the originals, left un-augmented and un-balanced
df_2_test = df_2_originals_test

In [18]:
# Print final distribution with augmented train images: train should hold equally many pain and
# no-pain rows, test keeps its original composition, and "Duplicates" should be 0
print_pain_distribution(df_2_train, df_2_test)
print("\n--------------------------------------------------------------------\n")
print_distribution(df_2_train, df_2_test)

Train:          66% | Test:          34%
Train No Pain: 7464 | Test No Pain: 6601
Train Pain:    7464 | Test Pain:    1217
Train Total:  14928 | Test Total:   7818

Total:        22746
----------------------------------------
Duplicates: 0

--------------------------------------------------------------------

[1mTrain				   |Test[0m
Subject 43 Train:	686	60%|40%  Subject 43 Test:	448
Subject 48 Train:	569	62%|38%  Subject 48 Test:	353
Subject 52 Train:	1421	58%|42%  Subject 52 Test:	1044
Subject 59 Train:	585	65%|35%  Subject 59 Test:	309
Subject 64 Train:	1054	63%|37%  Subject 64 Test:	620
Subject 80 Train:	3005	79%|21%  Subject 80 Test:	786
Subject 92 Train:	1583	72%|28%  Subject 92 Test:	601
Subject 96 Train:	1387	60%|40%  Subject 96 Test:	941
Subject 107 Train:	1779	69%|31%  Subject 107 Test:	816
Subject 109 Train:	1205	61%|39%  Subject 109 Test:	761
Subject 115 Train:	784	60%|40%  Subject 115 Test:	513
Subject 120 Train:	870	58%|42%  Subject 120 Test:	626
-----------------------

In [30]:
# Per-subject row counts of the training set, separately for pain and no-pain rows
# (transposed, so that subjects become columns)
train_pain_rows = df_2_train.loc[df_2_train['Pain'] > 0]
train_no_pain_rows = df_2_train.loc[df_2_train['Pain'] == 0]
df_pain = train_pain_rows.groupby('Person').count().transpose()
df_no_pain = train_no_pain_rows.groupby('Person').count().transpose()

In [47]:
# Summary table: row 1 = pain rows per subject, row 2 = no-pain rows per subject (the first count
# row of each table is used), plus a 'Total' column and a third row with the pain share
df_test = pd.concat([df_pain.iloc[:1], df_no_pain.iloc[:1]])
df_test['Total'] = df_test.sum(axis=1)
pain_share = df_test.iloc[:1] / df_test.sum()
df_test = pd.concat([df_test, pain_share])

In [48]:
# Display the summary table (despite its name, `df_test` here holds training-set counts)
df_test

Person,43,48,52,59,64,80,92,96,107,109,115,120,Total
Session,212.0,200.0,280.0,312.0,400.0,2596.0,1128.0,392.0,1084.0,432.0,236.0,192.0,7464.0
Session,474.0,369.0,1141.0,273.0,654.0,409.0,455.0,995.0,695.0,773.0,548.0,678.0,7464.0
Session,0.309038,0.351494,0.197044,0.533333,0.379507,0.863894,0.712571,0.282624,0.609331,0.358506,0.30102,0.22069,0.5


#### Step 4.4: Original Distribution
In this step, the data used is only the original images that have been preprocessed but not augmented.

In [None]:
# Get original images
df_1_original = df_1[(df_1['Trans_1'] == 'original') & (df_1['Trans_2'] == 'straight')]
df_2_original = df_2[(df_2['Trans_1'] == 'original') & (df_2['Trans_2'] == 'straight')]

In [None]:
# Split df_2 into train and test on a per person basis
ratio = 0.6

# Collect the per-person pieces and concatenate once at the end. Growing a DataFrame inside the
# loop re-copies all previous rows on every iteration, and seeding it with an empty frame turns
# the column dtypes into object.
train_parts, test_parts = [], []
for _, person_rows in df_2_original.groupby('Person'):
    person_train = person_rows.sample(frac=ratio)
    train_parts.append(person_train)
    # Everything of this person that was not sampled for training becomes test data
    test_parts.append(person_rows.drop(person_train.index))

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
df_2_originals_train = pd.concat(train_parts) if train_parts else df_2_original.iloc[0:0]
df_2_originals_test = pd.concat(test_parts) if test_parts else df_2_original.iloc[0:0]

In [None]:
# In this variant the un-augmented splits are used directly as train and test data
df_2_train = df_2_originals_train
df_2_test = df_2_originals_test

In [None]:
# Print the pain / no-pain counts and shares of group 1, overall and per person
g1_pain_count = np.sum(df_1_original['Pain'] > 0)
g1_no_pain_count = np.sum(df_1_original['Pain'] == 0)
g1_row_count = len(df_1_original)
print("Group 1 Pain:    ", g1_pain_count, "\t|  {:.0%}".format(g1_pain_count / g1_row_count))
print("Group 1 No Pain: ", g1_no_pain_count, "|  {:.0%}".format(g1_no_pain_count / g1_row_count))
print("\n\t\tPain \t| No Pain\tPain\t| No Pain")
print("-" * 35, "-" * 25)
for person_id, person_rows in df_1_original.groupby('Person'):
    person_pain = np.sum(person_rows['Pain'] > 0)
    person_no_pain = np.sum(person_rows['Pain'] == 0)
    person_total = len(person_rows)
    print("Person {}:\t{}\t| {}\t\t{:.0%}\t|  {:.0%}".format(
        person_id, person_pain, person_no_pain,
        person_pain / person_total, person_no_pain / person_total))

In [None]:
# Print the final distribution of the original (non-augmented) train and test images
print_pain_distribution(df_2_originals_train, df_2_originals_test)
print("\n--------------------------------------------------------------------\n")
print_distribution(df_2_originals_train, df_2_originals_test)

#### Step 4.5: Sessions

In [11]:
# Balance group 1: every pain frame plus a random, equally sized selection of no-pain frames
df_1_pain_1 = df_1.loc[df_1['Pain'] > 0]
g1_no_pain_pool = df_1.loc[df_1['Pain'] == 0]
df_1_pain_0 = g1_no_pain_pool.sample(len(df_1_pain_1))
df_1_downsampled = pd.concat([df_1_pain_0, df_1_pain_1])

In [12]:
# Split dataframe into sessions
# session_dfs_2 is an (n_sessions, 2) object array: column 0 holds the session number, column 1
# the DataFrame with that session's rows. The array is filled cell by cell because np.array() on
# (key, DataFrame) tuples depends on NumPy's ragged-sequence inference, which raises on current
# NumPy versions and could otherwise unpack the DataFrames into extra array dimensions.
session_groups = list(df_2.groupby('Session'))
session_dfs_2 = np.empty((len(session_groups), 2), dtype=object)
for row, (session_id, session_df) in enumerate(session_groups):
    session_dfs_2[row, 0] = session_id
    session_dfs_2[row, 1] = session_df
session_paths = [os.path.join(AUGMENTED_DATA_FLEXIBLE, "group_2", "session_" + str(sess)) for sess in session_dfs_2[:, 0]]

### Step 5: Allocate Groups
Move the image files into the correct folders for training.

In [13]:
def allocate_group(df, path):
    """Move every image file listed in ``df['img_path']`` into the folder ``path``.

    The target folder is created when it does not exist yet, including any missing parent
    folders. Files keep their base name. The 'img_path' values of ``df`` are not updated.

    Args:
        df: DataFrame with an 'img_path' column holding the current file locations.
        path: Destination folder.

    Raises:
        OSError: If a listed file cannot be moved (e.g. it no longer exists at 'img_path').
    """
    # makedirs (rather than mkdir) also works when the parent folder is missing
    os.makedirs(path, exist_ok=True)

    for f_path in df['img_path'].values:
        os.rename(f_path, os.path.join(path, os.path.basename(f_path)))

In [14]:
# Allocate Group 1: move the balanced group-1 images into "<flexible folder>/group_1"
group_1_path = os.path.join(AUGMENTED_DATA_FLEXIBLE, "group_1")
allocate_group(df_1_downsampled, group_1_path)

#### Step 5.1: Allocate group 2 into train/test

In [None]:
# Move the group-2 train and test images into their own folders
train_path, test_path = (
    os.path.join(AUGMENTED_DATA_FLEXIBLE, folder) for folder in ('group_2_train', 'group_2_test')
)

for split_df, split_path in ((df_2_train, train_path), (df_2_test, test_path)):
    allocate_group(split_df, split_path)

In [None]:
# Verify the allocation. Expected: a large group 1, smaller group-2 train and test folders, and no
# pain images left behind in the "raw" folder
raw_path = os.path.join(AUGMENTED_DATA_FLEXIBLE, 'raw')
print("Group 1:        {}".format(len(os.listdir(group_1_path))))
print("Group 2 Train:  {}".format(len(os.listdir(train_path))))
print("Group 2 Test:   {}".format(len(os.listdir(test_path))))
print("Raw:            {}".format(len(os.listdir(raw_path))))
raw_labels = np.array(DL.get_labels(DL.get_image_paths(raw_path)))
# Clip every pain label to at most 1, so that the sum counts images with a pain label > 0
raw_pain_flags = np.minimum(raw_labels[:, 4].astype(int), 1)
print("Raw Pain Img's: {}".format(np.sum(raw_pain_flags)))

#### Step 5.2: Allocate group 2 into sessions

In [None]:
# Allocate into sessions: move every session's images into its own "group_2/session_<n>" folder
os.makedirs(os.path.join(AUGMENTED_DATA_FLEXIBLE, "group_2"), exist_ok=True)
# The loop variables deliberately avoid the names `df` and `path`, so that the notebook-level
# DataFrame `df` is not overwritten by the last session's rows.
for session_df, session_path in zip(session_dfs_2[:, 1], session_paths):
    allocate_group(session_df, session_path)

In [None]:
# Verify Success: print the number of files in every session folder.
# The session number is everything after the "session_" prefix of the folder name; taking only
# the last character of the path would report session 10 as "0". The paths are walked in the
# order they were built (ascending session number), not in lexicographic order.
for session_path in session_paths:
    session_label = os.path.basename(session_path)[len("session_"):]
    print("Session {}:".format(session_label), len(os.listdir(session_path)))

In [8]:
import numpy as np

In [9]:
# Scratch data: two arrays of four two-column rows each.
# NOTE(review): presumably example y_true / y_pred inputs for the metric helpers defined further
# down - confirm, or remove this cell.
a = np.array([[0.6, 0.4], [1, 0], [1, 0], [0, 1]])
b = np.array([[0.6, 0.4], [0.4, 0.6], [0.3, 0.7], [0, 1]])

In [31]:
# Scratch data: a two-element array of zeros (not used by any other visible cell)
c = np.array([0,0])

In [32]:
# Scratch data: a two-element array of ones (not used by any other visible cell)
d = np.array([1,1])

In [35]:
# Scratch: instantiate Keras' built-in true-positives metric.
# NOTE(review): a function named `TP` is defined further down in this notebook and rebinds this name.
TP = tf.keras.metrics.TruePositives()

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


<tf.Tensor 'confusion_matrix/SparseTensorDenseAdd:0' shape=(5, 5) dtype=int32>

In [45]:
# Scratch check of tf.math.confusion_matrix for labels [0, 1] against predictions [0, 0].
# NOTE(review): tf.Session / .eval() is TensorFlow 1.x graph-mode API - confirm the TF version in use.
with tf.Session().as_default():
    print(tf.math.confusion_matrix(np.array([0, 1]), np.array([0, 0])).eval())

[[1 0]
 [1 0]]


In [4]:
def TP(y_true, y_pred):
    """Count true positives.

    Both inputs are reduced to class indices with ``tf.argmax`` over axis 1. A sample is counted
    when the product of predicted and true index is non-zero, i.e. when both are non-zero.
    NOTE(review): presumably meant for two-class one-hot / probability rows - confirm.

    Args:
        y_true: Tensor of per-class ground-truth scores, one row per sample.
        y_pred: Tensor of per-class predicted scores, one row per sample.

    Returns:
        Scalar tensor with the number of counted samples.
    """
    predicted_class = tf.argmax(y_pred, axis=1)
    actual_class = tf.argmax(y_true, axis=1)
    both_positive = predicted_class * actual_class
    return tf.math.count_nonzero(both_positive)


def FP(y_true, y_pred):
    """Count false positives.

    Both inputs are reduced to class indices with ``tf.argmax`` over axis 1. A sample is counted
    when ``predicted * (true - 1)`` is non-zero, i.e. the predicted index is non-zero and the true
    index is not 1.
    NOTE(review): presumably meant for two-class one-hot / probability rows - confirm.

    Args:
        y_true: Tensor of per-class ground-truth scores, one row per sample.
        y_pred: Tensor of per-class predicted scores, one row per sample.

    Returns:
        Scalar tensor with the number of counted samples.
    """
    predicted_class = tf.argmax(y_pred, axis=1)
    actual_class = tf.argmax(y_true, axis=1)
    predicted_but_not_actual = predicted_class * (actual_class - 1)
    return tf.math.count_nonzero(predicted_but_not_actual)


def TN(y_true, y_pred):
    """Count true negatives.

    Both inputs are reduced to class indices with ``tf.argmax`` over axis 1. A sample is counted
    when ``(predicted - 1) * (true - 1)`` is non-zero, i.e. neither index equals 1.
    NOTE(review): presumably meant for two-class one-hot / probability rows - confirm.

    Args:
        y_true: Tensor of per-class ground-truth scores, one row per sample.
        y_pred: Tensor of per-class predicted scores, one row per sample.

    Returns:
        Scalar tensor with the number of counted samples.
    """
    predicted_class = tf.argmax(y_pred, axis=1)
    actual_class = tf.argmax(y_true, axis=1)
    both_negative = (predicted_class - 1) * (actual_class - 1)
    return tf.math.count_nonzero(both_negative)


def FN(y_true, y_pred):
    """Count false negatives.

    Both inputs are reduced to class indices with ``tf.argmax`` over axis 1. A sample is counted
    when ``(predicted - 1) * true`` is non-zero, i.e. the predicted index is not 1 and the true
    index is non-zero.
    NOTE(review): presumably meant for two-class one-hot / probability rows - confirm.

    Args:
        y_true: Tensor of per-class ground-truth scores, one row per sample.
        y_pred: Tensor of per-class predicted scores, one row per sample.

    Returns:
        Scalar tensor with the number of counted samples.
    """
    predicted_class = tf.argmax(y_pred, axis=1)
    actual_class = tf.argmax(y_true, axis=1)
    actual_but_not_predicted = (predicted_class - 1) * actual_class
    return tf.math.count_nonzero(actual_but_not_predicted)