In [8]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from zipfile import ZipFile
import time
from datetime import datetime
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D, AveragePooling2D, GlobalAveragePooling2D, Dropout
from tensorflow.keras import utils 
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow_hub as hub
# Setting random seeds to reduce the amount of randomness in the neural net weights and results
# The results may still not be exactly reproducible
np.random.seed(42)
tf.random.set_seed(42)

2023-10-02 10:56:25.215468: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-02 10:56:25.405703: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-02 10:56:26.038107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/larkinan/miniconda3/envs/tf/lib/python3.9/site-packages/cv2/../../lib

In [6]:
# Testing to ensure GPU is being utilized
# Ensure that the Runtime Type for this notebook is set to GPU
# If a GPU device is not found, change the runtime type under:
# Runtime>> Change runtime type>> Hardware accelerator>> GPU
# and run the notebook from the beginning again.

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))


Found GPU at: /device:GPU:0


2023-09-29 16:28:07.337614: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.337697: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.337718: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.337945: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.337956: I tensorflow/core/co

In [7]:
BATCH_SIZE = 64

def convertCodeToNumpy(code):
    encoding = []
    for charIndex in range(len(code)):
        curCar = code[charIndex]
        if curCar =='0':
            encoding.append(0)
        elif curCar =='1':
            encoding.append(1)
    return(encoding)

In [8]:
# Importing the augmented training dataset and testing dataset to create tensors of images using the filename paths.
train_aug_df = pd.read_csv("/mnt/h/Aspire/BERT/place/train/trainData.csv")
test_df = pd.read_csv("/mnt/h/Aspire/BERT/place/test/testData.csv")
train_aug_df['filename'] = train_aug_df['filename'].str.replace("H:/", "/mnt/h/", case = False)
train_aug_df['target'] = train_aug_df['encoding'].map(convertCodeToNumpy)
test_df['target'] = test_df['encoding'].map(convertCodeToNumpy)
test_df['filename'] = test_df['filename'].str.replace("H:/","/mnt/h/",case=False)
test_labels_list = list(test_df['target'])
print(train_aug_df.head())

                                            filename         encoding   
0  /mnt/h/Aspire/Bert/place/train/images/aug/3_88...  [0 0 0 0 0 0 1]  \
1  /mnt/h/Aspire/Bert/place/train/images/aug/3_88...  [0 0 0 0 0 0 1]   
2  /mnt/h/Aspire/Bert/place/train/images/aug/3_88...  [0 0 0 0 0 0 1]   
3  /mnt/h/Aspire/Bert/place/train/images/aug/3_88...  [0 0 0 0 0 0 1]   
4  /mnt/h/Aspire/Bert/place/train/images/aug/3_88...  [0 0 0 0 0 0 1]   

                  target  
0  [0, 0, 0, 0, 0, 0, 1]  
1  [0, 0, 0, 0, 0, 0, 1]  
2  [0, 0, 0, 0, 0, 0, 1]  
3  [0, 0, 0, 0, 0, 0, 1]  
4  [0, 0, 0, 0, 0, 0, 1]  


In [9]:
# Defining a function to read the image, decode the image from given tensor and one-hot encode the image label class.
# Changing the channels para in tf.io.decode_jpeg from 3 to 1 changes the output images from RGB coloured to grayscale.
num_classes = 7
def _parse_function(filename, label):   
    image_string = tf.io.read_file(filename)
    image_decoded = tf.io.decode_jpeg(image_string, channels=3)    # channels=1 to convert to grayscale, channels=3 to convert to RGB.
    #image_decoded = tf.image.resize(image_decoded, [224, 224])
    image_decoded = tf.cast(image_decoded, dtype=tf.float32)
    
    return(image_decoded,label)

In [10]:
# Converting the filenames and target class labels into lists for augmented train and test datasets.
train_aug_filenames_list = list(train_aug_df['filename'])
train_aug_labels_list = list(train_aug_df['target'])
test_filenames_list = list(test_df['filename'])
test_labels_list = list(test_df['target'])
# Creating tensorflow constants of filenames and labels for augmented train and test datasets from the lists defined above.
train_aug_filenames_tensor = tf.constant(train_aug_filenames_list)
train_aug_labels_tensor = tf.constant(train_aug_labels_list)
test_filenames_tensor = tf.constant(test_filenames_list)
test_labels_tensor = tf.constant(test_labels_list)
test_dataset = tf.data.Dataset.from_tensor_slices((test_filenames_tensor, test_labels_tensor))
test_dataset = test_dataset.map(_parse_function)
# test_dataset = test_dataset.repeat(3)
test_dataset = test_dataset.batch(BATCH_SIZE)    # Same as batch_size hyperparameter in model.fit() below.

2023-09-29 16:28:07.781514: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.781603: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.781626: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.781863: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-29 16:28:07.781887: I tensorflow/compile

In [11]:
model = tf.keras.Sequential([
    #hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_s16_fe/1", trainable=True),
    #hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_b16_fe/1", trainable=True),
    #hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_b32_fe/1", trainable=False),
     #hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_r26_s32_lightaug_fe/1", trainable=True),
    #hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_r50_l32_fe/1", trainable=True),
hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v2_101/feature_vector/5",
               trainable=False, arguments=dict(batch_norm_momentum=0.997)),
    tf.keras.layers.Dense(100,activation='relu'),
    tf.keras.layers.Dense(7,activation='sigmoid')
])
model.compile(
    loss=[tf.keras.losses.BinaryCrossentropy(from_logits=True)],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),metrics=['accuracy'])#,run_eagerly=True)
weightFile = "/mnt/h/Aspire/BERT/place/place_image_model_checkpoint.h5"
checkpoint = ModelCheckpoint(filepath=weightFile,
                                monitor='val_loss',
                                save_best_only=True,
                                save_weights_only=True,
                                verbose=1,
                                initial_value_threshold=  None#0.97828
                                )
model(np.zeros((1,224,224,3)))
if(os.path.exists(weightFile)):
    model.load_weights(weightFile)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
2023-09-29 16:28:10.478642: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8500
2023-09-29 16:28:10.991758: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


In [13]:
# Generating predictions from the model above.
placeImagePred = model.predict(test_dataset)
#final_cnn_pred = final_cnn.predict(train_aug_dataset)
print(placeImagePred[0])
imgBinary = (placeImagePred>0.5)*1
print(np.max(placeImagePred,axis=0))

[0.02541909 0.11607458 0.26550087 0.11254267 0.06386312 0.2380479
 0.75822085]
[0.41196102 0.12263011 0.35446662 0.1695063  0.07724446 0.8379662
 0.77491796]


In [26]:
imgBinary[1]

array([0, 0, 0, 0, 0, 1, 0])

In [30]:
#BERT_pred = final_BERT.predict(testData)
def getPredictions(dataset,model):
    labelSet,predictions = [],[]
    for text, labels in dataset.take(1000):
        pred = model.predict(text)
        if(len(labelSet)==0):
            labelSet = labels.numpy()
            predictions = pred
        else:
            labelSet = np.concatenate((labelSet,labels))
            predictions = np.concatenate((predictions,pred))
    return([labelSet,predictions])
labels,predictions = getPredictions(test_dataset,placeImagePred)

AttributeError: 'numpy.ndarray' object has no attribute 'predict'

In [36]:
np.sum(test_labels_list,axis=0)

array([ 35,  53, 225, 141,  21, 465, 321])

In [14]:
def calcTruePositive(labels,predictions):
    predictionsRound = tf.round(predictions)
    correct = np.multiply(labels,predictionsRound)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcTruePositive(test_labels_list,placeImagePred)

(array([  0.,   0.,   0.,   0.,   0., 155., 230.]),
 array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        33.33333333, 71.65109034]))

In [45]:
test_labels_list = np.asarray(test_labels_list)
def calcTrueNegative(labels,predictions):
    predictionsRound = tf.round(predictions)
    labZeros = np.where(labels<1,1,0)
    predZeros = np.where(predictionsRound==0,1,0)
    correct = np.multiply(labZeros,predZeros)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labZeros,axis=0)*100
    return(nCorrect,percCorrect)
calcTrueNegative(test_labels_list,placeImagePred)

(array([955, 937, 765, 849, 969, 476, 515]),
 array([100.        , 100.        , 100.        , 100.        ,
        100.        ,  90.66666667,  76.98056801]))

In [48]:
def calcFalseNegative(labels,predictions):
    predictionsRound = tf.round(predictions)
    predZeros = np.where(predictionsRound==0,1,0)
    correct = np.multiply(labels,predZeros)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcFalseNegative(test_labels_list,placeImagePred)

(array([ 35,  53, 225, 141,  21, 310,  91]),
 array([100.        , 100.        , 100.        , 100.        ,
        100.        ,  66.66666667,  28.34890966]))

In [49]:
def calcFalsePositive(labels,predictions):
    predictionsRound = tf.round(predictions)
    labZeros = np.where(labels==0,1,0)
    correct = np.multiply(labZeros,predictionsRound)
    nCorrect = np.sum(correct,axis=0)
    print(nCorrect)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcFalsePositive(test_labels_list,placeImagePred)

[  0.   0.   0.   0.   0.  49. 154.]


(array([  0.,   0.,   0.,   0.,   0.,  49., 154.]),
 array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        10.53763441, 47.97507788]))

In [4]:
import pandas as ps
import os
allData = ps.read_csv("/mnt/h/Aspire/BERT/place/placeLabelsTrainTest_Sep26_23.csv")
testData = allData[allData['test']==1]
isImage = []
for curNum in range(testData.count()[0]):
    curRecord = testData.iloc[curNum]
    curImage = curRecord['img_http']
    if(os.path.exists("/mnt/h/Aspire/BERT/place/test/images/base/" + curImage[:-4] + "_padded.jpg")):
        isImage.append(1)
    else:
        isImage.append(0)
testData['isImage'] = isImage
imageTest = testData[testData['isImage']==1]
imageTest.to_csv('/mnt/h/Aspire/BERT/place/test/imageTweets.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testData['isImage'] = isImage


In [51]:
a = imageTest['text']
print(a)
tweetText = list(imageTest['text'])
tweetText = list(map(mapNewLineReplace,tweetText))
tweetLabels = list(map(convertPlaceCodeToLabels,list(imageTest['location'])))
tweetLabels2 = list(map(convertOutdoorCodeToLabels,list(imageTest['location_cat'])))
tweetLabels3 = np.concatenate((tweetLabels, tweetLabels2),axis=1)
print(tweetLabels3[0:10])

0       Lots of signs outside the Nannie Lee Center in...
20      First game as a high schooler today!! #sisterl...
23      Caregivers, please complete the reopening surv...
30      Yes a thousand times yes!!! Especially for us ...
40      @DCCCAInc CEO @LoriAlvarado1 says their women'...
                              ...                        
4955    “@Lil_Hands_Sign: Signing children tend to be ...
4969    #1stgrade Ss teaching Kindergarten Ss how to u...
4982    “@BarackObama: Tell a friend, tell a family me...
4985    OMG #avis rental car line. This is a nightmare...
4990    Congrats @callanmccarthy on your last ever hig...
Name: text, Length: 990, dtype: object
[[0 0 0 0 0 0 1]
 [0 0 1 0 0 1 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0]
 [1 0 0 1 0 0 1]
 [0 0 1 0 0 1 0]
 [0 0 0 0 0 1 0]
 [0 0 0 0 0 0 1]]


In [10]:
# Import dependencies
import pandas as ps
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from zipfile import ZipFile
import time
from datetime import datetime
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D, AveragePooling2D, GlobalAveragePooling2D, Dropout
from tensorflow.keras import utils 
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import glob

# Setting random seeds to reduce the amount of randomness in the neural net weights and results
# The results may still not be exactly reproducible
np.random.seed(42)
tf.random.set_seed(42)
import tensorflow as tf
import os
import glob
import numpy as np
from transformers import BertTokenizer, TFBertForMaskedLM
from keras.callbacks import ModelCheckpoint
from transformers import *
import transformers
import keras
import shutil
import time


Loading custom CUDA kernels...
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
Loading custom CUDA kernels...
Using /home/larkinan/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/larkinan/.cache/torch_extensions/py39_cu117/cuda_kernel/build.ninja...
Building extension module cuda_kernel...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module cuda_kernel...


ninja: no work to do.


In [35]:
def mapNewLineReplace(origLine):
    newLine = origLine.replace("\n",". ")
    return(newLine)

In [33]:
def setupModelInputs(tweetData,debug=False):
    tweetText = list(tweetData['text'])
    tweetText = list(map(mapNewLineReplace,tweetText))
    inputs = TOKEN(tweetText,max_length=100,truncation=True,padding='max_length',return_tensors="tf")
    inp_ids = tf.convert_to_tensor(inputs['input_ids'])
    inputs['input_ids'] = inp_ids
    return(inputs)

In [32]:
#modelFile = '/mnt/h/Aspire/BERT/child/checkpiont_3layers/age_model_checkpoint.h5'
modelFile = '/mnt/h/Aspire/BERT/place/place_model_checkpoint.h5'
final_BERT = tf.keras.models.load_model(modelFile,custom_objects={"TFBertModel": transformers.TFBertModel})
final_BERT.compile(loss=[tf.keras.losses.BinaryCrossentropy()],optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),metrics=['accuracy'])

Model config BertConfig {
  "_name_or_path": "/mnt/h/Aspire/BERT/testSaveBERT",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.33.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30623
}



In [52]:
TOKEN = BertTokenizer.from_pretrained('expandedTokenBase')
b = setupModelInputs(imageTest)
preds = final_BERT.predict([b['input_ids'],b['attention_mask']])
predsInt = (preds>0)*1
print(predsInt)

loading file vocab.txt
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Adding 😂 to the vocabulary
Adding ❤ to the vocabulary
Adding 😭 to the vocabulary
Adding 😍 to the vocabulary
Adding 🎉 to the vocabulary
Adding 😊 to the vocabulary
Adding 😘 to the vocabulary
Adding 😩 to the vocabulary
Adding 💕 to the vocabulary
Adding 🏽 to the vocabulary
Adding 🏼 to the vocabulary
Adding 🏾 to the vocabulary
Adding 💯 to the vocabulary
Adding 🔥 to the vocabulary
Adding 🏻 to the vocabulary
Adding 🙏 to the vocabulary
Adding 🙌 to the vocabulary
Adding 🤣 to the vocabulary
Adding 😒 to the vocabulary
Adding 💙 to the vocabulary
Adding ♀ to the vocabulary
Adding 🙄 to the vocabulary
Adding 😁 to the vocabulary
Adding 👏 to the vocabulary
Adding ☺ to the vocabulary
Adding 💀 to the vocabulary
Adding 👌 to the vocabulary
Adding 🎈 to the vocabulary
Adding 💜 to the vocabulary
Adding 💗 to the vocabulary
Adding 😅 to the vocabulary
Adding 💖 to the vocabulary
Adding 🤔 t

[[0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 1 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]]


In [53]:
print(np.sum(predsInt,axis=1))

[1 0 2 0 0 1 2 1 1 1 2 1 2 1 2 2 0 2 1 2 1 1 1 2 1 1 0 2 0 1 1 2 1 1 2 1 2
 0 2 1 0 2 1 0 1 1 2 0 1 1 1 2 0 1 0 2 0 1 0 2 2 0 1 2 1 2 1 0 0 1 1 2 2 2
 0 2 0 2 2 0 1 2 2 2 2 0 2 1 2 0 0 1 2 1 2 1 0 0 2 0 2 1 1 2 2 0 1 2 2 1 1
 1 1 1 1 2 2 2 1 1 1 1 2 1 1 1 2 2 1 2 2 1 1 2 1 1 2 2 2 1 2 2 0 1 1 1 2 2
 1 0 1 0 2 0 1 1 1 1 2 2 1 2 2 0 2 1 1 1 0 2 2 1 1 1 2 0 0 1 0 1 1 0 1 1 1
 2 0 2 0 0 1 1 1 1 0 1 2 1 2 1 2 1 0 0 1 0 0 2 0 1 0 0 1 2 2 1 1 1 0 2 0 1
 2 2 2 2 0 2 0 1 2 1 1 2 2 0 2 1 1 2 1 0 2 1 1 1 2 2 0 2 1 1 0 1 2 0 1 2 2
 2 2 2 1 1 2 2 0 2 0 2 2 2 1 1 0 2 1 2 1 1 0 0 0 0 1 2 1 2 1 1 2 1 1 2 0 1
 1 0 2 1 1 1 0 2 1 1 1 0 2 2 0 2 1 1 0 1 1 2 1 1 2 2 1 2 2 2 2 1 0 1 1 3 1
 2 1 1 1 2 2 2 1 2 0 1 1 1 2 0 0 2 1 2 1 0 1 0 2 0 2 0 1 2 0 2 0 2 1 0 2 1
 2 1 1 1 2 0 1 2 2 1 2 1 1 0 0 1 1 1 2 1 2 0 0 0 2 2 0 1 2 2 1 0 1 0 0 1 2
 0 0 0 1 0 0 2 1 2 3 0 2 1 2 2 2 0 2 0 0 0 1 0 2 1 2 1 2 0 0 1 0 2 1 2 0 2
 2 2 2 1 1 2 0 0 2 2 0 2 1 0 2 0 1 1 2 2 2 1 2 0 1 2 1 1 2 0 1 1 1 1 2 2 0
 2 2 1 2 1 2 0 1 0 2 2 0 

In [46]:
placeCodingDict = {
            'Childcare/daycare':0,
            'Park/playground/child sports center':1,
            'A home':2,
            'School':3,
            'Neigborhood (but not on home property, etc)':4,
            'Indoor location':0,
            'Outdoor location':1,
        }

def convertPlaceCodeToLabels(placeCode):
    keys = list(placeCodingDict.keys())
    codeArr = [0 for x in range(5)]
    for key in keys:
        if key in placeCode:
            codeArr[placeCodingDict[key]] = 1
    return(np.asarray(codeArr))
    
def convertOutdoorCodeToLabels(outdoorCode):
    keys = list(placeCodingDict.keys())
    codeArr = [0 for x in range(2)]
    for key in keys:
        if key in outdoorCode:
            codeArr[placeCodingDict[key]] = 1
    return(np.asarray(codeArr))

In [56]:
def calcTruePositive(labels,predictions):
    correct = np.multiply(labels,predictions)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcTruePositive(tweetLabels3,predsInt)

(array([ 13,  30, 127,  92,   9, 317, 202]),
 array([37.14285714, 56.60377358, 56.44444444, 65.24822695, 42.85714286,
        68.17204301, 62.92834891]))

In [57]:
def calcTrueNegative(labels,predictions):
    labZeros = np.where(labels==0,1,0)
    predZeros = np.where(predictions==0,1,0)
    correct = np.multiply(labZeros,predZeros)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labZeros,axis=0)*100
    return(nCorrect,percCorrect)
calcTrueNegative(tweetLabels3,predsInt)

(array([950, 931, 727, 809, 960, 376, 605]),
 array([99.47643979, 99.35965848, 95.03267974, 95.28857479, 99.07120743,
        71.61904762, 90.43348281]))

In [58]:
def calcFalsePositive(labels,predictions):
    labZeros = np.where(labels==0,1,0)
    correct = np.multiply(labZeros,predictions)
    nCorrect = np.sum(correct,axis=0)
    print(nCorrect)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcFalsePositive(tweetLabels3,predsInt)

[  5   6  38  40   9 149  64]


(array([  5,   6,  38,  40,   9, 149,  64]),
 array([14.28571429, 11.32075472, 16.88888889, 28.36879433, 42.85714286,
        32.04301075, 19.9376947 ]))

In [61]:
def calcFalseNegative(labels,predictions):
    predZeros = np.where(predictions==0,1,0)
    correct = np.multiply(labels,predZeros)
    nCorrect = np.sum(correct,axis=0)
    percCorrect = nCorrect/np.sum(labels,axis=0)*100
    return(nCorrect,percCorrect)
calcFalseNegative(tweetLabels3,predsInt)

(array([ 22,  23,  98,  49,  12, 148, 119]),
 array([62.85714286, 43.39622642, 43.55555556, 34.75177305, 57.14285714,
        31.82795699, 37.07165109]))