# In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
from tensorflow import keras
from sklearn import preprocessing
import keras.backend as K

#precision & recall by: https://github.com/keras-team/keras/issues/5400
#Code Source: https://cloud.google.com/blog/products/gcp/intro-to-text-classification-with-keras-automatically-tagging-stack-overflow-posts

# Stack the yearly master-data pickles into a single frame; the order of the
# files (newest year first) determines the row order of the result.
trainDF = pd.concat([
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../../data/raw_data/MasterData_2015.pkl.gz',
        '../../data/raw_data/MasterData_2014.pkl.gz',
        '../../data/raw_data/MasterData_2013.pkl.gz',
        '../../data/raw_data/MasterData_2012.pkl.gz',
    )
])

# Keep only rows that carry both a TEXT value and an NTEE value.
trainDF = trainDF.dropna(subset=['TEXT', 'NTEE'])

# Work on string copies of the two fields under lower-case names, then drop
# the source columns together with the ones that are not used afterwards.
trainDF = trainDF.assign(
    text=trainDF['TEXT'].astype(str),
    label=trainDF['NTEE'].astype(str),
)
trainDF = trainDF.drop(columns=['NTEE', 'NTEECC', 'IRS_URL', 'TEXT'])

# Discard rows whose label is the literal string 'nan'.
trainDF = trainDF[trainDF['label'] != 'nan']

import statistics

# Per-label row counts, ordered by label.  The Series is renamed explicitly
# because pandas >= 2.0 names the value_counts() result 'count' (and puts the
# original name on the index), which would make counts['label'] raise
# KeyError.  rename/rename_axis produce the same frame on old and new pandas:
# a 'label' column holding the counts under an unnamed index of label values.
counts = (
    trainDF['label']
    .value_counts()
    .sort_index()
    .rename('label')
    .rename_axis(None)
    .to_frame()
)
counts['category'] = counts.index
# Half of each label's row count, truncated to an integer.
counts['train_sample'] = (counts['label'] / 2).astype(int)

# Positional 70/30 split with no shuffling: the first 70% of the rows, in
# their current order, form the training set and the rest the test set.
# Sliced with .iloc instead of np.split, which on a DataFrame goes through the
# deprecated DataFrame.swapaxes; .copy() keeps the two parts independent of
# trainDF.
split_at = int(.7 * len(trainDF))
train_df = trainDF.iloc[:split_at].copy()
test_df = trainDF.iloc[split_at:].copy()

#tf.logging.set_verbosity(tf.logging.ERROR)

# Inputs (free text) and targets (label strings) of the two partitions.
train_posts, train_tags = train_df['text'], train_df['label']
test_posts, test_tags = test_df['text'], test_df['label']

# The tokenizer is fitted on the training text only and then applied to both
# partitions, so train and test share one vocabulary.
vocab_size = 1000
tokenize = keras.preprocessing.text.Tokenizer(num_words=vocab_size)
tokenize.fit_on_texts(train_posts)

x_train, x_test = (
    tokenize.texts_to_matrix(posts) for posts in (train_posts, test_posts)
)

# Likewise the label encoder learns its classes from the training labels.
encoder = preprocessing.LabelBinarizer().fit(train_tags)
y_train, y_test = (
    encoder.transform(tags) for tags in (train_tags, test_tags)
)
    
def precision(y_true, y_pred):
    """Batch-wise precision: the share of selected items that are relevant.

    Intended as a metric for multi-label classification.  Only the tensors of
    the current batch enter the computation, so the value is a per-batch
    figure.
    """
    def rounded_total(tensor):
        # Clamp to [0, 1], round every entry, and add the entries up.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = rounded_total(y_true * y_pred)
    selected = rounded_total(y_pred)
    # The small epsilon guards the division when nothing was selected.
    return hits / (selected + K.epsilon())

def recall(y_true, y_pred):
    """Batch-wise recall: the share of relevant items that are selected.

    Intended as a metric for multi-label classification.  Only the tensors of
    the current batch enter the computation, so the value is a per-batch
    figure.
    """
    def rounded_total(tensor):
        # Clamp to [0, 1], round every entry, and add the entries up.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = rounded_total(y_true * y_pred)
    relevant = rounded_total(y_true)
    # The small epsilon guards the division when nothing is relevant.
    return hits / (relevant + K.epsilon())

# Number of distinct labels seen in the training partition.
num_labels = len(train_df['label'].unique())
batch_size, epochs = 500, 50

# Restore the saved model from disk.  The custom metric functions have to be
# supplied by name so that Keras can resolve them while deserialising.
new_model = keras.models.load_model(
    'model_ntee_50epochs.h5',
    custom_objects={
        'precision': precision,
        'recall': recall,
    },
)

# Score the restored model on the held-out partition and show the result.
new_score = new_model.evaluate(
    x_test,
    y_test,
    batch_size=batch_size,
    verbose=1,
)
print(new_score)

# Output of print(new_score):
# [0.8402533923364086, 0.8643846342917275, 0.886735872199002, 0.852459955874956]
