# Model Evaluation

## 1. Setting Up Spark Context

In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

In [2]:
# Reuse an already running SparkContext if there is one; otherwise start one
# with the "local[*]" master.
sc = SparkContext.getOrCreate(conf=SparkConf().setMaster("local[*]"))

# Session handle used for all DataFrame work in this notebook.
spark = SparkSession.builder.getOrCreate()

## 2. Download data from Object Store

In [3]:
import os
import getpass

def get_or_set_environment_variable(variable):
    """Return the value of environment variable ``variable``, prompting if unset.

    When the variable is missing from ``os.environ`` the value is requested
    with ``getpass.getpass`` (so it is not echoed). In both cases the value is
    written back to ``os.environ`` before it is returned.
    """
    value = os.environ.get(variable)
    if value is None:
        # Not defined yet: ask for it without echoing the input.
        value = getpass.getpass('Please enter value for {:}: '.format(variable))

    os.environ[variable] = value
    return value

# Read the API key and bucket name from the environment, prompting for any
# value that is not set (the prompt does not echo what is typed).
ibm_api_key_id = get_or_set_environment_variable('IBM_API_KEY_ID')
ibm_cloud_store_bucket = get_or_set_environment_variable('IBM_OBJECT_STORE_BUCKET')

Please enter value for IBM_API_KEY_ID: ········
Please enter value for IBM_OBJECT_STORE_BUCKET: ········


### 2.1 Loading Data for Evaluation

In [4]:
import json
import os

import types
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0


# S3-compatible Object Storage client, authenticated with the API key through
# the IAM token endpoint.
client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=ibm_api_key_id,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com',
)

# The manifest is a JSON object whose values are lists of parquet file names.
body = client.get_object(
    Bucket=ibm_cloud_store_bucket,
    Key='validation_parquet_files.json',
)['Body']

# Attach the placeholder __iter__ when the response body does not offer one.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

files = json.load(body)
files

{'validation': ['disaster_detection_validation_validation-0000.parquet'],
 'train': ['disaster_detection_clean_train-0000.parquet'],
 'test': ['disaster_detection_clean_test-0000.parquet'],
 'label': ['disaster_detection_label-0000.parquet']}

In [5]:
def load_dataframe(files, **kargs):
    """Download parquet objects from the bucket and return them as one DataFrame.

    Each object is written to ``temp_<name>`` in the current working directory,
    read with ``spark.read.options(**kargs).parquet(...)`` and the resulting
    DataFrames are unioned in the order the names were given.

    Args:
        files: iterable of object keys in ``ibm_cloud_store_bucket``.
        **kargs: options forwarded to ``spark.read.options``.

    Returns:
        The union of all loaded DataFrames.

    Raises:
        ValueError: if ``files`` is empty.
    """
    files = list(files)
    if not files:
        # Fail with a clear message rather than an IndexError further down.
        raise ValueError('load_dataframe() needs at least one parquet file name')

    dfs = []
    for fn in files:
        # Only .read() is used on the body, so no __iter__ workaround is
        # needed here; an instance-level __iter__ would not make the object
        # iterable anyway, because special methods are looked up on the type.
        body = client.get_object(Bucket=ibm_cloud_store_bucket,
                                 Key=fn)['Body']
        tfn = 'temp_{:}'.format(fn)
        try:
            with open(tfn, 'wb') as temp:
                temp.write(body.read())
        finally:
            # Release the underlying HTTP response.
            body.close()
        dfs.append(spark.read.options(**kargs).parquet(tfn))

    # Union front to back so the result follows the order of `files`.
    df = dfs[0]
    for other in dfs[1:]:
        df = df.union(other)
    return df

# Load the parquet file(s) listed under the manifest's 'validation' key.
df_validation = load_dataframe(files['validation'])

### 2.2 Loading Trained Models

In [6]:
# The model manifest is a JSON object; it is parsed into `model_files` below.
body = client.get_object(
    Bucket=ibm_cloud_store_bucket,
    Key='model_train_files.json',
)['Body']

# Attach the placeholder __iter__ when the response body does not offer one.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

model_files = json.load(body)
model_files

{'LogisticRegressionModel_count_trained.ai.zip': 'spark',
 'LogisticRegressionModel_tfidf_trained.ai.zip': 'spark',
 'NaiveBayes_eadba888dbea_count_trained.ai.zip': 'spark',
 'NaiveBayes_9056b7fbe3e8_tfidf_trained.ai.zip': 'spark',
 'Sequential_NN_w2v_trained.ai.h5': 'keras'}

In [7]:
def download_model_files(files):
    """Download the given objects from the bucket into the current directory.

    Each object is saved as ``./temp_<name>``.

    Args:
        files: iterable of object keys in ``ibm_cloud_store_bucket``.

    Returns:
        List of local paths, in the same order as ``files``.
    """
    temp_files = []
    for fn in files:
        # Only .read() is used on the body, so no __iter__ workaround is
        # needed here; an instance-level __iter__ would not make the object
        # iterable anyway, because special methods are looked up on the type.
        body = client.get_object(Bucket=ibm_cloud_store_bucket,
                                 Key=fn)['Body']
        tfn = os.path.join(os.path.curdir, 'temp_{:}'.format(fn))
        try:
            with open(tfn, 'wb') as temp:
                temp.write(body.read())
        finally:
            # Release the underlying HTTP response.
            body.close()

        temp_files.append(tfn)
    return temp_files

# Download every file named in the model manifest; the resulting list follows
# the manifest's key order.
model_temp_files = download_model_files(model_files.keys())
model_temp_files

['./temp_LogisticRegressionModel_count_trained.ai.zip',
 './temp_LogisticRegressionModel_tfidf_trained.ai.zip',
 './temp_NaiveBayes_eadba888dbea_count_trained.ai.zip',
 './temp_NaiveBayes_9056b7fbe3e8_tfidf_trained.ai.zip',
 './temp_Sequential_NN_w2v_trained.ai.h5']

In [8]:
import zipfile

def unzip_file(path, dest=os.curdir):
    """Extract a zip archive and return the path of its top-level entry.

    Args:
        path: path of the zip archive.
        dest: directory to extract into (defaults to the current directory).

    Returns:
        ``os.path.join(dest, <top-level entry>)``. When the first archive
        member lives inside a directory, the directory is returned with a
        trailing ``/``, whether or not the archive stores an explicit
        directory entry. A file stored at the archive root is returned as is.

    Raises:
        ValueError: if the archive has no members.
    """
    with zipfile.ZipFile(path, 'r') as zip_ref:
        names = zip_ref.namelist()
        if not names:
            raise ValueError('{!r} is an empty zip archive'.format(path))
        zip_ref.extractall(dest)

    # Zip member names always use '/' as separator. Reduce the first member to
    # its leading component so archives without an explicit directory entry
    # still resolve to the model directory rather than a nested file.
    root, sep, _ = names[0].lstrip('/').partition('/')
    return os.path.join(dest, root + sep)

# Unzip only the archives that the manifest marks as 'spark' models.
# `model_temp_files` was built from `model_files.keys()`, so both sequences
# line up; selecting by type avoids assuming the Keras file is listed last.
extracted_models = [unzip_file(path)
                    for path, kind in zip(model_temp_files, model_files.values())
                    if kind == 'spark']
extracted_models

['./LogisticRegressionModel_count_trained.ai/',
 './LogisticRegressionModel_tfidf_trained.ai/',
 './NaiveBayes_eadba888dbea_count_trained.ai/',
 './NaiveBayes_9056b7fbe3e8_tfidf_trained.ai/']

#### 2.2.1 Logistic Regression

In [9]:
from pyspark.ml.classification import LogisticRegressionModel

# The first two extracted directories are loaded as logistic regression models.
lrs = list(map(LogisticRegressionModel.load, extracted_models[:2]))
lrs

[LogisticRegressionModel: uid = LogisticRegression_c8b0de18d229, numClasses = 2, numFeatures = 2266,
 LogisticRegressionModel: uid = LogisticRegression_20d068424298, numClasses = 2, numFeatures = 2500]

#### 2.2.2 Naive Bayes

In [10]:
from pyspark.ml.classification import NaiveBayesModel

# The third and fourth extracted directories are loaded as Naive Bayes models.
nbs = list(map(NaiveBayesModel.load, extracted_models[2:4]))
nbs

[NaiveBayes_eadba888dbea, NaiveBayes_9056b7fbe3e8]

#### 2.2.3 Neural Network (Keras Sequential)

In [11]:
from tensorflow import keras

# The Keras model is read from the last downloaded path.
# NOTE(review): this relies on the manifest listing the .h5 file last.
model = keras.models.load_model(model_temp_files[-1])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 82)                8282      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 82)                0         
_________________________________________________________________
dropout (Dropout)            (None, 82)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 82)                6806      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 82)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 82)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 8

## 3. Evaluating the Models

### 3.1 Prediction with Spark Models

In [12]:
# Logistic regression models first, then Naive Bayes.
spark_models = [*lrs, *nbs]

# Score the validation set with each model and keep only the columns used
# for evaluation.
predictions = [
    s_model
    .transform(df_validation)
    .select('id', 'text', 'label', 'probability', 'prediction')
    for s_model in spark_models
]

predictions[0].limit(10).toPandas()

Unnamed: 0,id,text,label,probability,prediction
0,4,Forest fire near La Ronge Sask. Canada,1,"[0.23567137036217115, 0.7643286296378288]",1.0
1,5,All residents asked to 'shelter in place' are ...,1,"[0.14551242137202158, 0.8544875786279784]",1.0
2,6,"13,000 people receive #wildfires evacuation or...",1,"[0.21197156233106593, 0.788028437668934]",1.0
3,19,#Flood in Bago Myanmar #We arrived Bago,1,"[0.4185391530356736, 0.5814608469643264]",1.0
4,24,I love fruits,0,"[0.750577642172992, 0.24942235782700797]",0.0
5,38,Was in NYC last week!,0,"[0.5900225281551591, 0.4099774718448409]",0.0
6,40,Cooool :),0,"[0.6511570807500717, 0.3488429192499282]",0.0
7,41,Do you like pasta?,0,"[0.6686506145214854, 0.3313493854785145]",0.0
8,52,Crying out for more! Set me ablaze,0,"[0.6926132831693501, 0.3073867168306498]",0.0
9,53,On plus side LOOK AT THE SKY LAST NIGHT IT WAS...,0,"[0.7429837644384868, 0.25701623556151315]",0.0


In [13]:
# Preview the first ten predictions of the fourth Spark model (index 3).
# Presumably the TF-IDF Naive Bayes model, given lrs + nbs ordering; verify.
predictions[3].limit(10).toPandas()

Unnamed: 0,id,text,label,probability,prediction
0,4,Forest fire near La Ronge Sask. Canada,1,"[0.250273742181468, 0.7497262578185321]",1.0
1,5,All residents asked to 'shelter in place' are ...,1,"[4.5827595379350904e-08, 0.9999999541724046]",1.0
2,6,"13,000 people receive #wildfires evacuation or...",1,"[6.055679116518278e-22, 1.0]",1.0
3,19,#Flood in Bago Myanmar #We arrived Bago,1,"[0.001928444648477989, 0.998071555351522]",1.0
4,24,I love fruits,0,"[0.9999905091316569, 9.490868343122522e-06]",0.0
5,38,Was in NYC last week!,0,"[0.7287157745019819, 0.2712842254980181]",0.0
6,40,Cooool :),0,"[0.9976339971488191, 0.0023660028511810257]",0.0
7,41,Do you like pasta?,0,"[0.31206827910863427, 0.6879317208913657]",1.0
8,52,Crying out for more! Set me ablaze,0,"[0.9999997282570584, 2.7174294161396315e-07]",0.0
9,53,On plus side LOOK AT THE SKY LAST NIGHT IT WAS...,0,"[0.999999630792904, 3.6920709609690645e-07]",0.0


### 3.2 Prediction with Keras

In [14]:
import numpy as np

# Bring the 'features_w2v' column to the driver and lay it out as rows of
# 100 values; the labels become a flat vector.
Xv = np.array(df_validation.select('features_w2v').collect()).reshape(-1, 100)
yv = np.array(df_validation.select('label').collect()).reshape(-1)

# Raw model outputs, and the same outputs rounded to hard predictions.
ypr = model.predict(Xv)
ypv = np.round(ypr)

ypv

array([[1.],
       [0.],
       [1.],
       ...,
       [0.],
       [1.],
       [1.]], dtype=float32)

In [15]:
# Build a Spark DataFrame with the same 'label' / 'prediction' / 'probability'
# columns as the Spark model outputs, so the Keras model can go through the
# same evaluators.
# The model outputs are flattened with ravel().tolist() so every value is a
# plain Python float; calling float() on 1-element arrays is deprecated in
# recent NumPy versions. `1 - ypr` is computed on the array so the values are
# the same as before.
prediction = spark.createDataFrame(
    [(float(l), p, [q, r])
     for l, p, q, r in zip(yv,
                           ypv.ravel().tolist(),
                           (1 - ypr).ravel().tolist(),
                           ypr.ravel().tolist())],
    schema=['label', 'prediction', 'probability'])

# Rebuild the list instead of appending: re-running this cell must not add a
# second Keras entry, which would break the 5-row metrics table further down.
predictions = predictions[:len(spark_models)] + [prediction]

prediction.limit(10).toPandas()

Unnamed: 0,label,prediction,probability
0,1.0,1.0,"[0.18710458278656006, 0.8128954172134399]"
1,1.0,0.0,"[0.5118845701217651, 0.48811542987823486]"
2,1.0,1.0,"[0.08511394262313843, 0.9148860573768616]"
3,1.0,1.0,"[0.2835635542869568, 0.7164364457130432]"
4,0.0,0.0,"[0.7994929552078247, 0.2005070447921753]"
5,0.0,0.0,"[0.779953122138977, 0.22004690766334534]"
6,0.0,0.0,"[0.7518106698989868, 0.24818935990333557]"
7,0.0,0.0,"[0.9357646107673645, 0.0642353892326355]"
8,0.0,0.0,"[0.7851623892784119, 0.21483761072158813]"
9,0.0,0.0,"[0.827872633934021, 0.1721273958683014]"


In [16]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
evaluatorMulti = MulticlassClassificationEvaluator(labelCol='label', predictionCol='prediction')
import pandas as pd

# One row per entry of `predictions`; the metric columns are produced from a
# (column name, evaluator metric) list, in the order they appear in the table.
pd.DataFrame({
    'model': ['Logistic Regression'] * 2 + ['Naive Bayes'] * 2 + ['Convolutional Neural Network'],
    'features': ['Count', 'TF-IDF'] * 2 + ['Word2Vec'],
    **{
        column: [evaluatorMulti.evaluate(prediction, {evaluatorMulti.metricName: metric})
                 for prediction in predictions]
        for column, metric in (('precision', 'weightedPrecision'),
                               ('recall', 'weightedRecall'),
                               ('accuracy', 'accuracy'),
                               ('F1', 'f1'))
    },
})

Unnamed: 0,model,features,precision,recall,accuracy,F1
0,Logistic Regression,Count,0.8058,0.798494,0.798494,0.792991
1,Logistic Regression,TF-IDF,0.761828,0.759965,0.759965,0.754789
2,Naive Bayes,Count,0.796149,0.79628,0.79628,0.79425
3,Naive Bayes,TF-IDF,0.739969,0.740478,0.740478,0.740186
4,Convolutional Neural Network,Word2Vec,0.719769,0.710806,0.710806,0.695786


In [17]:
import pyspark.sql.functions as sfun

# Sum of the 'label' column; with 0/1 labels this is the number of positive rows.
total_pos = df_validation.select(sfun.sum('label')).first()[0]
# Every remaining row is counted as negative.
total_neg = df_validation.count() - total_pos

'{} disastrous Tweets and {} non-disastrous Tweets in validation data set'.format(total_pos, total_neg)

'962 disastrous Tweets and 1296 non-disastrous Tweets in validation data set'

In [18]:
from pyspark.mllib.evaluation import BinaryClassificationMetrics

# The Scala BinaryClassificationMetrics implements .roc() and .pr(); this
# subclass reaches them through the wrapped Java object.
# Python: https://spark.apache.org/docs/latest/api/python/_modules/pyspark/mllib/common.html
# Scala: https://spark.apache.org/docs/latest/api/java/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.html
class CurveMetrics(BinaryClassificationMetrics):
    """BinaryClassificationMetrics that can also return curve points."""

    def __init__(self, *args):
        super().__init__(*args)

    def _to_list(self, rdd):
        """Collect an RDD of Scala pairs into a list of ``(float, float)`` tuples.

        The collect may be inefficient for large datasets, since there can be
        up to one point per data point. The Scala version takes a numBins
        parameter, but it does not seem possible to pass it from Python to Java.
        """
        # Rows come back as scala.Tuple2, which does not appear to have a
        # py4j mapping, so the two fields are read through _1() and _2().
        return [(float(pair._1()), float(pair._2())) for pair in rdd.collect()]

    def get_curve(self, method):
        """Call the Java-side method named ``method`` and return its points as a list."""
        java_curve = getattr(self._java_model, method)()
        return self._to_list(java_curve.toJavaRDD())

# ROC points for every second entry of `predictions` (indices 0, 2, 4),
# collected into one flat list with a parallel list of model names.
points = []
mods = []
for mod, prediction in zip(
        ['Logistic Regression', 'Naive Bayes', 'Convolutional Neural Network'],
        predictions[::2]):
    # (score, label) pairs: the score is the second entry of 'probability'.
    preds = prediction.select('label', 'probability').rdd.map(
        lambda row: (float(row['probability'][1]), float(row['label'])))
    curve = CurveMetrics(preds).get_curve('roc')
    points.extend(curve)
    mods.extend([mod] * len(curve))

points[:20]

[(0.0, 0.0),
 (0.0, 0.0010395010395010396),
 (0.0, 0.002079002079002079),
 (0.0, 0.0031185031185031187),
 (0.0, 0.004158004158004158),
 (0.0, 0.005197505197505198),
 (0.0, 0.006237006237006237),
 (0.0, 0.008316008316008316),
 (0.0, 0.009355509355509356),
 (0.0, 0.010395010395010396),
 (0.0, 0.011434511434511435),
 (0.0, 0.012474012474012475),
 (0.0, 0.013513513513513514),
 (0.0, 0.014553014553014554),
 (0.0, 0.015592515592515593),
 (0.0, 0.016632016632016633),
 (0.0, 0.017671517671517672),
 (0.0, 0.018711018711018712),
 (0.0, 0.01975051975051975),
 (0.0, 0.02079002079002079)]

In [19]:
import pandas as pd

# Split the (x, y) curve points into two parallel tuples.
fpr, tpr = zip(*points)
# NOTE(review): `sample` is not used by any of the cells shown here.
sample = len(points) // 3
# One row per curve point, tagged with the model the point belongs to.
df_roc = pd.DataFrame({'True Positive Rate': tpr, 'False Positive Rate': fpr,
                       'Model': mods})
df_roc

Unnamed: 0,True Positive Rate,False Positive Rate,Model
0,0.000000,0.000000,Logistic Regression
1,0.001040,0.000000,Logistic Regression
2,0.002079,0.000000,Logistic Regression
3,0.003119,0.000000,Logistic Regression
4,0.004158,0.000000,Logistic Regression
...,...,...,...
6232,1.000000,0.996142,Convolutional Neural Network
6233,1.000000,0.997685,Convolutional Neural Network
6234,1.000000,0.998457,Convolutional Neural Network
6235,1.000000,1.000000,Convolutional Neural Network


In [20]:
import plotly.io as pio
# Use the 'notebook_connected' renderer by default for the figures shown below.
pio.renderers.default = 'notebook_connected'

In [21]:
import plotly.express as px

# One ROC line per value of the 'Model' column.
fig = px.line(df_roc, x='False Positive Rate', y='True Positive Rate', color='Model',
              title='Receiver Operating Characteristic')
fig.show()