### List of tasks
- evaluate the performance of the model on the following metrics
    - precision
    - recall
    - accuracy
    - f-measure

In [1]:
#import packages
import pandas as pd
import numpy as np

In [2]:
#import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [3]:
#import files
df_hv = pd.read_csv('APPAREL_ODOM_1_2019.csv')

df_pm = pd.read_csv('APPAREL_ids_1_2019.csv')

In [4]:
#check ground truth
df_hv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 298 entries, 0 to 297
Data columns (total 16 columns):
_id                        298 non-null object
domain_global_string       298 non-null object
review_rating              298 non-null int64
notes                      97 non-null object
review_text                298 non-null object
review_title               278 non-null object
use_sentiment_label        110 non-null object
use_theme_exists           298 non-null int64
fit_sentiment_label        209 non-null object
fit_theme_exists           298 non-null int64
value_sentiment_label      101 non-null object
value_theme_exists         298 non-null int64
style_sentiment_label      120 non-null object
style_theme_exists         298 non-null int64
quality_sentiment_label    189 non-null object
quality_theme_exists       297 non-null float64
dtypes: float64(1), int64(5), object(10)
memory usage: 37.3+ KB


In [5]:
#check predictive model
#note that they do not match number of entries
df_pm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 15 columns):
_id                        300 non-null object
domain_global_string       300 non-null object
review_rating              300 non-null int64
review_text                300 non-null object
review_title               278 non-null object
use_sentiment_label        146 non-null object
use_theme_exists           156 non-null float64
fit_sentiment_label        220 non-null object
fit_theme_exists           226 non-null float64
value_sentiment_label      93 non-null object
value_theme_exists         99 non-null float64
style_sentiment_label      180 non-null object
style_theme_exists         183 non-null float64
quality_sentiment_label    201 non-null object
quality_theme_exists       215 non-null float64
dtypes: float64(5), int64(1), object(9)
memory usage: 35.2+ KB


In [6]:
#find missing entries in predictive model and remove them
df12 = pd.merge(df_hv, df_pm, on='_id', how='inner')     #extract common rows with merge
df2 = df_pm[~df_pm['_id'].isin(df12['_id'])]

In [7]:
#check missing entries
print(df2)

                 _id domain_global_string  review_rating  \
287  walmart79904828              APPAREL              5   
297    zappos5307009              APPAREL              5   

                                           review_text review_title  \
287  We have tried this product for the last week a...          NaN   
297  I was traveling through Seoul, South Korea whe...          NaN   

    use_sentiment_label  use_theme_exists fit_sentiment_label  \
287                 NaN               NaN                 pos   
297                 NaN               1.0                 pos   

     fit_theme_exists value_sentiment_label  value_theme_exists  \
287               1.0                   NaN                 NaN   
297               1.0                   neg                 1.0   

    style_sentiment_label  style_theme_exists quality_sentiment_label  \
287                   NaN                 NaN                     NaN   
297                   pos                 1.0                

In [8]:
#confirm that missing entries from ground truth is in fact in predictive model before removing them
df_pm.loc[df_pm['_id'] == 'walmart79904828']

Unnamed: 0,_id,domain_global_string,review_rating,review_text,review_title,use_sentiment_label,use_theme_exists,fit_sentiment_label,fit_theme_exists,value_sentiment_label,value_theme_exists,style_sentiment_label,style_theme_exists,quality_sentiment_label,quality_theme_exists
287,walmart79904828,APPAREL,5,We have tried this product for the last week a...,,,,pos,1.0,,,,,,1.0


In [9]:
#remove entries from predictive model that isn't in ground truth
df_pm = df_pm[df_pm['_id'] != 'walmart79904828']

df_pm = df_pm[df_pm['_id'] != 'zappos5307009']

In [10]:
#check to confirm that we have the same number of entries
df_pm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 298 entries, 0 to 299
Data columns (total 15 columns):
_id                        298 non-null object
domain_global_string       298 non-null object
review_rating              298 non-null int64
review_text                298 non-null object
review_title               278 non-null object
use_sentiment_label        146 non-null object
use_theme_exists           155 non-null float64
fit_sentiment_label        218 non-null object
fit_theme_exists           224 non-null float64
value_sentiment_label      92 non-null object
value_theme_exists         98 non-null float64
style_sentiment_label      179 non-null object
style_theme_exists         182 non-null float64
quality_sentiment_label    200 non-null object
quality_theme_exists       213 non-null float64
dtypes: float64(5), int64(1), object(9)
memory usage: 37.2+ KB


In [11]:
#create confusion matrix df for theme_exists
themes = [ 'use', 'fit', 'value', 'style', 'quality']
confusion_t = []
for t in themes:
    c1 = t + '_theme_exists'
    y_true = df_hv[c1].fillna( 0. )
    y_pred = df_pm[c1].fillna ( 0. )
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()    
    confusion_m = {
        'theme':t,
        'tn':tn,
        'fp':fp,
        'fn':fn, 
        'tp':tp, 
    }
    confusion_t.append( confusion_m )
df_ct = pd.DataFrame( confusion_t )
cols = ['theme', 'tn', 'fp', 'fn', 'tp']
df_ct = df_ct[cols]
df_ct

Unnamed: 0,theme,tn,fp,fn,tp
0,use,133,55,10,100
1,fit,64,32,10,192
2,value,181,22,19,76
3,style,96,82,20,100
4,quality,69,44,16,169


In [12]:
#define functions to calculate metrics
def accuracy(row):
    return (row['tp'] + row['tn']) / (row['tp'] + row['tn'] + row['fp'] + row['fn'])
    
def recall(row):
    return row['tp'] / (row['tp'] + row['fn'])
    
def precision(row):
    return row['tp'] / (row['tp'] + row['fp'])

def fmeasure(row):
    return (2*row['recalls']*row['precisions']) / (row['recalls']+row['precisions'])

In [38]:
df_ct['accuracies'] = df_ct.apply(accuracy, axis=1)
df_ct['recalls'] = df_ct.apply(recall, axis=1)
df_ct['precisions'] = df_ct.apply(precision, axis=1)
df_ct['fmeasures'] = df_ct.apply(fmeasure, axis=1)
df_theme = df_ct.drop(['tn', 'fp', 'fn', 'tp'], axis=1)
df_theme.set_index('theme', inplace=True)
df_theme = df_theme.stack().reset_index()

In [37]:
#combine metrics with theme to create one column for theme_exists
df_theme['metrics'] = df_theme['level_1'].str.cat(df_theme['theme'], sep='.')
df_theme.rename(columns = {0:'theme_exists'}, inplace = True)
df_themes = df_theme.drop(['theme', 'level_1'], axis=1)

In [16]:
#change neg and pos to floats for easy handling of NaNs
sent_map = {'pos':1, 'neg':2}
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].map(sent_map)
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].map(sent_map)
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].map(sent_map)
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].map(sent_map)
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].map(sent_map)

df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].map(sent_map)
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].map(sent_map)
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].map(sent_map)
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].map(sent_map)
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].map(sent_map)

#fill remaining NaNs with float 0. to find confusion matrix for only 1 and 2
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].fillna( 0. )
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].fillna( 0. )
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].fillna( 0. )
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].fillna( 0. )
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].fillna( 0. )

df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].fillna( 0. )
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].fillna( 0. )
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].fillna( 0. )
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].fillna( 0. )
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].fillna( 0. )

In [17]:
#check to make sure things were done properly
df_pm.head()

Unnamed: 0,_id,domain_global_string,review_rating,review_text,review_title,use_sentiment_label,use_theme_exists,fit_sentiment_label,fit_theme_exists,value_sentiment_label,value_theme_exists,style_sentiment_label,style_theme_exists,quality_sentiment_label,quality_theme_exists
0,academy58403947,APPAREL,5,The shorts were excellent in color and style. ...,,0.0,,0.0,,1.0,1.0,1.0,1.0,0.0,
1,adidas100338674,APPAREL,4,Fab running shorts. Only thing I'd change is t...,Retro vibe running!,1.0,1.0,0.0,,0.0,,0.0,,1.0,1.0
2,adidas102938471,APPAREL,5,"Just looks amazing and classy. Plus, it is sup...",Easy and stress free online experience. This f...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,
3,amazonR17DE72WNC7FQM,APPAREL,1,I should have taken a little more time when lo...,Disappointing,0.0,,0.0,,0.0,,0.0,,1.0,1.0
4,amazonR19HL1JO6GEKJK,APPAREL,4,WAS SOME WHAT LARGE FOR 8 AND 9 YEAR OLD AND W...,ASICS WOMWNS CIRCUIT 7 SINGLET X SMAL,0.0,,2.0,1.0,1.0,1.0,0.0,,0.0,


In [18]:
#loop through only sentiment_label for confusion matrix
confusion_s = []
for t in themes:
    c2 = t + '_sentiment_label'
    y_true = df_hv[c2]
    y_pred = df_pm[c2]
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[1, 2]).ravel()
    confusion_ms = {
        'theme':t,
        'tn':tn,
        'fp':fp,
        'fn':fn, 
        'tp':tp, 
    }
    confusion_s.append( confusion_ms )
df_cs = pd.DataFrame( confusion_s )
colss = ['theme', 'tn', 'fp', 'fn', 'tp']
df_cs = df_cs[colss]
df_cs

Unnamed: 0,theme,tn,fp,fn,tp
0,use,88,3,3,2
1,fit,166,3,15,6
2,value,62,1,4,5
3,style,91,5,1,1
4,quality,108,5,26,19


In [33]:
#calculate and create dataframe of metrics for only sentiment_label
df_cs['accuracies'] = df_cs.apply(accuracy, axis=1)
df_cs['recalls'] = df_cs.apply(recall, axis=1)
df_cs['precisions'] = df_cs.apply(precision, axis=1)
df_cs['fmeasures'] = df_cs.apply(fmeasure, axis=1)
df_sent = df_cs.drop(['tn', 'fp', 'fn', 'tp'], axis=1)
df_sent.set_index('theme', inplace=True)
df_sent = df_sent.stack().reset_index()

In [36]:
#combine metrics and theme columns for easy viewing
df_sent['metrics'] = df_sent['level_1'].str.cat(df_sent['theme'], sep='.')

df_sent.rename(columns = {0:'theme_sentiment'}, inplace = True)
df_sents = df_sent.drop(['theme', 'level_1'], axis=1)

In [39]:
#combine both dataframes for one final one to show results
df_final = df_themes.merge(df_sents, left_on='metrics', right_on='metrics')
columns = ['metrics', 'theme_exists', 'theme_sentiment']
df_final = df_final[columns]
df_final

Unnamed: 0,metrics,theme_exists,theme_sentiment
0,accuracies.use,0.781879,0.9375
1,recalls.use,0.909091,0.4
2,precisions.use,0.645161,0.4
3,fmeasures.use,0.754717,0.4
4,accuracies.fit,0.85906,0.905263
5,recalls.fit,0.950495,0.285714
6,precisions.fit,0.857143,0.666667
7,fmeasures.fit,0.901408,0.4
8,accuracies.value,0.862416,0.930556
9,recalls.value,0.8,0.555556


### compared to the below functions, performing standard formula calculations produced the same results.

In [11]:
#find metrics for theme_exists
themes = [ 'use', 'fit', 'value', 'style', 'quality']
theme_exists = []
for t in themes:
    c1 = t + '_theme_exists'
    y_true = df_hv[c1].fillna( 0. )
    y_pred = df_pm[c1].fillna ( 0. )
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    fmeasure = f1_score(y_true, y_pred)
    theme_exist = {
        'theme_exists':t,
        'precision':precision,
        'recall':recall,
        'accuracy':accuracy, 
        'fmeasures':fmeasure, 
    }
    theme_exists.append( theme_exist )
df3 = pd.DataFrame( theme_exists )
df3

Unnamed: 0,accuracy,fmeasures,precision,recall,theme_exists
0,0.781879,0.754717,0.645161,0.909091,use
1,0.85906,0.901408,0.857143,0.950495,fit
2,0.862416,0.787565,0.77551,0.8,value
3,0.657718,0.662252,0.549451,0.833333,style
4,0.798658,0.849246,0.793427,0.913514,quality


In [12]:
#some reason I can't put this in a loop without messing up my metrics loop so here it is in all its glory
#copyandpaste
#change string to float
sent_map = {'pos':1, 'neg':2}
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].map(sent_map)
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].map(sent_map)
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].map(sent_map)
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].map(sent_map)
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].map(sent_map)

df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].map(sent_map)
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].map(sent_map)
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].map(sent_map)
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].map(sent_map)
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].map(sent_map)

In [16]:
#if I put this in a loop, I get whack metric numbers once I fill nas
#copyandpaste
#wonder what i'm doing wrong here...
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].fillna( 0. )
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].fillna( 0. )
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].fillna( 0. )
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].fillna( 0. )
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].fillna( 0. )

In [18]:
df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].fillna( 0. )
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].fillna( 0. )
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].fillna( 0. )
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].fillna( 0. )
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].fillna( 0. )

In [22]:
#my familar loop to find the metrics for theme_sentiment
themes = [ 'use', 'fit', 'value', 'style', 'quality']
theme_sentiments = []

for t in themes:
    c2 = t + '_sentiment_label'
    y_true = df_hv[c2]
    y_pred = df_pm[c2]
    precision = precision_score(y_true, y_pred, average = 'micro')
    recall = recall_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    fmeasure = f1_score(y_true, y_pred, average = 'micro')
    theme_sentiment = {
        'theme_sentiment':t,
        'precision':precision,
        'recall':recall,
        'accuracy':accuracy, 
        'fmeasures':fmeasure, 
    }
    theme_sentiments.append( theme_sentiment )
df4 = pd.DataFrame( theme_sentiments )
df4

Unnamed: 0,accuracy,fmeasures,precision,recall,theme_sentiment
0,0.765101,0.765101,0.765101,0.765101,use
1,0.805369,0.805369,0.805369,0.805369,fit
2,0.838926,0.838926,0.838926,0.838926,value
3,0.634228,0.634228,0.634228,0.634228,style
4,0.66443,0.66443,0.66443,0.66443,quality
