### List of tasks
- evaluate the performance of the model on the following metrics
    - precision
    - recall
    - accuracy
    - f-measure

In [1]:
#import packages
import pandas as pd
import numpy as np

In [2]:
#import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [3]:
#import files
df_hv = pd.read_csv('APPAREL_ODOM_1_2019.csv')

df_pm = pd.read_csv('APPAREL_ids_1_2019.csv')

In [4]:
#check ground truth
df_hv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 298 entries, 0 to 297
Data columns (total 16 columns):
_id                        298 non-null object
domain_global_string       298 non-null object
review_rating              298 non-null int64
notes                      97 non-null object
review_text                298 non-null object
review_title               278 non-null object
use_sentiment_label        110 non-null object
use_theme_exists           298 non-null int64
fit_sentiment_label        209 non-null object
fit_theme_exists           298 non-null int64
value_sentiment_label      101 non-null object
value_theme_exists         298 non-null int64
style_sentiment_label      120 non-null object
style_theme_exists         298 non-null int64
quality_sentiment_label    189 non-null object
quality_theme_exists       297 non-null float64
dtypes: float64(1), int64(5), object(10)
memory usage: 37.3+ KB


In [5]:
#check predictive model
#note that they do not match number of entries
df_pm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 15 columns):
_id                        300 non-null object
domain_global_string       300 non-null object
review_rating              300 non-null int64
review_text                300 non-null object
review_title               278 non-null object
use_sentiment_label        146 non-null object
use_theme_exists           156 non-null float64
fit_sentiment_label        220 non-null object
fit_theme_exists           226 non-null float64
value_sentiment_label      93 non-null object
value_theme_exists         99 non-null float64
style_sentiment_label      180 non-null object
style_theme_exists         183 non-null float64
quality_sentiment_label    201 non-null object
quality_theme_exists       215 non-null float64
dtypes: float64(5), int64(1), object(9)
memory usage: 35.2+ KB


In [6]:
#find missing entries in predictive model and remove them
df12 = pd.merge(df_hv, df_pm, on='_id', how='inner')     #extract common rows with merge
df2 = df_pm[~df_pm['_id'].isin(df12['_id'])]

In [7]:
#check missing entries
print(df2)

                 _id domain_global_string  review_rating  \
287  walmart79904828              APPAREL              5   
297    zappos5307009              APPAREL              5   

                                           review_text review_title  \
287  We have tried this product for the last week a...          NaN   
297  I was traveling through Seoul, South Korea whe...          NaN   

    use_sentiment_label  use_theme_exists fit_sentiment_label  \
287                 NaN               NaN                 pos   
297                 NaN               1.0                 pos   

     fit_theme_exists value_sentiment_label  value_theme_exists  \
287               1.0                   NaN                 NaN   
297               1.0                   neg                 1.0   

    style_sentiment_label  style_theme_exists quality_sentiment_label  \
287                   NaN                 NaN                     NaN   
297                   pos                 1.0                

In [8]:
#confirm that missing entries from ground truth is in fact in predictive model before removing them
df_pm.loc[df_pm['_id'] == 'walmart79904828']

Unnamed: 0,_id,domain_global_string,review_rating,review_text,review_title,use_sentiment_label,use_theme_exists,fit_sentiment_label,fit_theme_exists,value_sentiment_label,value_theme_exists,style_sentiment_label,style_theme_exists,quality_sentiment_label,quality_theme_exists
287,walmart79904828,APPAREL,5,We have tried this product for the last week a...,,,,pos,1.0,,,,,,1.0


In [9]:
#remove entries from predictive model that isn't in ground truth
df_pm = df_pm[df_pm['_id'] != 'walmart79904828']

df_pm = df_pm[df_pm['_id'] != 'zappos5307009']

In [10]:
#check to confirm that we have the same number of entries
df_pm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 298 entries, 0 to 299
Data columns (total 15 columns):
_id                        298 non-null object
domain_global_string       298 non-null object
review_rating              298 non-null int64
review_text                298 non-null object
review_title               278 non-null object
use_sentiment_label        146 non-null object
use_theme_exists           155 non-null float64
fit_sentiment_label        218 non-null object
fit_theme_exists           224 non-null float64
value_sentiment_label      92 non-null object
value_theme_exists         98 non-null float64
style_sentiment_label      179 non-null object
style_theme_exists         182 non-null float64
quality_sentiment_label    200 non-null object
quality_theme_exists       213 non-null float64
dtypes: float64(5), int64(1), object(9)
memory usage: 37.2+ KB


In [23]:
#find metrics for theme_exists
themes = [ 'use', 'fit', 'value', 'style', 'quality']
theme_exists = []
for t in themes:
    c1 = t + '_theme_exists'
    y_true = df_hv[c1].fillna( 0. )
    y_pred = df_pm[c1].fillna ( 0. )
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    fmeasure = f1_score(y_true, y_pred)
    theme_exist = {
        'theme_exists':t,
        'precision':precision,
        'recall':recall,
        'accuracy':accuracy, 
        'fmeasures':fmeasure, 
    }
    theme_exists.append( theme_exist )
df3 = pd.DataFrame( theme_exists )
df3

Unnamed: 0,accuracy,fmeasures,precision,recall,theme_exists
0,0.781879,0.754717,0.645161,0.909091,use
1,0.85906,0.901408,0.857143,0.950495,fit
2,0.862416,0.787565,0.77551,0.8,value
3,0.657718,0.662252,0.549451,0.833333,style
4,0.798658,0.849246,0.793427,0.913514,quality


In [12]:
#some reason I can't put this in a loop without messing up my metrics loop so here it is in all its glory
#copyandpaste
#change string to float
sent_map = {'pos':1, 'neg':2}
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].map(sent_map)
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].map(sent_map)
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].map(sent_map)
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].map(sent_map)
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].map(sent_map)

df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].map(sent_map)
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].map(sent_map)
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].map(sent_map)
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].map(sent_map)
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].map(sent_map)

In [16]:
#if I put this in a loop, I get whack metric numbers once I fill nas
#copyandpaste
#wonder what i'm doing wrong here...
df_hv['use_sentiment_label'] = df_hv['use_sentiment_label'].fillna( 0. )
df_hv['fit_sentiment_label'] = df_hv['fit_sentiment_label'].fillna( 0. )
df_hv['value_sentiment_label'] = df_hv['value_sentiment_label'].fillna( 0. )
df_hv['style_sentiment_label'] = df_hv['style_sentiment_label'].fillna( 0. )
df_hv['quality_sentiment_label'] = df_hv['quality_sentiment_label'].fillna( 0. )

In [18]:
df_pm['use_sentiment_label'] = df_pm['use_sentiment_label'].fillna( 0. )
df_pm['fit_sentiment_label'] = df_pm['fit_sentiment_label'].fillna( 0. )
df_pm['value_sentiment_label'] = df_pm['value_sentiment_label'].fillna( 0. )
df_pm['style_sentiment_label'] = df_pm['style_sentiment_label'].fillna( 0. )
df_pm['quality_sentiment_label'] = df_pm['quality_sentiment_label'].fillna( 0. )

In [22]:
#my familar loop to find the metrics for theme_sentiment
themes = [ 'use', 'fit', 'value', 'style', 'quality']
theme_sentiments = []

for t in themes:
    c2 = t + '_sentiment_label'
    y_true = df_hv[c2]
    y_pred = df_pm[c2]
    precision = precision_score(y_true, y_pred, average = 'micro')
    recall = recall_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    fmeasure = f1_score(y_true, y_pred, average = 'micro')
    theme_sentiment = {
        'theme_sentiment':t,
        'precision':precision,
        'recall':recall,
        'accuracy':accuracy, 
        'fmeasures':fmeasure, 
    }
    theme_sentiments.append( theme_sentiment )
df4 = pd.DataFrame( theme_sentiments )
df4

Unnamed: 0,accuracy,fmeasures,precision,recall,theme_sentiment
0,0.765101,0.765101,0.765101,0.765101,use
1,0.805369,0.805369,0.805369,0.805369,fit
2,0.838926,0.838926,0.838926,0.838926,value
3,0.634228,0.634228,0.634228,0.634228,style
4,0.66443,0.66443,0.66443,0.66443,quality
