# LOAD IMPORTS

In [1]:
import re
import shap
import random
import string
import pickle
import pandas as pd
from scipy import sparse

import nltk
from nltk.corpus import stopwords 
nltk.download("stopwords")  
nltk.download('punkt')
nltk.download('wordnet')     
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package stopwords to C:\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# LOAD HELPERS

In [2]:
def parse_single_input(description):
    """Normalise one review text for the vectorizer.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased, split into tokens with ``nltk.word_tokenize`` and
    each token is lemmatized with ``WordNetLemmatizer``.

    Parameters
    ----------
    description : str
        Raw review text.

    Returns
    -------
    str
        The lemmatized tokens joined by single spaces.
    """
    letters_only = re.sub("[^a-zA-Z]", " ", description).lower()
    tokens = nltk.word_tokenize(letters_only)
    lemmatizer = nltk.WordNetLemmatizer()
    return " ".join(map(lemmatizer.lemmatize, tokens))

In [3]:
# Load the fitted vectorizer. The context manager closes the file handle,
# which the bare pickle.load(open(...)) form left open.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open('vectorizer.pk', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

In [4]:
# Load the trained prediction model. The context manager closes the file
# handle, which the bare pickle.load(open(...)) form left open.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open('prediction_model.sav', 'rb') as model_file:
    model = pickle.load(model_file)

In [5]:
# Load the sparse matrix saved in x_train.npz; predictRecord passes it to
# shap.LinearExplainer as its data argument.
x_train = sparse.load_npz("x_train.npz")

# LOAD FILE

In [6]:
# Read the tab-separated, UTF-8 encoded review file into a DataFrame and
# display it. predictRecord reads the Review and Rating columns of each row.
file = pd.read_csv('fitbit(30).csv', sep='\t', encoding = 'utf-8')
file

Unnamed: 0,Name,Title,Review,Rating,Prediction
0,Amazon Customer,Not worth the hassle,The strap is really hard to change. I managed ...,1.0,0.526897
1,Aubrey,Listen to the poor reviews,I've used fitbit products for years. My well l...,1.0,0.687864
2,Amazon Customer,It stopped working after 3 months,My fitbit stopped working after 3 months. The ...,2.0,2.005359
3,Sophia,scratched and not that great,I have had this watch for less than a month an...,3.0,3.050132
4,Cassandra Reising,All in all a good watch,I have had this for about a month and am very ...,4.0,3.999938
5,Gearhead,Works as designed.,Bought this for my girlfriend. Thought I was t...,4.0,4.057881
6,speedemonj,Great for fitness,"Great for fitness, sleep, etc but you have to ...",4.0,4.112775
7,Tyler,Good bang for your buck,I had a Fitbit blaze before I got the versa 2 ...,4.0,4.344578
8,Kindle Customer,HUGE improvement over the first Fitbit Versa,My first Fitbit Versa was great and died 4 mon...,5.0,4.50267
9,Nicole Quigley,Love It!!,After buying a cheaper type of fitness tracker...,5.0,4.955996


# LOOP OVER RECORDS

In [7]:
# Holds the explainer together with the objects it was built from, so that it
# is constructed once instead of once per record.
_explainer_cache = {"model": None, "background": None, "explainer": None}


def _get_explainer():
    """Return the LinearExplainer for the current `model` and `x_train`, rebuilding it only if either object changed."""
    cache = _explainer_cache
    stale = (
        cache["explainer"] is None
        or cache["model"] is not model
        or cache["background"] is not x_train
    )
    if stale:
        cache["explainer"] = shap.LinearExplainer(model, x_train, feature_perturbation="interventional")
        cache["model"] = model
        cache["background"] = x_train
    return cache["explainer"]


def _feature_names():
    """Return the vectorizer's vocabulary terms, supporting both the new and the legacy scikit-learn accessor."""
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
    getter = getattr(vectorizer, "get_feature_names_out", None)
    if getter is None:
        getter = vectorizer.get_feature_names
    return getter()


def predictRecord(record):
    """Compute per-term SHAP values for the review text of one record.

    The review is cleaned with ``parse_single_input``, vectorized with the
    module-level ``vectorizer`` and explained with a ``shap.LinearExplainer``
    built from the module-level ``model`` and ``x_train``.

    Parameters
    ----------
    record : object with a ``Review`` attribute
        One row of the review table (e.g. a pandas Series). Only ``Review``
        is read; the rating and the model prediction were computed by the
        previous implementation but never used, so they are no longer
        evaluated.

    Returns
    -------
    pandas.DataFrame
        One row per vocabulary term with the columns ``shap`` (SHAP value of
        the term for this review), ``term`` (the feature name) and
        ``occurrence`` (the term's value in the vectorized review).
    """
    new_result = vectorizer.transform([parse_single_input(record.Review)])

    # Explain once and reuse the result; the explanation used to be computed twice.
    shap_values = _get_explainer().shap_values(new_result)

    return pd.DataFrame({
        'shap': shap_values[0],
        'term': _feature_names(),
        'occurrence': new_result.toarray()[0],
    })

In [8]:
def getSHAPValues(df_shap, top_n=20):
    """Select the terms with the largest absolute SHAP values.

    The ``top_n`` most positive and ``top_n`` most negative rows are
    collected, labelled with an ``orientation`` column and ordered by
    descending absolute SHAP value.

    Parameters
    ----------
    df_shap : pandas.DataFrame
        Frame with at least a numeric ``shap`` column. Its index labels are
        expected to be unique.
    top_n : int, default 20
        Number of rows taken from each tail and number of rows returned.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``df_shap`` plus an ``orientation`` column
        that is ``"negative"`` for SHAP values below zero and ``"positive"``
        otherwise, sorted by descending ``abs(shap)``.
    """
    df_shap_lower = df_shap.sort_values(by=['shap'], ascending=True).head(top_n)
    df_shap_higher = df_shap.sort_values(by=['shap'], ascending=False).head(top_n)
    df_result = pd.concat([df_shap_higher, df_shap_lower])

    # With fewer than 2 * top_n input rows the two tails overlap, so the same
    # row appears twice and reindex() below would raise on the duplicate
    # labels. Keep each row once.
    df_result = df_result.loc[~df_result.index.duplicated(keep='first')].copy()

    df_result['orientation'] = ["negative" if x < 0 else "positive" for x in df_result['shap']]

    # Sort in order of summary plot
    by_magnitude = df_result['shap'].abs().sort_values(ascending=False).index
    return df_result.reindex(by_magnitude).head(top_n)

In [9]:
def getFactor(shapvalue, occurrence):
    """Return the magnitude of a SHAP value per occurrence.

    When ``occurrence`` is greater than 1 the SHAP value is divided by it;
    otherwise it is used as is. The absolute value of the result is rounded
    to two decimals.
    """
    per_occurrence = shapvalue / occurrence if occurrence > 1 else shapvalue
    return round(abs(per_occurrence), 2)

In [10]:
def createTermID(term, shap):
    """Build an identifier of the form ``<3 random letters><digits>_<term>``.

    The digits are ``str(abs(shap))`` with every occurrence of ``"0."``
    removed, and the prefix is three random upper-case ASCII letters drawn
    with ``random.choice``.

    NOTE(review): because *every* ``"0."`` is removed, a value such as 10.5
    yields ``"15"``, while a value such as 1.25 keeps its decimal point.
    Confirm whether consumers of the id rely on this format.
    """
    magnitude_text = str(abs(shap)).replace("0.", "")
    prefix_letters = []
    for _ in range(3):
        prefix_letters.append(random.choice(string.ascii_uppercase))
    return "".join(prefix_letters) + magnitude_text + "_" + term

In [11]:
def parseLine(df_result):
    """Add a ``factor`` and an ``id`` column to a frame of SHAP terms.

    For every row, ``factor`` is ``getFactor(shap, occurrence)`` and ``id``
    is ``createTermID(term, shap)``.

    Parameters
    ----------
    df_result : pandas.DataFrame
        Frame with the columns ``shap``, ``term`` and ``occurrence``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_result`` with the two extra columns; the input frame
        is not modified.
    """
    factors = []
    ids = []
    # Walk the columns positionally rather than looking rows up by label:
    # df.loc[label] returns a DataFrame instead of a single row when index
    # labels repeat, which made the per-row helpers fail.
    columns = zip(df_result['shap'], df_result['term'], df_result['occurrence'])
    for shap_value, term, occurrence in columns:
        factors.append(getFactor(shap_value, occurrence))
        ids.append(createTermID(term, shap_value))

    new = df_result.copy()
    new['factor'] = factors
    new['id'] = ids
    return new

In [12]:
def getJSON(df_og):
    """Serialise a frame of SHAP terms as a JSON array string.

    The frame is first passed through ``parseLine``; each resulting row is
    then converted with ``to_json()`` and the row strings are joined with
    commas inside square brackets. An empty frame gives ``"[]"``.
    """
    df_result = parseLine(df_og)
    serialized_rows = [df_result.loc[label].to_json() for label in df_result.index]
    return "[" + ",".join(serialized_rows) + "]"

In [14]:
# LOOP
# Explain every review in `file` and keep one JSON string per record,
# in the same order as the rows.
json_collection = []
for i, record in file.iterrows():
    json_collection.append(getJSON(getSHAPValues(predictRecord(record))))

# Attach the JSON strings to a copy of the input table, write the table to a
# tab-separated file and display it.
result = file.copy()
result['Shap_json'] = json_collection
result.index.name = "Iterator"
result.to_csv('fitbit_shap.csv', sep='\t')
result

Unnamed: 0_level_0,Name,Title,Review,Rating,Prediction,Shap_json
Iterator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Amazon Customer,Not worth the hassle,The strap is really hard to change. I managed ...,1.0,0.526897,"[{""shap"":-1.2216773556,""term"":""attach"",""occurr..."
1,Aubrey,Listen to the poor reviews,I've used fitbit products for years. My well l...,1.0,0.687864,"[{""shap"":-1.0183404569,""term"":""poor"",""occurren..."
2,Amazon Customer,It stopped working after 3 months,My fitbit stopped working after 3 months. The ...,2.0,2.005359,"[{""shap"":1.2231704767,""term"":""prompted"",""occur..."
3,Sophia,scratched and not that great,I have had this watch for less than a month an...,3.0,3.050132,"[{""shap"":1.3777308968,""term"":""alta"",""occurrenc..."
4,Cassandra Reising,All in all a good watch,I have had this for about a month and am very ...,4.0,3.999938,"[{""shap"":0.4017336522,""term"":""amazing"",""occurr..."
5,Gearhead,Works as designed.,Bought this for my girlfriend. Thought I was t...,4.0,4.057881,"[{""shap"":-0.4577140271,""term"":""apps"",""occurren..."
6,speedemonj,Great for fitness,"Great for fitness, sleep, etc but you have to ...",4.0,4.112775,"[{""shap"":0.31252238,""term"":""great"",""occurrence..."
7,Tyler,Good bang for your buck,I had a Fitbit blaze before I got the versa 2 ...,4.0,4.344578,"[{""shap"":-1.0821097325,""term"":""horrible"",""occu..."
8,Kindle Customer,HUGE improvement over the first Fitbit Versa,My first Fitbit Versa was great and died 4 mon...,5.0,4.50267,"[{""shap"":0.7109636365,""term"":""great"",""occurren..."
9,Nicole Quigley,Love It!!,After buying a cheaper type of fitness tracker...,5.0,4.955996,"[{""shap"":0.6609941128,""term"":""motivates"",""occu..."
