<a href="https://colab.research.google.com/github/sv650s/amazon-review-classification/blob/master/notebooks/deep_learning/6.5.5-LSTMB16-GloVe-problematic-categories-all-ratings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook, we will look at review samples from mis-classified examples

We are loading pre-trained model with 1mil example dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import sys
DRIVE_DIR = "drive/My Drive/Springboard/capstone"
sys.path.append(DRIVE_DIR)


%tensorflow_version 2.x


import tensorflow as tf
# checl to make sure we are using GPU here
tf.test.gpu_device_name()

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
TensorFlow 2.x selected.


'/device:GPU:0'

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Embedding, \
    SpatialDropout1D, Flatten, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import model_to_dot
from tensorflow.keras.initializers import Constant


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight


import pandas as pd
import numpy as np
from IPython.display import SVG
import pickle
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import os
import logging


import util.dict_util as du
import util.plot_util as pu
import util.file_util as fu
import util.keras_util as ku
import util.report_util as ru

import random

# fix random seeds
tf.compat.v1.reset_default_graph()
tf.compat.v1.set_random_seed(1)
random.seed(1)
np.random.seed(1)

logging.basicConfig(level=logging.ERROR)

%matplotlib inline
sns.set()


DATE_FORMAT = '%Y-%m-%d'
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
LABEL_COLUMN = "star_rating"
REVIEW_COLUMN = "review_body"


DEBUG = False


LSTM_DIM = 16 # total LSTM units
MODEL_NAME = f"LSTMB{LSTM_DIM}"
ARCHITECTURE = f"1x{LSTM_DIM}"
DESCRIPTION = f"1 Layer {LSTM_DIM} LSTM Units, No Dropout, GloVe Embedding, Balanced Weights"
FEATURE_SET_NAME = "glove"
PATIENCE = 4

SAMPLES = "2m"

if DEBUG:
  DATA_FILE = f'{DRIVE_DIR}/data/amazon_reviews_us_Wireless_v1_00-test-preprocessed.csv'
  MODEL_NAME = f'test-{MODEL_NAME}'
  MISSING_WORDS_FILE = f'{DRIVE_DIR}/reports/glove_embedding-missing_words-test.csv'
  ku.ModelWrapper.set_report_filename('test-dl_prototype-report.csv')
else:
  DATA_FILE = f"{DRIVE_DIR}/data/amazon_reviews_us_Wireless_v1_00-{SAMPLES}-preprocessed.csv"
  MISSING_WORDS_FILE = f'{DRIVE_DIR}/reports/glove_embedding-missing_words-{SAMPLES}.csv'
  ku.ModelWrapper.set_report_filename('glove_embedding-dl_prototype-report.csv')


EMBEDDING_FILE = f'{DRIVE_DIR}/data/embeddings/glove.840B.300d.txt'

# first layer filter
FILTER1 = 32
# Network Settings
KERNEL_SIZE=3



# length of our embedding - 300 is standard
EMBED_SIZE = 300
EPOCHS  = 50
BATCH_SIZE = 128

# From EDA, we know that 90% of review bodies have 100 words or less, 
# we will use this as our sequence length
MAX_SEQUENCE_LENGTH = 100




In [3]:
# Load Report file
report = pd.read_csv(f'{DRIVE_DIR}/reports/glove_embedding-dl_prototype-report.csv', quotechar="'")
report = report[(report.model_name == 'LSTMB16') & (report.train_examples == 746766)]
report

Unnamed: 0,accuracy,architecture,batch_size,class_weight,classification_report,confusion_matrix,description,embedding,epochs,evaluate_time_min,feature_set_name,file,loss,max_sequence_length,model_file,model_json_file,model_name,predict_time_min,roc_auc,sampling_type,status,status_date,test_examples,test_features,tokenizer_file,train_examples,train_features,train_time_min,weights_file,network_history_file
2,0.706036,1x16,128.0,"[1.4228484463085092, 3.0270973626206583, 2.210...","{""1"": {""precision"": 0.6911714605538521, ""recal...","[[28403, 1636, 2325, 582, 2217], [7276, 2016, ...","1 Layer 16 LSTM Units, No Dropout, GloVe Embed...",300.0,16.0,0.91,glove,drive/My Drive/Springboard/capstone/data/amazo...,0.753876,100.0,drive/My Drive/Springboard/capstone/models/LST...,drive/My Drive/Springboard/capstone/models/LST...,LSTMB16,0.6,"{""auc_1"": 0.9629269444937428, ""auc_2"": 0.89005...",none,success,2020-02-07 23:19:59,248922.0,100.0,drive/My Drive/Springboard/capstone/models/dl-...,746766.0,100.0,20.65,drive/My Drive/Springboard/capstone/models/LST...,


In [4]:
DATA_FILE = report.file.values[0]
print(f'Reading datafile: {DATA_FILE}')
df = pd.read_csv(DATA_FILE, encoding='utf8', engine='python')

rating = df[LABEL_COLUMN]
reviews = df[REVIEW_COLUMN]

Reading datafile: drive/My Drive/Springboard/capstone/data/amazon_reviews_us_Wireless_v1_00-1m-preprocessed.csv


# Preprocessing

In [5]:


# pre-process our lables
# one hot encode our star ratings since Keras/TF requires this for the labels
ohe = OneHotEncoder()
y = ohe.fit_transform(rating.values.reshape(len(rating), 1)).toarray()


# split our data into train and test sets
reviews_train, reviews_test, y_train, y_test = train_test_split(reviews, y, random_state=1)

with open(report.tokenizer_file.values[0], 'rb') as file:
  t = pickle.load(file)

# Pre-process our features (review body)
# t = Tokenizer(oov_token="<UNK>")
# fit the tokenizer on the documents
# t.fit_on_texts(reviews_train)
# tokenize both our training and test data
train_sequences = t.texts_to_sequences(reviews_train)
test_sequences = t.texts_to_sequences(reviews_test)

print("Vocabulary size={}".format(len(t.word_counts)))
print("Number of Documents={}".format(t.document_count))


# pad our reviews to the max sequence length
X_train = sequence.pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)
X_test = sequence.pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)

print('Train review vectors shape:', X_train.shape, ' Test review vectors shape:', X_test.shape)


Vocabulary size=566
Number of Documents=74
Train review vectors shape: (746766, 100)  Test review vectors shape: (248922, 100)


# Load Our Pre-trained Model

In [0]:
from tensorflow.keras.models import load_model

model = load_model(report.model_file.values[0])

Double checking our accuracy. It should be: 70.60%

In [7]:
scores = model.evaluate(X_test, y_test, verbose=1)


Accuracy: 47.68%


In [21]:
print("Loss: %.2f%%" % (scores[0]*100))
print("Accuracy: %.2f%%" % (scores[1]*100))


Loss: 165.64%
Accuracy: 47.68%


In [8]:
y_predict = model.predict(X_test)
y_predict[:5]

array([[0.01989064, 0.02191228, 0.10914093, 0.5643757 , 0.28468046],
       [0.1958018 , 0.08149023, 0.13391519, 0.18083635, 0.40795645],
       [0.00592806, 0.016965  , 0.09245069, 0.3508915 , 0.5337648 ],
       [0.12215877, 0.05809065, 0.10037878, 0.16314633, 0.5562254 ],
       [0.00149657, 0.00171353, 0.00779722, 0.07453021, 0.91446245]],
      dtype=float32)

# Unencode our labels back to 1 to 5 so we can look at confusion matrix and classification report

In [0]:
y_test_unencoded = ku.unencode(y_test)
y_predict_unencoded = ku.unencode(y_predict)

In [10]:
from sklearn.metrics import confusion_matrix, classification_report

cr = classification_report(y_test_unencoded, y_predict_unencoded)
print(cr)

              precision    recall  f1-score   support

           1       0.27      0.27      0.27     35163
           2       0.08      0.00      0.00     16498
           3       0.11      0.06      0.07     22359
           4       0.19      0.12      0.15     41315
           5       0.59      0.77      0.67    133587

    accuracy                           0.48    248922
   macro avg       0.25      0.24      0.23    248922
weighted avg       0.40      0.48      0.43    248922



In [11]:
confusion_matrix = confusion_matrix(y_test_unencoded, y_predict_unencoded)
print(confusion_matrix)

[[  9419    142   2643   3758  19201]
 [  3660     39   1056   1915   9828]
 [  4138     74   1259   2645  14243]
 [  5164     73   1759   5062  29257]
 [ 13072    180   4682  12756 102897]]


In [20]:
classification_report = classification_report(y_test_unencoded, y_predict_unencoded)
print(classification_report)

              precision    recall  f1-score   support

           1       0.27      0.27      0.27     35163
           2       0.08      0.00      0.00     16498
           3       0.11      0.06      0.07     22359
           4       0.19      0.12      0.15     41315
           5       0.59      0.77      0.67    133587

    accuracy                           0.48    248922
   macro avg       0.25      0.24      0.23    248922
weighted avg       0.40      0.48      0.43    248922



In [12]:
result = pd.DataFrame({"test": y_test_unencoded, "predict": y_predict_unencoded})
result.head()

Unnamed: 0,test,predict
0,5,4
1,5,5
2,5,5
3,5,5
4,5,5


In [13]:
# add column to tell use which ones are misclassified
result["correct"] = result.test == result.predict

print(result[result.correct == True].sample(5))

result[result.correct == False].sample(5)

        test  predict  correct
224739     5        5     True
41968      4        4     True
107973     1        1     True
185766     5        5     True
228159     5        5     True


Unnamed: 0,test,predict,correct
135350,1,5,False
204381,5,4,False
210048,3,5,False
143114,3,4,False
84762,5,3,False


# Incorrectly classified 2-star ratings

In [0]:
# reset reviews index so it matches our result dataframe
reviews_reset = reviews_test.reset_index().rename({"index": "index_orig"}, axis=1)

In [15]:
incorrect_result = result[(result.test == 2) & (result.correct == False)]
incorrect_idx = incorrect_result.index.tolist()
incorrect_pd = pd.merge(incorrect_result, reviews_reset.loc[incorrect_idx], left_index = True, right_index = True)
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
134304,2,5,False,84875,doe job opening closing case three month span ...
27989,2,1,False,375230,did not fit phone
247247,2,5,False,76324,broke one drop
158395,2,1,False,508575,bought use black decker automotive ithis thing...
103781,2,5,False,277390,ok
176949,2,5,False,83358,worked well month intermittently not plan get ...
62449,2,1,False,645145,looked nice did not quite fit correctly starte...
13687,2,5,False,447991,work ok long do not drop phone good look not p...
41198,2,1,False,487558,no ac connection button stop working volume co...
186454,2,5,False,758078,keep phone protected scratch very easily bubbl...


# Incorrectly classified 3-star ratings

In [16]:
incorrect_result = result[(result.test == 3) & (result.correct == False)]
incorrect_idx = incorrect_result.index.tolist()
incorrect_pd = pd.merge(incorrect_result, reviews_reset.loc[incorrect_idx], left_index = True, right_index = True)
incorrect_pd.sample(min(10, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
8522,3,4,False,654949,feel bit bulky got cheap no biggie diztronic m...
124200,3,5,False,960329,no instruction kind included product ruined fi...
234846,3,4,False,893776,obviously love one direction quite excited com...
136664,3,5,False,933127,received item mail sooner expected thought cas...
26496,3,4,False,839882,bought always like phone charged even work gir...
35225,3,5,False,312632,ok quality price
18023,3,5,False,492734,shell charger great bought 3 different one pas...
204139,3,4,False,21543,good looking blue case also rubbery finish mak...
140724,3,5,False,170966,phone problem
188355,3,5,False,712031,freind bought device love unfortunatley mine f...


# 5-star misclassifed as 1-star

In [17]:
incorrect_result = result[(result.test == 5) & (result.predict == 1) & (result.correct == False)]
incorrect_idx = incorrect_result.index.tolist()
incorrect_pd = pd.merge(incorrect_result, reviews_reset.loc[incorrect_idx], left_index = True, right_index = True)
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
58124,5,1,False,114275,great case fit well especially price
72659,5,1,False,269316,advertised make hay cheap gift
60574,5,1,False,220114,would buy transaction went well around
196777,5,1,False,427664,work well harley needed
178598,5,1,False,897576,cute two piece cover pink rubbery mold white h...
41836,5,1,False,990414,bought item father christams work excellent ac...
97442,5,1,False,433932,great accessory mophie case
95342,5,1,False,713892,pink pretty wow wow wow love pink match shirt ...
206981,5,1,False,135758,work look cute
48635,5,1,False,574999,great long cord android product love give way ...


# 1-star misclassified as 5-star


In [18]:
incorrect_result = result[(result.test == 1) & (result.predict == 5) & (result.correct == False)]
incorrect_idx = incorrect_result.index.tolist()
incorrect_pd = pd.merge(incorrect_result, reviews_reset.loc[incorrect_idx], left_index = True, right_index = True)
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
134038,1,5,False,152483,glass way big use instead covering flat portio...
178143,1,5,False,80856,can not get air bubble front screen
150138,1,5,False,38130,doe not hold phone well started falling apart ...
146576,1,5,False,280710,dont buy never worked
118325,1,5,False,46442,worked good month ear piece dead can not retur...
192229,1,5,False,730464,purchased item new received refurbished phone ...
37423,1,5,False,266666,cracked dropped carpet crack spread 3 4 length...
129517,1,5,False,151251,product arrive damaged do
155880,1,5,False,910408,screw wide iphone screw inserted iphone cable ...
71920,1,5,False,430622,stopped working within first week


# 4-star misclassified as 5-star

In [19]:
incorrect_result = result[(result.test == 4) & (result.predict == 5) & (result.correct == False)]
incorrect_idx = incorrect_result.index.tolist()
incorrect_pd = pd.merge(incorrect_result, reviews_reset.loc[incorrect_idx], left_index = True, right_index = True)
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
181038,4,5,False,470553,might start falling
38730,4,5,False,759193,second one first one lasted month broke taking...
77590,4,5,False,124931,love rubber durable not super bulky gave 4 dis...
245,4,5,False,525892,perfect working
161296,4,5,False,241785,good case
202905,4,5,False,332148,easy apply trashed two ended dust probably eas...
22066,4,5,False,415553,love everything case except button power butto...
181980,4,5,False,991487,first look phone will tempted buy one remember...
245341,4,5,False,127012,absolutely loved color loved leather aspect to...
178768,4,5,False,615674,freedom spot photon last 8 9 month work great ...
