<a href="https://colab.research.google.com/github/sv650s/amazon-review-classification/blob/master/notebooks/deep_learning/6.5.5-LSTMB16-GloVe-problematic-categories-all-ratings-500k.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook, we look at sample reviews that the model misclassified.

We load a pre-trained model that was trained on the 500k-example dataset.

In [0]:
import sys

from google.colab import drive

# Project root on the mounted Drive (relative path; assumes the working directory is /content).
DRIVE_DIR = "drive/My Drive/Springboard/capstone"

# Mount Google Drive; on first use this prompts for an authorization code.
drive.mount('/content/drive')

# Put the project root on the import path so modules stored there can be imported.
sys.path.append(DRIVE_DIR)


# Colab magic: select TensorFlow 2.x; run this before the tensorflow import below.
%tensorflow_version 2.x


import tensorflow as tf
# check to make sure we are using GPU here
# (the call returns the GPU device name, or an empty string when no GPU is available)
tf.test.gpu_device_name()

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
TensorFlow 2.x selected.


'/device:GPU:0'

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Embedding, \
    SpatialDropout1D, Flatten, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import model_to_dot
from tensorflow.keras.initializers import Constant


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight


import pandas as pd
import numpy as np
from IPython.display import SVG
import pickle
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import os
import logging


import util.dict_util as du
import util.plot_util as pu
import util.file_util as fu
import util.keras_util as ku
import util.report_util as ru

import random

# fix random seeds
tf.compat.v1.reset_default_graph()
tf.compat.v1.set_random_seed(1)
random.seed(1)
np.random.seed(1)

logging.basicConfig(level=logging.ERROR)

%matplotlib inline
sns.set()


# ---- Column names and timestamp formats ----
LABEL_COLUMN = "star_rating"
REVIEW_COLUMN = "review_body"
DATE_FORMAT = '%Y-%m-%d'
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'


# Set to True to use the test dataset, test model name and test report file.
DEBUG = False


# ---- Model identity ----
LSTM_DIM = 16 # total LSTM units
MODEL_NAME = f"LSTMB{LSTM_DIM}"
ARCHITECTURE = f"1x{LSTM_DIM}"
DESCRIPTION = f"1 Layer {LSTM_DIM} LSTM Units, No Dropout, GloVe Embedding, Balanced Weights"
FEATURE_SET_NAME = "glove"
PATIENCE = 4

# Sample-size tag used to build the default data and missing-words file names.
# NOTE(review): this is "2m" although the notebook introduction talks about the
# 500k dataset, and DATA_FILE is reassigned from the report in a later cell --
# confirm whether this value is still intended.
SAMPLES = "2m"

# ---- File locations (depend on DEBUG) ----
if not DEBUG:
  DATA_FILE = f"{DRIVE_DIR}/data/amazon_reviews_us_Wireless_v1_00-{SAMPLES}-preprocessed.csv"
  MISSING_WORDS_FILE = f'{DRIVE_DIR}/reports/glove_embedding-missing_words-{SAMPLES}.csv'
  ku.ModelWrapper.set_report_filename('glove_embedding-dl_prototype-report.csv')
else:
  DATA_FILE = f'{DRIVE_DIR}/data/amazon_reviews_us_Wireless_v1_00-test-preprocessed.csv'
  MODEL_NAME = f'test-{MODEL_NAME}'
  MISSING_WORDS_FILE = f'{DRIVE_DIR}/reports/glove_embedding-missing_words-test.csv'
  ku.ModelWrapper.set_report_filename('test-dl_prototype-report.csv')


EMBEDDING_FILE = f'{DRIVE_DIR}/data/embeddings/glove.840B.300d.txt'

# ---- Network settings ----
# first layer filter
FILTER1 = 32
KERNEL_SIZE=3

# length of our embedding - 300 is standard
EMBED_SIZE = 300
EPOCHS  = 50
BATCH_SIZE = 128

# From EDA, we know that 90% of review bodies have 100 words or less,
# so 100 is used as the sequence length.
MAX_SEQUENCE_LENGTH = 100


# Show up to 150 characters per column so review text is readable in tables.
pd.set_option("max_colwidth", 150)

In [0]:
# Load the model report and keep only the run analysed in this notebook:
# the LSTMB16 model trained on 373376 examples.
report = pd.read_csv(f'{DRIVE_DIR}/reports/glove_embedding-dl_prototype-report.csv', quotechar="'")
report = report[(report.model_name == 'LSTMB16') & (report.train_examples == 373376)]
# Later cells read file paths from the first row of `report`; fail here with a
# clear message instead of an opaque IndexError if the filter matched nothing.
assert not report.empty, "No LSTMB16 run with 373376 training examples found in the report file"
report

Unnamed: 0,accuracy,architecture,batch_size,class_weight,classification_report,confusion_matrix,description,embedding,epochs,evaluate_time_min,feature_set_name,file,loss,max_sequence_length,model_file,model_json_file,model_name,predict_time_min,roc_auc,sampling_type,status,status_date,test_examples,test_features,tokenizer_file,train_examples,train_features,train_time_min,weights_file,network_history_file
7,0.698575,1x16,128.0,"[1.4190206082718126, 3.030958904109589, 2.2082815826827535, 1.2016727615047613, 0.3730861754985499]","{""1"": {""precision"": 0.6652655889145497, ""recall"": 0.8155719139297848, ""f1-score"": 0.7327906385143729, ""support"": 17660}, ""2"": {""precision"": 0.3740...","[[14403, 704, 1057, 176, 1320], [3971, 876, 1898, 437, 1018], [2019, 567, 3563, 2164, 2990], [588, 139, 1745, 5444, 12847], [669, 56, 662, 2488, 6...","1 Layer 16 LSTM Units, No Dropout, GloVe Embedding, Balanced Weights",300.0,18.0,0.53,glove,drive/My Drive/Springboard/capstone/data/amazon_reviews_us_Wireless_v1_00-500k-preprocessed.csv,0.773573,100.0,drive/My Drive/Springboard/capstone/models/LSTMB16-1x16-glove-sampling_none-497835-100-star_rating-model.h5,drive/My Drive/Springboard/capstone/models/LSTMB16-1x16-glove-sampling_none-497835-100-star_rating-model.json,LSTMB16,0.35,"{""auc_1"": 0.9601084318520167, ""auc_2"": 0.8863719871464448, ""auc_3"": 0.8532126387128502, ""auc_4"": 0.7752074932607105, ""auc_5"": 0.9053329334856144, ...",none,success,2020-02-14 03:07:12,124459.0,100.0,drive/My Drive/Springboard/capstone/models/LSTMB16-1x16-glove-sampling_none-497835-100-star_rating-tokenizer.pkl,373376.0,100.0,11.93,drive/My Drive/Springboard/capstone/models/LSTMB16-1x16-glove-sampling_none-497835-100-star_rating-weights.h5,drive/My Drive/Springboard/capstone/models/LSTMB16-1x16-glove-sampling_none-497835-100-star_rating-history.pkl


In [0]:
# Use the dataset path recorded in the report row for this run.
DATA_FILE = report["file"].iloc[0]
print(f'Reading datafile: {DATA_FILE}')
df = pd.read_csv(DATA_FILE, engine='python', encoding='utf8')

# Features (review text) and labels (star ratings) as separate Series.
reviews = df[REVIEW_COLUMN]
rating = df[LABEL_COLUMN]

Reading datafile: drive/My Drive/Springboard/capstone/data/amazon_reviews_us_Wireless_v1_00-500k-preprocessed.csv


# Preprocessing

In [0]:


# --- Labels ---
# One-hot encode the star ratings since Keras/TF requires this for the labels.
ohe = OneHotEncoder()
rating_column = rating.values.reshape(-1, 1)
y = ohe.fit_transform(rating_column).toarray()


# --- Train/test split (fixed random_state so the split is repeatable) ---
reviews_train, reviews_test, y_train, y_test = train_test_split(reviews, y, random_state=1)

# --- Features (review body) ---
# Reload the tokenizer saved for this run rather than fitting a new one
# (presumably it was originally fit with Tokenizer(oov_token="<UNK>") on reviews_train).
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(report.tokenizer_file.values[0], 'rb') as tokenizer_fh:
  t = pickle.load(tokenizer_fh)

# Tokenize both the training and the test reviews.
train_sequences = t.texts_to_sequences(reviews_train)
test_sequences = t.texts_to_sequences(reviews_test)

print("Vocabulary size={}".format(len(t.word_counts)))
print("Number of Documents={}".format(t.document_count))


# Pad our reviews to the max sequence length.
X_train, X_test = (
    sequence.pad_sequences(seqs, maxlen=MAX_SEQUENCE_LENGTH)
    for seqs in (train_sequences, test_sequences)
)

print('Train review vectors shape:', X_train.shape, ' Test review vectors shape:', X_test.shape)


Vocabulary size=73964
Number of Documents=373376
Train review vectors shape: (373376, 100)  Test review vectors shape: (124459, 100)


# Load Our Pre-trained Model

In [0]:
# Load the trained Keras model for this run; its path is recorded in the report row.
# load_model is already imported in the notebook's import cell, so it is not
# re-imported here.
model = load_model(report.model_file.values[0])

Double-checking our accuracy. It should match the accuracy recorded in the report for this run: 69.86%

In [0]:
# Evaluate the loaded model on the test split; the next cell reads scores[0] and scores[1].
scores = model.evaluate(X_test, y_test, verbose=1)




In [0]:
# scores[0] is the loss value, not a fraction of anything, so print it as a
# plain number rather than as a percentage.
print("Loss: %.4f" % scores[0])
# scores[1] is treated as accuracy (assumes accuracy is the model's first metric).
print("Accuracy: %.2f%%" % (scores[1]*100))


Loss: 77.36%
Accuracy: 69.86%


In [0]:
# look at a couple prediction results
# Each output row holds five values, one per star rating 1-5 (see the array below).
y_predict = model.predict(X_test)
y_predict[:5]

array([[9.5919299e-01, 3.6801748e-02, 2.9989763e-03, 2.9485291e-04,
        7.1144250e-04],
       [9.7352004e-01, 2.2524560e-02, 1.7759219e-03, 2.6848080e-04,
        1.9110879e-03],
       [1.5300682e-02, 7.9228029e-02, 4.0516040e-01, 4.3939203e-01,
        6.0918804e-02],
       [8.8656682e-04, 4.5840599e-04, 2.7882075e-03, 5.3637948e-02,
        9.4222885e-01],
       [5.3672753e-03, 2.2732127e-02, 1.2872684e-01, 4.0373909e-01,
        4.3943465e-01]], dtype=float32)

# Decode our labels back to star ratings (1 to 5) so we can look at the confusion matrix and classification report

In [0]:
# Convert the one-hot test labels and the predicted score rows back to single ratings.
# NOTE(review): ku.unencode is a project helper not shown here; presumably it
# returns the 1-5 rating with the highest value in each row -- confirm.
y_test_unencoded = ku.unencode(y_test)
y_predict_unencoded = ku.unencode(y_predict)

In [0]:
# These names are also imported in the notebook's import cell; importing them
# again here restores the functions if a later cell has rebound the names.
from sklearn.metrics import confusion_matrix, classification_report

# Per-class precision, recall and F1 for the test predictions.
cr = classification_report(y_test_unencoded, y_predict_unencoded)
print(cr)

              precision    recall  f1-score   support

           1       0.67      0.82      0.73     17660
           2       0.37      0.11      0.17      8200
           3       0.40      0.32      0.35     11303
           4       0.51      0.26      0.35     20763
           5       0.78      0.94      0.85     66533

    accuracy                           0.70    124459
   macro avg       0.54      0.49      0.49    124459
weighted avg       0.65      0.70      0.66    124459



In [0]:
# Rows are the true star ratings, columns the predicted ones.
# The result gets its own name so the imported confusion_matrix() function is
# not overwritten; overwriting it made this cell fail when run a second time.
conf_matrix = confusion_matrix(y_test_unencoded, y_predict_unencoded)
print(conf_matrix)

[[14403   704  1057   176  1320]
 [ 3971   876  1898   437  1018]
 [ 2019   567  3563  2164  2990]
 [  588   139  1745  5444 12847]
 [  669    56   662  2488 62658]]


In [0]:
# Per-class precision, recall and F1 for the test predictions.
# The text is stored under its own name so the imported classification_report()
# function is not overwritten; overwriting it made this cell fail on re-run.
class_report = classification_report(y_test_unencoded, y_predict_unencoded)
print(class_report)

              precision    recall  f1-score   support

           1       0.67      0.82      0.73     17660
           2       0.37      0.11      0.17      8200
           3       0.40      0.32      0.35     11303
           4       0.51      0.26      0.35     20763
           5       0.78      0.94      0.85     66533

    accuracy                           0.70    124459
   macro avg       0.54      0.49      0.49    124459
weighted avg       0.65      0.70      0.66    124459



In [0]:
# One row per test example: true rating ("test") next to predicted rating ("predict").
result = pd.DataFrame({"test": y_test_unencoded, "predict": y_predict_unencoded})
result.head()

Unnamed: 0,test,predict
0,1,1
1,1,1
2,4,4
3,5,5
4,4,5


In [0]:
# Add a column that tells us which examples were classified correctly.
result["correct"] = result.test.eq(result.predict)

# Five random correctly classified examples (plain-text print)...
print(result[result.correct].sample(5))

# ...and five random misclassified ones (shown as the cell's output).
result[~result.correct].sample(5)

       test  predict  correct
58549     5        5     True
71990     5        5     True
27079     3        3     True
29245     5        5     True
23127     1        1     True


Unnamed: 0,test,predict,correct
77485,4,5,False
84176,1,3,False
78868,4,5,False
30534,4,5,False
99869,3,4,False


# Incorrectly classified 2-star ratings

In [0]:
# Give the test reviews a fresh 0..n-1 index so rows line up with the result
# dataframe; the original dataframe index is kept in the index_orig column.
reviews_reset = reviews_test.reset_index().rename(columns={"index": "index_orig"})

In [0]:
# Test examples whose true rating is 2 stars but whose prediction was wrong.
incorrect_result = result[(result.test == 2) & ~result.correct]
incorrect_idx = incorrect_result.index.tolist()
# Attach the review text by joining on the shared row index.
incorrect_pd = incorrect_result.join(reviews_reset.loc[incorrect_idx], how="inner")
# Show up to 20 of them; the fixed random_state keeps the sample repeatable.
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
43683,2,5,False,158088,update ended returning item week use outlet charger quit working ugh hate happens everything included packet need charge samsung galaxy home car c...
22862,2,5,False,229757,change color
63357,2,1,False,389149,ordered antenna jku adaptor connected antenna base wrong size emailed amazon send correct adaptor did refund money unable send right size adaptor ...
69159,2,1,False,347414,case looked very nice however month fell apart tried put back together did not stay intact would not waste money
84102,2,1,False,479027,basically title say fell lap floor didnt break top piece wont stay anymore lasted month
88688,2,1,False,93243,can not get stay dash glove box
107979,2,4,False,347121,looking rubber case do not get hard plastic case rubberized texture also bit darker not bright picture show otherwise fit fine durability cheaper ...
46751,2,3,False,449428,really disappointing though jabra quality brand motorola brand blue came cell phone work much better talk button poorly placed really press hard g...
89512,2,4,False,492251,product since dec paid ti ame worked great got needed go provided nearest taco bell suggested alternate route detects stuck traffic etc however co...
80577,2,1,False,409114,unless use thick temper glass cover button wont flush along will prone accidental press annoying applied two ipads one phone first day week later ...


# Incorrectly classified 3-star ratings

In [0]:
# Test examples whose true rating is 3 stars but whose prediction was wrong.
incorrect_result = result[(result.test == 3) & ~result.correct]
incorrect_idx = incorrect_result.index.tolist()
# Attach the review text by joining on the shared row index.
incorrect_pd = incorrect_result.join(reviews_reset.loc[incorrect_idx], how="inner")
# Show up to 10 of them; the fixed random_state keeps the sample repeatable.
incorrect_pd.sample(min(10, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
76774,3,5,False,383852,really excited got case issue pink melty part loose phone yet blue part amazingly snug help phone pink part shook phone would fly also went lock p...
20724,3,4,False,194309,provides good protection amperfections material affect clarity ok product price
88562,3,4,False,285245,product hard set standard 3 ring 3 5 mm port know do work very well price cant turn
38655,3,5,False,138462,affordable not bulky carrier
79221,3,5,False,25034,nice case li ame ordered bright yellow
99383,3,4,False,314942,product came directly china amport posting irony well made much cheaper looking person photo picture wearing bouncing around bag last couple month...
33240,3,5,False,333075,better protector find hard put small screen without messing
12237,3,1,False,383926,car charger still work black plastic piece next connector broke every ti ame unplug wire show put plastic part back
63085,3,1,False,244110,not steady thought long portrayed picture chrysler voyager turned not next wheel
105090,3,4,False,22648,work well enough base little firm though make not conform well enough curve dash spring bit tight liking take little effort should open insert rem...


# 5-star misclassified as 1-star

In [0]:
# Test examples that are truly 5-star but were predicted as 1-star.
incorrect_result = result[(result.test == 5) & (result.predict == 1) & ~result.correct]
incorrect_idx = incorrect_result.index.tolist()
# Attach the review text by joining on the shared row index.
incorrect_pd = incorrect_result.join(reviews_reset.loc[incorrect_idx], how="inner")
# Show up to 20 of them; the fixed random_state keeps the sample repeatable.
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
122845,5,1,False,145567,bought shortly got new blackberry put case hell dropped phone thrown phone sat phone closed car door phone etc phone still work almost 2 year purc...
69737,5,1,False,39244,never power
53678,5,1,False,493212,second one used first one mile bicycle trip first one broken shipped bike back east would not without gps junky
15086,5,1,False,419981,nice little charger like size packability one item found go bag given five star rating performs well high capacity charger seen real complaint des...
10155,5,1,False,358949,thick black silicon case year ala finally tore bottom back tried case either fli amsy thin difficult get ti ames needed dock phone use 1 8 jack di...
51010,5,1,False,233581,got second one did not even fix iphone plastic horrible messed disappointed
9236,5,1,False,321256,otterbox make great case far no matter done always protected phone damage hand belt clip come piece junk break amazon great return policy able get...
3426,5,1,False,23651,plugged ipad charged overnight charged morning no error message others reported reading review hope continues work purchased brand cable stopped w...
48911,5,1,False,284611,tried dozen ti ames update map supposed lifeti ame well ti ame begin something stall waste ti ame mean lot wasted ti ame would purchased another b...
43646,5,1,False,478016,second speck case purchased feel like iphone good new not saved multiple scratch bump protected iphone several big fall concrete glass stair etc f...


# 1-star misclassified as 5-star


In [0]:
# Test examples that are truly 1-star but were predicted as 5-star.
incorrect_result = result[(result.test == 1) & (result.predict == 5) & ~result.correct]
incorrect_idx = incorrect_result.index.tolist()
# Attach the review text by joining on the shared row index.
incorrect_pd = incorrect_result.join(reviews_reset.loc[incorrect_idx], how="inner")
# Show up to 20 of them; the fixed random_state keeps the sample repeatable.
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
25724,1,5,False,319122,first got came part looked like fairly easy put got many back very easy put one would not go tried put ammediately shattered
16070,1,5,False,359084,manufacture date stock battery per battery store l ion battery must sold year manufacture go bad case sold bunk battery
79666,1,5,False,90432,band great function would fantastic worked 2 month
103115,1,5,False,149979,love want stock sad
15728,1,5,False,345524,havent recived case yet since orderd month half ago really wish get
23101,1,5,False,68441,great
54734,1,5,False,301879,havent got yet february havent got yet should
102929,1,5,False,434649,third suction mount le 5 year garmin usage product need complete redesign failure rate unacceptable former design engineer would ashamed name atta...
120666,1,5,False,410657,sharp edge do not want put phone afraid will damage phone
84070,1,5,False,361625,old car mount scosche like very much one feel cheaper le substantial tried motorcycle holder part popped base mount couple mile road riding able s...


# 4-star misclassified as 5-star

In [0]:
# Test examples that are truly 4-star but were predicted as 5-star.
incorrect_result = result[(result.test == 4) & (result.predict == 5) & ~result.correct]
incorrect_idx = incorrect_result.index.tolist()
# Attach the review text by joining on the shared row index.
incorrect_pd = incorrect_result.join(reviews_reset.loc[incorrect_idx], how="inner")
# Show up to 20 of them; the fixed random_state keeps the sample repeatable.
incorrect_pd.sample(min(20, len(incorrect_result)), random_state=1)

Unnamed: 0,test,predict,correct,index_orig,review_body
38318,4,5,False,149799,great phone virgin mobile put top excellent price device self battery not good iphone battery life do get hr use one charge
50521,4,5,False,124061,5 year later thing still thumping away will not give earth shattering rumble get deep coiled sub want concealment good price nothing bad say one
55238,4,5,False,126623,perfect purpose like
112587,4,5,False,166065,love phone no complaint price great
65148,4,5,False,490484,phone si ample basic phone no feature phone doe let store phone number doe text message display show character upper case lower case can not speci...
90696,4,5,False,276702,price bargain wish power hey good go kid check yard
22854,4,5,False,106071,nice sturdy case doe not come screen protection will buy separately
118018,4,5,False,323910,got case very fast love order different case place cannotseam take one phone love much look very good gold iphone
108230,4,5,False,93364,took longer anticipated get quality great price not advertised talk phone case shut very helpful
112851,4,5,False,196345,like
