In [1]:
import random

# Sentiment labels used throughout the notebook.
positive = 'Positive'
neutral = 'Neutral'
negative = 'Negative'


class Review:
    """One review: its text, its star rating and the sentiment label derived from it."""

    def __init__(self, text, star):
        self.text = text
        self.star = star
        # The label is derived from the rating once, at construction time.
        self.eval = self.get_eval()

    def get_eval(self):
        """Map ``self.star`` to a sentiment label.

        Exactly 3 stars is neutral, fewer than 3 is negative, and anything
        else is positive.
        """
        if self.star == 3:
            return neutral
        return negative if self.star < 3 else positive

class ReviewDist:
    """Holds a list of reviews and can balance it across sentiment classes."""

    def __init__(self, reviews):
        self.reviews = reviews

    def evenly_dist(self, seed=None):
        """Rebalance ``self.reviews`` to equal numbers of negative and positive reviews.

        Neutral reviews are dropped. From each of the two remaining classes the
        first ``min(#negative, #positive)`` reviews (in their current order) are
        kept, then the combined list is shuffled and stored back on
        ``self.reviews``.

        Args:
            seed: Optional seed for the shuffle. When ``None`` (the default) the
                module-level ``random`` generator is used, as before; otherwise
                a private ``random.Random(seed)`` makes the order reproducible
                without touching global random state.
        """
        neg_rv = [rv for rv in self.reviews if rv.eval == negative]
        pos_rv = [rv for rv in self.reviews if rv.eval == positive]

        # Truncate both classes to the size of the smaller one.
        common_len = min(len(neg_rv), len(pos_rv))
        balanced = neg_rv[:common_len] + pos_rv[:common_len]

        if seed is None:
            random.shuffle(balanced)
        else:
            random.Random(seed).shuffle(balanced)
        self.reviews = balanced

    def get_text(self):
        """Return the review texts, in the current order of ``self.reviews``."""
        return [z.text for z in self.reviews]

    def get_eval(self):
        """Return the sentiment labels, in the current order of ``self.reviews``."""
        return [z.eval for z in self.reviews]

# Read data from JSON file

In [2]:
import json

# Set to True to echo the fields of every record while loading.
say = False

## data
file = 'data/Books_small_10000.json'

reviews = []
# The file is read one JSON object per line. Decode it explicitly as UTF-8
# so the result does not depend on the platform's default encoding.
with open(file, encoding='utf-8') as f:
    for row in f:
        if not row.strip():
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # letting json.loads fail on an empty string.
            continue
        review = json.loads(row)

        if say:
            print('reviewerID' + ': ' + review['reviewerID'])
            print('asin' + ': ' + review['asin'])
            print('Overall' + ': ' + str( review['overall'] ) )
            print('reviewerName' + ': ' + review['reviewerName'])
            print('reviewText' + ': ' + review['reviewText'] )

        # Only the text and the star rating are kept for modelling.
        reviews.append( Review(review['reviewText'], review['overall']) )

# Spot-check one parsed record.
print( reviews[3].text )
print( reviews[3].star )
        

I really enjoyed this adventure and look forward to reading more of Robert Spire. I especially liked all the info on global warming. You did a good job on the research.
4.0


# Train/test split and class balancing

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 40% of the reviews for testing; random_state pins the split.
training_set, test_set = train_test_split(reviews, test_size = 0.4, random_state=42)

train_ds = ReviewDist( training_set )
test_ds  = ReviewDist( test_set )

# evenly_dist() shuffles with the module-level `random` generator. Seed it
# here so the balanced sets (and therefore X_test[0] and every score below)
# are the same on each Restart & Run All.
random.seed(42)

# Balance each split separately, then pull out texts and labels.
train_ds.evenly_dist()
X_train = train_ds.get_text()
y_train = train_ds.get_eval()

test_ds.evenly_dist()
X_test  = test_ds.get_text()
y_test  = test_ds.get_eval()

# Class counts in the balanced training set.
print( 'No. of positive reviews: ', y_train.count(positive) )
print( 'No. of neutral reviews: ', y_train.count(neutral) )
print( 'No. of negative reviews: ', y_train.count(negative) )

No. of positive reviews:  393
No. of neutral reviews:  0
No. of negative reviews:  393


In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Bag-of-words counts: the vocabulary is learned from the training texts only
# and then reused, unchanged, to encode the test texts.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# First training review, as raw text and as its encoded row.
for shown in (X_train[0], X_train_vec[0]):
    print(shown)

# Shapes of the train and test feature matrices.
for matrix in (X_train_vec, X_test_vec):
    print(matrix.shape)


I received a copy of this book from Netgalley in exchange for an honest review.Wow.If I had to sum this book up into just one word, there isn&#8217;t another better for the job.  First there is the absolutely breathtakingly stunning cover.  The very moment I saw it, I knew I&#8217;d be obsessed.  It would take a lot for me to hate a book with that cover.  By looking at it, I never would have guessed the story line, but it fits just perfectly.Can you imagine living your entire life without ever truly seeing the sun? Can you imagine never experiencing the beautiful night sky and all its beauty? What would the world look like with only one and not the other, I wonder? Well, apparently Elizabeth Fama wondered too, because she created a complicated alternate universe from that very idea.This is a dystopian that reads so much more than dystopian.  It has a lot of themes and similarities, but it was so much bigger than that box.  I&#8217;ve been reading dystopian novels since 2010 (and longer

# Classifications

### Naive Bayes

In [5]:
from sklearn.naive_bayes import GaussianNB

# This classifier is fitted on dense arrays, so convert both count matrices
# with toarray() first.
X_train_vect, X_test_vect = X_train_vec.toarray(), X_test_vec.toarray()

clf_nb = GaussianNB().fit(X_train_vect, y_train)

# Sanity check on the first test review: text, true label, row shape, prediction.
print('Review: \n', X_test[0], end='\n\n')
print('Actual review evaluation: ', y_test[0], end='\n\n')
print('Shape of X_test_vect: ', X_test_vect[0].shape, end='\n\n')
print('Predicted review evaluation: ', clf_nb.predict(X_test_vect[:1])[0], end='\n\n')

# Mean accuracy on the test set, shown as a percentage.
score = clf_nb.score(X_test_vect, y_test)
print('Score: ' + str(round(100. * score, 1)) + '%')

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### SVM

In [6]:
from sklearn.svm import SVC
# Linear-kernel support vector classifier, fitted on the sparse count features.
clf_svm = SVC(kernel='linear', C=0.025).fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0], end='\n\n')
print('Actual review evaluation: ', y_test[0], end='\n\n')
print('Predicted review evaluation: ', clf_svm.predict(X_test_vec[0])[0], end='\n\n')

# Mean accuracy on the test set, shown as a percentage.
score = clf_svm.score(X_test_vec, y_test)
print('Score: ' + str(round(100. * score, 1)) + '%')

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### Decision Tree

In [7]:
from sklearn.tree import DecisionTreeClassifier

# Unbounded-depth decision tree on the sparse count features.
# random_state is fixed (same value as the logistic-regression cell) so the
# fitted tree, and therefore the score, is reproducible across runs.
clf_dt = DecisionTreeClassifier(max_depth=None, random_state=0)
clf_dt.fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0])
print()
print('Actual review evaluation: ', y_test[0])
print()

print( 'Predicted review evaluation: ', clf_dt.predict( X_test_vec[0] )[0] )
print()

# Mean accuracy on the test set, shown as a percentage.
score = clf_dt.score(X_test_vec, y_test)
print( 'Score: ' + str( round(100.*score,1) ) + '%' )

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### K-Neighbors Classifier

In [8]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with k = 3, fitted on the sparse count features.
clf_kn = KNeighborsClassifier(3).fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0], end='\n\n')
print('Actual review evaluation: ', y_test[0], end='\n\n')
print('Predicted review evaluation: ', clf_kn.predict(X_test_vec[0])[0], end='\n\n')

# Mean accuracy on the test set, shown as a percentage.
score = clf_kn.score(X_test_vec, y_test)
print('Score: ' + str(round(100. * score, 1)) + '%')

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### Random Forest Classifier

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Small random forest: 10 trees of depth <= 5, one candidate feature per split.
# random_state is fixed (same value as the logistic-regression cell) so the
# bootstrap samples and feature draws, and therefore the score, are reproducible.
# NOTE(review): max_features=1 on a vocabulary-sized feature space looks very
# restrictive and may explain a near-chance score; consider the default. TODO confirm.
clf_rdf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state=0)
clf_rdf.fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0])
print()
print('Actual review evaluation: ', y_test[0])
print()

print( 'Predicted review evaluation: ', clf_rdf.predict( X_test_vec[0] )[0] )
print()

# Mean accuracy on the test set, shown as a percentage.
score = clf_rdf.score(X_test_vec, y_test)
print( 'Score: ' + str( round(100.*score,1) ) + '%' )

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### AdaBoost Classifier

In [10]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with library-default settings on the sparse count features.
# random_state is fixed (same value as the logistic-regression cell) so the
# fitted ensemble, and therefore the score, is reproducible across runs.
clf_adb = AdaBoostClassifier(random_state=0)
clf_adb.fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0])
print()
print('Actual review evaluation: ', y_test[0])
print()

print( 'Predicted review evaluation: ', clf_adb.predict( X_test_vec[0] )[0] )
print()

# Mean accuracy on the test set, shown as a percentage.
score = clf_adb.score(X_test_vec, y_test)
print( 'Score: ' + str( round(100.*score,1) ) + '%' )

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

### Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with a fixed random_state, fitted on the sparse count features.
clf_lr = LogisticRegression(random_state=0).fit(X_train_vec, y_train)

# Sanity check on the first test review: text, true label, predicted label.
print('Review: \n', X_test[0], end='\n\n')
print('Actual review evaluation: ', y_test[0], end='\n\n')
print('Predicted review evaluation: ', clf_lr.predict(X_test_vec[0])[0], end='\n\n')

# Mean accuracy on the test set, shown as a percentage.
score = clf_lr.score(X_test_vec, y_test)
print('Score: ' + str(round(100. * score, 1)) + '%')

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev

# Compare classifiers

In [12]:
# Mean test accuracy of every fitted classifier, in the order they were trained.
# Naive Bayes was fitted on dense arrays, so it is scored on the dense test
# matrix; all other models use the sparse one.
for label, model, features in (
    ('Naive Bayes', clf_nb, X_test_vect),
    ('SVM', clf_svm, X_test_vec),
    ('Decision Tree', clf_dt, X_test_vec),
    ('K-Neighbors Classifier', clf_kn, X_test_vec),
    ('Random Forest Classifier', clf_rdf, X_test_vec),
    ('AdaBoost Classifier', clf_adb, X_test_vec),
    ('Logistic Regression', clf_lr, X_test_vec),
):
    print(label)
    score = model.score(features, y_test)
    print('Score: ' + str(round(100. * score, 1)) + '%\n')

Naive Bayes
Score: 60.8%

SVM
Score: 81.7%

Decision Tree
Score: 62.2%

K-Neighbors Classifier
Score: 58.0%

Random Forest Classifier
Score: 51.8%

AdaBoost Classifier
Score: 74.5%

Logistic Regression
Score: 81.3%



In [13]:
import numpy as np
# F1 scores
from sklearn.metrics import f1_score

# Per-class F1 score of every fitted classifier.
# Only two labels are scored (the `labels` argument below), so the header
# names exactly those two, in the same order as the returned array. The old
# header also listed "Neutral", which mislabelled the second column.
f1_labels = [positive, negative]

# Naive Bayes was fitted on dense arrays and is evaluated on the dense test
# matrix; the other models were fitted on the sparse matrix and are evaluated
# on it, matching the accuracy comparison.
for label, model, features in (
    ('Naive Bayes', clf_nb, X_test_vect),
    ('SVM', clf_svm, X_test_vec),
    ('Decision Tree', clf_dt, X_test_vec),
    ('K-Neighbors Classifier', clf_kn, X_test_vec),
    ('Random Forest Classifier', clf_rdf, X_test_vec),
    ('AdaBoost Classifier', clf_adb, X_test_vec),
    ('Logistic Regression', clf_lr, X_test_vec),
):
    print(label)
    print('      ', positive, '--', negative)
    # average=None returns one F1 value per entry of `labels`.
    score = f1_score(y_test, model.predict(features), average=None, labels=f1_labels)
    print( 'Score: ' + str( np.round(100.*score,2) ) + '%\n\n' )

Naive Bayes
       Positive -- Neutral -- Negative
Score: [58.53 62.76]%


SVM
       Positive -- Neutral -- Negative
Score: [81.82 81.53]%


Decision Tree
       Positive -- Neutral -- Negative
Score: [61.22 63.04]%


K-Neighbors Classifier
       Positive -- Neutral -- Negative
Score: [62.25 52.58]%


Random Forest Classifier
       Positive -- Neutral -- Negative
Score: [14.18 66.48]%


AdaBoost Classifier
       Positive -- Neutral -- Negative
Score: [74.8  74.19]%


Logistic Regression
       Positive -- Neutral -- Negative
Score: [81.57 80.97]%




In [15]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the SVM: every kernel is combined with every C.
params = dict(
    kernel=('linear', 'poly', 'rbf', 'sigmoid'),
    C=(0.2, 0.5, 1, 4, 8, 16, 32),
)

svc = SVC()
# Exhaustive search over the grid with 5-fold cross-validation on the training set.
clf = GridSearchCV(estimator=svc, param_grid=params, cv=5)
# Kept as the last expression so the fitted search object is shown as the cell output.
clf.fit(X_train_vec, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': (0.2, 0.5, 1, 4, 8, 16, 32),
                         'kernel': ('linear', 'poly', 'rbf', 'sigmoid')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [16]:
# Test accuracy of the grid-searched SVM (`clf`), shown as a percentage.
print('SVM')
score = clf.score(X_test_vec, y_test)
print('Score: ' + str(round(100. * score, 1)) + '%', end='\n\n')

SVM
Score: 80.7%



# Save model

In [20]:
import pickle

# Save model
# The classifier only understands count vectors produced by the fitted
# `vectorizer`, so persist both. The classifier file keeps its original name
# and contents; the vectorizer goes to a sibling file.
with open( 'sk_classifier_SVC.pkl', 'wb' ) as f:
    pickle.dump(clf, f)
with open( 'sk_vectorizer_SVC.pkl', 'wb' ) as f:
    pickle.dump(vectorizer, f)

# Load model
# NOTE: pickle.load can execute arbitrary code. Only load files written by
# this notebook or obtained from a trusted source.
with open('sk_classifier_SVC.pkl', 'rb' ) as f:
    clf_load = pickle.load(f)
with open('sk_vectorizer_SVC.pkl', 'rb' ) as f:
    vectorizer_load = pickle.load(f)


# Round-trip check: classify the first test review from its raw text using
# only the reloaded objects.
print('Review: \n', X_test[0])
print()
print('Actual review evaluation: ', y_test[0])
print()

print( 'Predicted review evaluation: ', clf_load.predict( vectorizer_load.transform( [X_test[0]] ) )[0] )
print()

# Accuracy of the reloaded classifier on the already-encoded test set.
score = clf_load.score(X_test_vec, y_test)
print( 'Score: ' + str( round(100.*score,1) ) + '%' )

Review: 
 Robin writes such wondrous feel-good romances! I love them! When I pick up one of her books, I know that I will be in for a wonderful, enjoyable read.Yours at Midnight was a treat. Lyric and Quinn are wonderful characters. Quinn is our tortured hero who has been in love with Lyric since childhood. But... Lyric was besties with Quinn's brother. The three of them as next-door neighbors made a lot of memories through the years.Quinn's brother has now tragically passed away. Quinn has been away from home having left shortly after his brother's funeral. He hasn't seen or talked to Lyric in all that time, and he has an apology to make to her. Lyric has a few big secrets of her own. LOL! I wanted to shake her more than once!I connected immediately to both Lyric and Quinn. They're both survivors; each was stubborn and strong. The book's supporting characters and the setting as the time line neared New Year's Eve were perfect. I couldn't put the book down as Lyric's and Quinn's re-dev