In [1]:
#import any necessary libraries
import numpy as np
import pandas as pd
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob



In [2]:
#load the spacy english model (small) and add pipeline
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x120d1bb80>

In [3]:
#load the dataset into pandas dataframe
amazon_df = pd.read_csv('amazon_product_reviews.csv', low_memory=False)

In [15]:
#display dataframe
print(amazon_df["reviews.text"].head())
print(amazon_df["reviews.text"].shape)
amazon_df["reviews.text"].isnull().sum()

0    I thought it would be as big as small paper bu...
1    This kindle is light and easy to use especiall...
2    Didnt know how much i'd use a kindle so went f...
3    I am 100 happy with my purchase. I caught it o...
4    Solid entry level Kindle. Great for kids. Gift...
Name: reviews.text, dtype: object
(5000,)


0

In [5]:
#drop any rows with missing values and prints
clean_data = amazon_df.dropna(subset =["reviews.text"])
reviews_data = clean_data["reviews.text"]
print(reviews_data.shape)
print(reviews_data.head())

(5000,)
0    I thought it would be as big as small paper bu...
1    This kindle is light and easy to use especiall...
2    Didnt know how much i'd use a kindle so went f...
3    I am 100 happy with my purchase. I caught it o...
4    Solid entry level Kindle. Great for kids. Gift...
Name: reviews.text, dtype: object


In [6]:
#create a function to preprocess 'reviews_data'
def preprocess_text(text):
    
    doc = nlp(text)

    preprocessed_tokens = []

    for token in doc:
        if not token.is_stop and not token.is_punct:
            clean_token = token.lemma_.lower()
            preprocessed_tokens.append(clean_token)

    clean_text = " ".join(preprocessed_tokens)
    
    return clean_text

In [7]:
processed_reviews = reviews_data.apply(preprocess_text)
print(processed_reviews.head())

0    think big small paper turn like palm think sma...
1               kindle light easy use especially beach
2    not know use kindle go low end m happy little ...
3    100 happy purchase catch sale good price norma...
4    solid entry level kindle great kid gifted kid ...
Name: reviews.text, dtype: object


In [9]:
#create function to test sentiment in cleaned data
def sentiment_analysis(clean_data):
    for key, value in clean_data.items():
        doc = nlp(value)
        print(f'Review: {key}')
        print(f'Sentiment: {doc._.blob.polarity}')

In [10]:
#load dataset and clean
amazon_df = pd.read_csv('amazon_product_reviews.csv', sep=',', low_memory=False)
amazon_df = amazon_df.dropna(subset=['reviews.text'])
reviews_data = amazon_df['reviews.text'].iloc[[0, 15, 25, 56, 85, 156, 200, 348, 420, 555, 760]]

sentiment_analysis(reviews_data)

Review: 0
Sentiment: -0.10897435897435898
Review: 15
Sentiment: 0.10000000000000003
Review: 25
Sentiment: 0.3333333333333333
Review: 56
Sentiment: 0.1
Review: 85
Sentiment: -0.0375
Review: 156
Sentiment: 0.5466666666666666
Review: 200
Sentiment: 0.38727272727272727
Review: 348
Sentiment: 0.5698979591836735
Review: 420
Sentiment: 0.8
Review: 555
Sentiment: 0.1
Review: 760
Sentiment: 0.5375


In [11]:
first_review = amazon_df['reviews.text'][203]
second_review = amazon_df['reviews.text'][659]

In [12]:
#load the spacy english model (medium)
nlp = spacy.load('en_core_web_md')

In [13]:
#create function to compare the similarity between two reviews
def similarity (firt, second):
    similarity_results = nlp(first).similarity(second)
    return(similarity_results)

In [14]:
print(f"Review one: {first_review}")
print(f"Review two: {second_review}")
print(f"Similarity: {first_review, second_review}")


Review one: This Echo Show comes in handy. I use it almost as much as I thought I would. I am still learning all that it can do, but my favorite part is the screen. It‚Äôs not a huge deal, but if I could change one thing it would be a detachable power cable instead of hardwired.
Review two: Great item to upgrade your house. Works very well.
Similarity: ('This Echo Show comes in handy. I use it almost as much as I thought I would. I am still learning all that it can do, but my favorite part is the screen. It‚Äôs not a huge deal, but if I could change one thing it would be a detachable power cable instead of hardwired.', 'Great item to upgrade your house. Works very well.')
