### **Data Pre-Processing**

In [170]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import string
import re
import nltk
import textblob

from textblob import TextBlob
from textblob import Word
from spellchecker import SpellChecker
from nltk import tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.probability import FreqDist
from sklearn.feature_extraction.text import CountVectorizer

In [171]:
df_raw = pd.read_excel("amazon_review.xlsx")
df_raw.head(5)

Unnamed: 0,Review Model,Retailer,Review date,Review name,Review rating,Review title,Review Content,Verified Purchase or not,People_find_helpful,vine or not,URL,list price,rating count,overall rating
0,Canon Pixma TS6420a,Amazon,2023-08-10,Ernest Birkholz,5,Works great,🖨 was easy to install and works great.,Verified Purchase,,,https://www.amazon.com/product-reviews/B09TG52...,129.99,285,4.2
1,HP OfficeJet Pro 9015e,Amazon,2022-06-04,mattey,3,spunky mid size printer,🔆Slower print speed than what I’m used to (old...,Verified Purchase,,,https://www.amazon.com/product-reviews/B08QR6P...,289.99,285,4.2
2,Canon PIXMA MG3620,Amazon,2023-03-15,Maria D,4,𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜,𝙻𝚘𝚟𝚎 𝚒𝚝,Verified Purchase,,,https://www.amazon.com/Canon-MG3620-Wireless-P...,79.99,285,4.2
3,Epson - ET-3830,Amazon,2022-11-17,Ryan H,5,Shaq knows what he's talking about,"Yup, this printer is a slam dunk. :)What an up...",Verified Purchase,12.0,,https://www.amazon.com/product-reviews/B096NBP...,399.99,285,4.2
4,HP ENVY 6055e,Amazon,2022-04-09,Sam,3,Not User Friendly,You would think something as simple as reconne...,Verified Purchase,,,https://www.amazon.com/product-reviews/B08XYRV...,129.99,285,4.2


In [172]:
df_raw['vine or not'].fillna(0, inplace=True)
df_raw['People_find_helpful'].fillna(0, inplace=True)
df_raw["Verified Purchase or not"].fillna(0, inplace=True)

In [173]:
df_raw.loc[df_raw["Verified Purchase or not"]=="verified Purchase", "Verified Purchase or not"] = 1
df_raw["Verified Purchase or not"] = df_raw["Verified Purchase or not"].astype('bool')
df_raw.loc[df_raw["vine or not"] == "VINE VOICE", "vine or not"] = 1
df_raw["vine or not"] = df_raw["vine or not"].astype('bool')

In [174]:
df_raw['Review Model'] = df_raw['Review Model'].astype(str)
df_raw['Retailer'] = df_raw['Retailer'].astype(str)
df_raw['Review title'] = df_raw['Review title'].astype(str)
df_raw['Review Content'] = df_raw['Review Content'].astype(str)
df_raw['People_find_helpful'] = df_raw['People_find_helpful'].astype(str)

#### <font color=#FFB703> **Checking for Missing Values** </font>

In [175]:
missing_values = df_raw.isnull().sum()
missing_values

Review Model                 0
Retailer                     0
Review date                  0
Review name                 57
Review rating                0
Review title                 0
Review Content               0
Verified Purchase or not     0
People_find_helpful          0
vine or not                  0
URL                          0
list price                   0
rating count                 0
overall rating               0
dtype: int64

#### <font color=#FFB703> **Summarising Review Rating** </font>

In [176]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10209 entries, 0 to 10208
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Review Model              10209 non-null  object        
 1   Retailer                  10209 non-null  object        
 2   Review date               10209 non-null  datetime64[ns]
 3   Review name               10152 non-null  object        
 4   Review rating             10209 non-null  int64         
 5   Review title              10209 non-null  object        
 6   Review Content            10209 non-null  object        
 7   Verified Purchase or not  10209 non-null  bool          
 8   People_find_helpful       10209 non-null  object        
 9   vine or not               10209 non-null  bool          
 10  URL                       10209 non-null  object        
 11  list price                10209 non-null  float64       
 12  rating count      

#### <font color=#FFB703> **Feature Extraction** </font>

<font color=#61BEB0>**Number of Words:**</font>

In [177]:
# Number of words (Review Content)
df_raw['content_word_count'] = df_raw['Review Content'].apply(lambda x: len(str(x).split(" ")))
df_raw[['Review Content','content_word_count']].head()

Unnamed: 0,Review Content,content_word_count
0,🖨 was easy to install and works great.,8
1,🔆Slower print speed than what I’m used to (old...,79
2,𝙻𝚘𝚟𝚎 𝚒𝚝,2
3,"Yup, this printer is a slam dunk. :)What an up...",84
4,You would think something as simple as reconne...,160


In [178]:
# Number of words (Review title)
df_raw['title_word_count'] = df_raw['Review title'].apply(lambda x: len(str(x).split(" ")))
df_raw[['Review title','title_word_count']].head()

Unnamed: 0,Review title,title_word_count
0,Works great,2
1,spunky mid size printer,4
2,𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜,2
3,Shaq knows what he's talking about,6
4,Not User Friendly,3


<font color=#61BEB0>**Number of Characters:**</font>

In [179]:
# Number of characters (Review Content)
df_raw['content_char_count'] = df_raw['Review Content'].str.len() ## this also includes spaces
df_raw[['Review Content','content_char_count']].head()

Unnamed: 0,Review Content,content_char_count
0,🖨 was easy to install and works great.,38
1,🔆Slower print speed than what I’m used to (old...,460
2,𝙻𝚘𝚟𝚎 𝚒𝚝,7
3,"Yup, this printer is a slam dunk. :)What an up...",469
4,You would think something as simple as reconne...,861


In [180]:
# Number of characters (Review title)
df_raw['title_char_count'] = df_raw['Review title'].str.len() ## this also includes spaces
df_raw[['Review title','title_char_count']].head()

Unnamed: 0,Review title,title_char_count
0,Works great,11
1,spunky mid size printer,23
2,𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜,12
3,Shaq knows what he's talking about,34
4,Not User Friendly,17


<font color=#61BEB0>**Average Word Length:**</font>

In [181]:
# Average word length (Review Content)
def avg_word(sentence):
  words = sentence.split()
  return (sum(len(word) for word in words)/len(words))

df_raw['content_avg_word'] = df_raw['Review Content'].apply(lambda x: avg_word(x))
df_raw[['Review Content','content_avg_word']].head()

Unnamed: 0,Review Content,content_avg_word
0,🖨 was easy to install and works great.,3.875
1,🔆Slower print speed than what I’m used to (old...,4.835443
2,𝙻𝚘𝚟𝚎 𝚒𝚝,3.0
3,"Yup, this printer is a slam dunk. :)What an up...",4.595238
4,You would think something as simple as reconne...,4.3875


In [182]:
# Average word length (Review title)
def avg_word(sentence):
  words = sentence.split()
  return (sum(len(word) for word in words)/len(words))

df_raw['title_avg_word'] = df_raw['Review title'].apply(lambda x: avg_word(x))
df_raw[['Review title','title_avg_word']].head()

Unnamed: 0,Review title,title_avg_word
0,Works great,5.0
1,spunky mid size printer,5.0
2,𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜,5.5
3,Shaq knows what he's talking about,4.833333
4,Not User Friendly,5.0


<font color=#61BEB0>**Number of Stop Words:**</font>

In [183]:
# Number of stop words (Review Content)
stop = stopwords.words('english')

df_raw['content_stopwords'] = df_raw['Review Content'].apply(lambda x: len([x for x in x.split() if x in stop]))
df_raw[['Review Content','content_stopwords']].head()

Unnamed: 0,Review Content,content_stopwords
0,🖨 was easy to install and works great.,3
1,🔆Slower print speed than what I’m used to (old...,19
2,𝙻𝚘𝚟𝚎 𝚒𝚝,0
3,"Yup, this printer is a slam dunk. :)What an up...",31
4,You would think something as simple as reconne...,71


In [184]:
# Number of stop words (Review title)
stop = stopwords.words('english')

df_raw['title_stopwords'] = df_raw['Review title'].apply(lambda x: len([x for x in x.split() if x in stop]))
df_raw[['Review title','title_stopwords']].head()

Unnamed: 0,Review title,title_stopwords
0,Works great,0
1,spunky mid size printer,0
2,𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜,0
3,Shaq knows what he's talking about,2
4,Not User Friendly,0


#### <font color=#FFB703> **Text Pre-Processing** </font>

<font color=#61BEB0>**Brand Extraction:**</font>

In [185]:
df_raw['Brand'] = df_raw['Review Model'].str.split().str[0]
df_raw[['Brand', 'Review Model']].head()

Unnamed: 0,Brand,Review Model
0,Canon,Canon Pixma TS6420a
1,HP,HP OfficeJet Pro 9015e
2,Canon,Canon PIXMA MG3620
3,Epson,Epson - ET-3830
4,HP,HP ENVY 6055e


<font color=#61BEB0>**Lower Casing:**</font>

In [186]:
# Lower casing (Review Content)
df_raw["Review Content"] = df_raw["Review Content"].apply(lambda x: " ".join(x.lower() for x in x.split()))
df_raw["Review Content"].head()

0               🖨 was easy to install and works great.
1    🔆slower print speed than what i’m used to (old...
2                                              𝙻𝚘𝚟𝚎 𝚒𝚝
3    yup, this printer is a slam dunk. :)what an up...
4    you would think something as simple as reconne...
Name: Review Content, dtype: object

In [187]:
# Lower casing (Review title)
df_raw['Review title'] = df_raw['Review title'].apply(lambda x: " ".join(x.lower() for x in x.split()))
df_raw['Review title'].head()

0                           works great
1               spunky mid size printer
2                          𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3    shaq knows what he's talking about
4                     not user friendly
Name: Review title, dtype: object

<font color=#61BEB0>**Removing Punctuation:**</font>

In [188]:
# Replace / with spaces (Review Content)
df_raw['Review Content'] = df_raw['Review Content'].str.replace('/',' ', regex=True)
df_raw['Review Content'].head(10)

0               🖨 was easy to install and works great.
1    🔆slower print speed than what i’m used to (old...
2                                              𝙻𝚘𝚟𝚎 𝚒𝚝
3    yup, this printer is a slam dunk. :)what an up...
4    you would think something as simple as reconne...
5          you won't be disappointed with this printer
6    you will spend hours of your time plugging unp...
7    you will get very poor quality black and white...
8    you use an app for setup. not much to explain ...
9    you need to install this without using the hp ...
Name: Review Content, dtype: object

In [189]:
# Replace / with spaces (Review title)
df_raw['Review title'] = df_raw['Review title'].str.replace('/',' ', regex=True)
df_raw['Review title'].head()

0                           works great
1               spunky mid size printer
2                          𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3    shaq knows what he's talking about
4                     not user friendly
Name: Review title, dtype: object

In [190]:
# mark each sentence end with " . " to facilitate sentence separation later without affecting word removal
df_raw['Review Content']  = df_raw['Review Content'].str.replace('[\.\?!]+',' . ', regex=True)
df_raw['Review Content'].head()

0             🖨 was easy to install and works great . 
1    🔆slower print speed than what i’m used to (old...
2                                              𝙻𝚘𝚟𝚎 𝚒𝚝
3    yup, this printer is a slam dunk .  :)what an ...
4    you would think something as simple as reconne...
Name: Review Content, dtype: object

In [191]:
# Removing punctuation except sentence ends (Review Content)
df_raw['Review Content'] = df_raw['Review Content'].str.replace('[^\w\s\.]','', regex=True)
df_raw['Review Content'].head()

0               was easy to install and works great . 
1    slower print speed than what im used to old mo...
2                                              𝙻𝚘𝚟𝚎 𝚒𝚝
3    yup this printer is a slam dunk .  what an upg...
4    you would think something as simple as reconne...
Name: Review Content, dtype: object

In [192]:
# Removing punctuation (Review title)
df_raw['Review title'] = df_raw['Review title'].str.replace('[^\w\s]','', regex=True)
df_raw['Review title'].head()

0                          works great
1              spunky mid size printer
2                         𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3    shaq knows what hes talking about
4                    not user friendly
Name: Review title, dtype: object

<font color=#61BEB0>**Removal of Stop Words:**</font>

In [193]:
# Removal of stop words (Review Content)
stop = stopwords.words('english')
df_raw['Review Content'] = df_raw['Review Content'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
df_raw['Review Content'].head()

0                           easy install works great .
1    slower print speed im used old model . hpojpro...
2                                              𝙻𝚘𝚟𝚎 𝚒𝚝
3    yup printer slam dunk . upgrade . paid little ...
4    would think something simple reconnecting wifi...
Name: Review Content, dtype: object

In [194]:
# Removal of stop words (Review title)
stop = stopwords.words('english')
df_raw['Review title'] = df_raw['Review title'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
df_raw['Review title'].head()

0                works great
1    spunky mid size printer
2               𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3     shaq knows hes talking
4              user friendly
Name: Review title, dtype: object

<font color=#61BEB0>**Spelling Correction:**</font>

In [195]:
spell=SpellChecker()

In [196]:
#find commonly misspelled words
misspelled = {}
for i in df_raw['Review Content']:
    unknown = spell.unknown(i.split())
    for j in unknown:
        misspelled[j] = misspelled.get(j, 0) + 1

errors = pd.DataFrame(list(misspelled.items()), columns= ['word','count']).sort_values('count', ascending=False)

print(errors)

                word  count
2                 hp   2737
12             epson   1050
13            doesnt    937
38             didnt    733
25             youre    304
...              ...    ...
2155  perkssupported      1
2156        devolver      1
2157      actualizar      1
2158            pide      1
5121        ludacris      1

[5122 rows x 2 columns]


In [197]:
print(errors[:50])

                  word  count
2                   hp   2737
12               epson   1050
13              doesnt    937
38               didnt    733
25               youre    304
64               wasnt    303
10                isnt    298
14                  pc    235
66            allinone    170
20                 hps    166
139             inkjet    138
167              pixma    123
197            ecotank    110
90           officejet    106
65                 3rd     95
184                pdf     81
50              theyre     78
27            shouldnt     72
144                  x     70
24             macbook     68
5                   bw     67
78                 2nd     65
17                  xl     64
19                 adf     64
34                thru     64
188             epsons     62
28                   w     61
276                 cd     59
175              hasnt     53
76          chromebook     52
331        doublesided     50
72            twosided     48
191       

In [198]:
#add commonly "misspelled" words related to topic
spell.word_frequency.load_words(['hp', 'canon', 'epson', "pixma", "officejet", "inkjet", "laserjet", "ios", "airprint", "pdf",
                                "deskjet", "2sided", "refillable", "ecotank", "macbook", "cd"])

In [199]:
df_raw['Review title'] = df_raw['Review title'].apply(lambda x: " ".join([spell.correction(x) if spell.correction(x) else x 
                                                          for x in x.split()]))
df_raw['Review title'].head(10)

0                works great
1    spunky mid size printer
2               𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3     shag knows hes talking
4              user friendly
5              great printer
6                buy buy buy
7      poor quality printing
8               instructions
9              great printer
Name: Review title, dtype: object

In [200]:
df_raw['Review Content'] = df_raw['Review Content'].apply(lambda x: " ".join([spell.correction(x) if spell.correction(x) else x 
                                                          for x in x.split()]))
df_raw['Review Content'].head(10)

0                           easy install works great .
1    slower print speed im used old model . hpojpro...
2                                               𝙻𝚘𝚟𝚎 i
3    yup printer slam dunk . upgrade . paid little ...
4    would think something simple reconnecting wifi...
5                            wont disappointed printer
6    spend hours time plugging unplugging rebooting...
7    get poor quality black white printing copying ...
8    use app setup . much explain something isn't w...
9    need install without using hp smart app hp sli...
Name: Review Content, dtype: object

In [201]:
#remove any punctuation that was put back in by spell check
df_raw['Review Content'] = df_raw['Review Content'].str.replace('[^\w\s\.]','', regex=True)
df_raw['Review title'] = df_raw['Review title'].str.replace('[^\w\s]','', regex=True)

<font color=#61BEB0>**Removal of Common Words:**</font>

In [202]:
# combine title and content to extract common words
df_raw["Full review"] = df_raw['Review title'] + ' . ' + df_raw['Review Content']

In [203]:
df_raw["Full review"].head()

0             works great . easy install works great .
1    spunky mid size printer . slower print speed i...
2                                𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜 . 𝙻𝚘𝚟𝚎 i
3    shag knows hes talking . yup printer slam dunk...
4    user friendly . would think something simple r...
Name: Full review, dtype: object

In [204]:
# Extract common words from full review
freq = pd.Series(' '.join(df_raw['Full review']).split()).value_counts()[:11]
freq

.           71736
printer     19526
ink          8139
print        7606
hp           6472
use          3677
one          3636
paper        3518
printing     3374
set          3236
get          3164
Name: count, dtype: int64

In [205]:
# exclude ' . ' which we still need for sentence level split later
freq= freq[1:]
freq

printer     19526
ink          8139
print        7606
hp           6472
use          3677
one          3636
paper        3518
printing     3374
set          3236
get          3164
Name: count, dtype: int64

In [206]:
# Remove common words from content
freq = list(freq.index)
df_raw['Review Content'] = df_raw['Review Content'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
df_raw['Review Content'].head()

0                           easy install works great .
1    slower speed im used old model . hpojpro . 860...
2                                               𝙻𝚘𝚟𝚎 i
3    yup slam dunk . upgrade . paid little front eq...
4    would think something simple reconnecting wifi...
Name: Review Content, dtype: object

In [207]:
# Remove common words from title
df_raw['Review title'] = df_raw['Review title'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
df_raw['Review title'].head()

0               works great
1           spunky mid size
2              𝙲𝚕𝚎𝚊𝚛 𝚙𝚛𝚒𝚗𝚝𝚜
3    shag knows hes talking
4             user friendly
Name: Review title, dtype: object

<font color=#61BEB0>**Removal of Rare Words:**</font>

In [208]:
# Extract rare words from full review (count=1)
freq = pd.Series(' '.join(df_raw['Full review']).split()).value_counts()
freq = freq.loc[lambda x : (x ==1)]

In [209]:
freq = list(freq.index)
df_raw['Review Content'] = df_raw['Review Content'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
df_raw['Review Content'].head()

0                           easy install works great .
1    slower speed im used old model . . 8600 new 90...
2                                                    i
3    yup slam . upgrade . paid little front equival...
4    would think something simple reconnecting wifi...
Name: Review Content, dtype: object

In [210]:
df_raw['Review title'] = df_raw['Review title'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
df_raw['Review title'].head()

0               works great
1           spunky mid size
2                          
3    shag knows hes talking
4             user friendly
Name: Review title, dtype: object

In [211]:
df_raw['Full review'] = df_raw['Full review'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
df_raw['Full review'].head()

0             works great . easy install works great .
1    spunky mid size printer . slower print speed i...
2                                                  . i
3    shag knows hes talking . yup printer slam . up...
4    user friendly . would think something simple r...
Name: Full review, dtype: object

<font color=#61BEB0>**Stemming:**</font>

In [212]:
# Stemming (Review Content)
st = PorterStemmer()
df_raw['Review Content'].apply(lambda x: " ".join([st.stem(word) for word in x.split()]))

0                                 easi instal work great .
1        slower speed im use old model . . 8600 new 901...
2                                                        i
3        yup slam . upgrad . paid littl front equival t...
4        would think someth simpl reconnect wifi would ...
                               ...                        
10204                                                  can
10205                                                  can
10206                                                  can
10207                                                  can
10208                                                  can
Name: Review Content, Length: 10209, dtype: object

In [213]:
# Stemming (Review title)
st = PorterStemmer()
df_raw['Review title'].apply(lambda x: " ".join([st.stem(word) for word in x.split()]))

0                    work great
1               spunki mid size
2                              
3             shag know he talk
4                 user friendli
                  ...          
10204            quick deliveri
10205                    awesom
10206                      work
10207      el artículo leg root
10208    new work like use good
Name: Review title, Length: 10209, dtype: object

<font color=#61BEB0>**Combine Title and Review:**</font>

In [214]:
# combine title and content to extract common words
df_raw["Full review"] = df_raw['Review title'] + ' . ' + df_raw['Review Content']

<font color=#61BEB0>**Export data files:**</font>

In [217]:
# create unique ID for each row
df_raw['ID'] = df_raw.index + 1

In [231]:
# remove sentence separators and export
df_fullrev = df_raw.copy(deep=True)
df_fullrev['Review Content'] = df_fullrev['Review Content'].str.replace(' \.','', regex=True)
df_fullrev['Full review'] = df_fullrev['Full review'].str.replace(' \.','', regex=True)

df_fullrev.to_excel('amazon_review_processed_full.xlsx', index=False)

In [254]:
# break reviews into list of sentences
df_sents = df_raw.copy(deep=True)
df_sents["Review Content"]= df_sents["Review Content"].apply(lambda x: list(filter(None, x.split(" ."))))
df_sents["Full review"]= df_sents["Full review"].apply(lambda x: list(filter(None, x.split(" ."))))

In [259]:
df_sents1 = df_sents.explode("Full review")
df_sents1.head(10)

Unnamed: 0,Review Model,Retailer,Review date,Review name,Review rating,Review title,Review Content,Verified Purchase or not,People_find_helpful,vine or not,...,title_word_count,content_char_count,title_char_count,content_avg_word,title_avg_word,content_stopwords,title_stopwords,Brand,Full review,ID
0,Canon Pixma TS6420a,Amazon,2023-08-10,Ernest Birkholz,5,works great,[easy install works great],True,0.0,False,...,2,38,11,3.875,5.0,3,0,Canon,works great,1
0,Canon Pixma TS6420a,Amazon,2023-08-10,Ernest Birkholz,5,works great,[easy install works great],True,0.0,False,...,2,38,11,3.875,5.0,3,0,Canon,easy install works great,1
1,HP OfficeJet Pro 9015e,Amazon,2022-06-04,mattey,3,spunky mid size,"[slower speed im used old model, 8600 new 901...",True,0.0,False,...,4,460,23,4.835443,5.0,19,0,HP,spunky mid size,2
1,HP OfficeJet Pro 9015e,Amazon,2022-06-04,mattey,3,spunky mid size,"[slower speed im used old model, 8600 new 901...",True,0.0,False,...,4,460,23,4.835443,5.0,19,0,HP,slower speed im used old model,2
1,HP OfficeJet Pro 9015e,Amazon,2022-06-04,mattey,3,spunky mid size,"[slower speed im used old model, 8600 new 901...",True,0.0,False,...,4,460,23,4.835443,5.0,19,0,HP,8600 new 9015e sounds like breaking apart inside,2
1,HP OfficeJet Pro 9015e,Amazon,2022-06-04,mattey,3,spunky mid size,"[slower speed im used old model, 8600 new 901...",True,0.0,False,...,4,460,23,4.835443,5.0,19,0,HP,setup annoyingly complex luckily researched w...,2
2,Canon PIXMA MG3620,Amazon,2023-03-15,Maria D,4,,[i],True,0.0,False,...,2,7,12,3.0,5.5,0,0,Canon,i,3
3,Epson - ET-3830,Amazon,2022-11-17,Ryan H,5,shag knows hes talking,"[yup slam, upgrade, paid little front equiva...",True,12.0,False,...,6,469,34,4.595238,4.833333,31,2,Epson,shag knows hes talking,4
3,Epson - ET-3830,Amazon,2022-11-17,Ryan H,5,shag knows hes talking,"[yup slam, upgrade, paid little front equiva...",True,12.0,False,...,6,469,34,4.595238,4.833333,31,2,Epson,yup slam,4
3,Epson - ET-3830,Amazon,2022-11-17,Ryan H,5,shag knows hes talking,"[yup slam, upgrade, paid little front equiva...",True,12.0,False,...,6,469,34,4.595238,4.833333,31,2,Epson,upgrade,4


In [260]:
df_sents1.to_excel('amazon_review_processed_sents.xlsx', index=False)