In [15]:
import pandas as pd
import re
import csv
rc = ["""This is my first experience with this type of product, so I don't know if my technique needs improvement. This worked very well on surfaces inside my house, the fine gray house dust just went away with a quick press and remove of this product. In my wife's car, it didn't seem to have enough stickiness to pull away the lint and dust. I suspect that some type of sealer for the plastic surfaces may have been used when she had the car detailed a while back. I remember just a few days after the detailing, there was a sticky feel to the dashboard and other interior surfaces. Even blue painters tape couldn't pull away the dust however, so it's not the fault of this product. So be aware it may not work on some surfaces that have been treated with "sealers"."""]
with open('rew.csv', 'w',newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(('Review',))
    writer.writerow(rc)
mydata = pd.read_csv('rew.csv', delimiter = '\t')

In [16]:
# Define a function to clean the text
def clean(text):
# Removes all special characters and numericals leaving the alphabets
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

# Cleaning the text in the review column
mydata['Cleaned Reviews'] = mydata['Review'].apply(clean)
mydata.head()

Unnamed: 0,Review,Cleaned Reviews
0,This is my first experience with this type of ...,This is my first experience with this type of ...


In [17]:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk import pos_tag
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('wordnet')
from nltk.corpus import wordnet
# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV}
def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

mydata['POS tagged'] = mydata['Cleaned Reviews'].apply(token_stop_pos)
mydata.head()

[nltk_data] Downloading package punkt to C:\Users\Md Alamin
[nltk_data]     Hossain\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Md Alamin
[nltk_data]     Hossain\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Md Alamin
[nltk_data]     Hossain\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Unnamed: 0,Review,Cleaned Reviews,POS tagged
0,This is my first experience with this type of ...,This is my first experience with this type of ...,"[(first, a), (experience, n), (type, n), (prod..."


In [18]:
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()
def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos:
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
        return lemma_rew

mydata['Lemma'] = mydata['POS tagged'].apply(lemmatize)
mydata.head()

Unnamed: 0,Review,Cleaned Reviews,POS tagged,Lemma
0,This is my first experience with this type of ...,This is my first experience with this type of ...,"[(first, a), (experience, n), (type, n), (prod...",first


In [19]:
from textblob import TextBlob
# function to calculate subjectivity
def getSubjectivity(review):
    if review== None:
        return 0.0
    return TextBlob(review).sentiment.subjectivity
    # function to calculate polarity
def getPolarity(review):
    if review== None:
        return 0.0
    return TextBlob(review).sentiment.polarity

# function to analyze the reviews
def analysis(score):
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'

In [20]:
fin_data = pd.DataFrame(mydata[['Review', 'Lemma']])
fin_data['Subjectivity'] = fin_data['Lemma'].apply(getSubjectivity) 
fin_data['Polarity'] = fin_data['Lemma'].apply(getPolarity) 
fin_data['Analysis'] = fin_data['Polarity'].apply(analysis)
fin_data.head()

Unnamed: 0,Review,Lemma,Subjectivity,Polarity,Analysis
0,This is my first experience with this type of ...,first,0.333333,0.25,Positive


In [21]:
tb_counts = fin_data.Analysis.value_counts()
tb_counts

Positive    1
Name: Analysis, dtype: int64

In [22]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
# function to calculate vader sentiment
def vadersentimentanalysis(review):
    if review == None:
        review = ""
    vs = analyzer.polarity_scores(review)
    return vs['compound']
fin_data['Vader Sentiment'] = fin_data['Lemma'].apply(vadersentimentanalysis)
# function to analyse
def vader_analysis(compound):
    if compound >= 0.5:
        return 'Positive'
    elif compound <= -0.5 :
        return 'Negative'
    else:
        return 'Neutral'


fin_data['Vader Analysis'] = fin_data['Vader Sentiment'].apply(vader_analysis)


fin_data.head(9)
vader_counts = fin_data['Vader Analysis'].value_counts()
vader_counts

Neutral    1
Name: Vader Analysis, dtype: int64