# Scoring Mechanism to Quantify Different Emotions

In [1]:
import numpy as np
import pandas as pd
import nltk, string
from scipy import spatial
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
import ast
import re
import warnings
warnings.filterwarnings('ignore')

nltk.download('punkt') 

embeddings = {}
with open("data/AffectVec-v2.0-w2v.txt", 'r') as file:
    for line in file:
        word = line.split()[0]
        embeddings[word] = np.asarray(line.split()[1:], "float32")

[nltk_data] Downloading package punkt to /Users/madhu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## 1. Developing LIWC-Inspired Scoring Mechanism

In [14]:
def nearest_neighbors(word):
    return sorted(embeddings.keys(), key=lambda w: spatial.distance.euclidean(embeddings[w], word))

In [15]:
nlp = spacy.load('en_core_web_lg')

def compute_similarity(word1, word2):
    token1, token2 = nlp(word1), nlp(word2)
    return token1.similarity(token2)

In [18]:
df = pd.read_csv('liwc.csv', names=["Category", "Abbrev", "Examples", "5 Nearest Neighbors", "All Neighbors"])

for ind in df.index:
    value = df["Examples"][ind]
    val = list(value.split(","))
    df['Examples'][ind] = val

for ind in df.index:
    examples = df["Examples"][ind]
    neighbors = []
    for example in examples:
        ex_neigh = []
        try:
            nn = nearest_neighbors(embeddings[example.lower().replace("'", "")])[1:6]
            for n in nn:
                ex_neigh.append(n)
        except KeyError:
            pass
        
        neighbors.append(ex_neigh)      
    df['5 Nearest Neighbors'][ind] = neighbors

In [19]:
for ind in df.index:
    examples = df["Examples"][ind]
    neighbors = []
    for example in examples:
        ex_neigh = []
        try:
            nn = nearest_neighbors(embeddings[example.lower().replace("'", "")])
            for n in nn:
                ex_neigh.append(n)
        except KeyError:
            pass
        
        neighbors.append(ex_neigh)      
    df['All Neighbors'][ind] = neighbors

In [20]:
df

Unnamed: 0,Category,Abbrev,Examples,5 Nearest Neighbors,All Neighbors
0,Total function words,funct,"[it, to, no, very]","[[ey, ec, him, ti, es], [per, at, pour, of, au...","[[it, ey, ec, him, ti, es, did, both, could, t..."
1,Total pronouns,pronoun,"[I, them, itself]","[[li, je, ich, j, me], [thier, ones, where, al...","[[i, li, je, ich, j, me, es, ti, te, jag, my, ..."
2,Personal pronouns,ppron,"[I, them, her]","[[li, je, ich, j, me], [thier, ones, where, al...","[[i, li, je, ich, j, me, es, ti, te, jag, my, ..."
3,First pers singular,i,"[I, me, mine]","[[li, je, ich, j, me], [could, am, did, i, ai]...","[[i, li, je, ich, j, me, es, ti, te, jag, my, ..."
4,First pers plural,we,"[we, us, our]","[[ours, ourselves, nous, our, notre], [america...","[[we, ours, ourselves, nous, our, notre, mysel..."
...,...,...,...,...,...
66,Swear words,swear,"[fuck, damn, shit]","[[fucking, cunt, damn, goddam, fucker], [godda...","[[fuck, fucking, cunt, damn, goddam, fucker, b..."
67,Netspeak,netspeak,"[btw, lol, thx]","[[yah, yup, atleast, yea, yep], [jk, lola, iz,...","[[btw, yah, yup, atleast, yea, yep, thats, vid..."
68,Assent,assent,"[agree, OK, yes]","[[agreeing, approve, accord, accept, approved]...","[[agree, agreeing, approve, accord, accept, ap..."
69,Nonfluencies,nonflu,"[er, hm, umm]","[[sos, roe, re, uh, hmm], [hmm, um, oum, huh, ...","[[er, sos, roe, re, uh, hmm, eh, mayday, wat, ..."


In [35]:
df.to_csv('liwc_complete.csv')

In [23]:
from nltk.stem import WordNetLemmatizer
wnl = WordNetLemmatizer()
from nltk.stem.lancaster import LancasterStemmer
st = LancasterStemmer()
import nltk
from nltk.stem import PorterStemmer
sno = nltk.stem.SnowballStemmer('english')
porter = PorterStemmer()

def word_stem(original):
    lancaster = st.stem(original)
    wordnet = wnl.lemmatize(original)
    snow = sno.stem(original)
    port = porter.stem(original)
    options = [lancaster, wordnet, snow, port]
    if original in options:
        options.remove(original)
    res = [] 
    [res.append(x) for x in options if x not in res] 
    return res

wordlist = ["crappy", "worrying", "daughters", "leaves", "swimming", "coming", "strawberries"]

for word in wordlist:
    print(word_stem(word))

['crappy', 'crappi']
['worry', 'worri']
['daught', 'daughter']
['leav', 'leaf']
['swim']
['com', 'come']
['strawberry', 'strawberri']


In [26]:
from nltk.stem import WordNetLemmatizer
wnl = WordNetLemmatizer()
    
def find_category(word):
    category = []
    for ind in df.index:
        if category == []:
            if word in df["Examples"][ind]:
                category.append(ind)
            else:
                stemmed = word_stem(word)
                for item in stemmed:
                    if item in df["Examples"][ind]:
                        category.append(ind)
    if category == []: 
        for ind in df.index:
            if category == []:
                for i in range(len(df["5 Nearest Neighbors"][ind])):
                    if word in df["5 Nearest Neighbors"][ind][i]:
                        category.append(ind) 
                    else:
                        stemmed = word_stem(word)
                        for item in stemmed:
                            if item in df["5 Nearest Neighbors"][ind][i]:
                                category.append(ind)

    if category == []: 
        for ind in df.index:
            if category == []:
                for i in range(len(df["All Neighbors"][ind])):
                    if category == []:
                        if word in df["All Neighbors"][ind][i]:
                            category.append(ind) 
                        else:
                            stemmed = word_stem(word)
                            for item in stemmed:
                                if item in df["All Neighbors"][ind][i]:
                                    category.append(ind)
    if category == []:
        category.append(0)
    return category

In [28]:
def maximum_score(word_score):
    word, index = word_score[0], word_score[1]
    neigh = [item for sublist in df["5 Nearest Neighbors"][index] for item in sublist]
    examples = df["Examples"][index] + neigh
    if word in examples:
        return 1
    
    max_score = 0
    for ex in examples:
        score = compute_similarity(word, ex)
        if score > max_score:
            max_score = score   
    return max_score

In [32]:
def scoring_code(sentence):
    sentence = re.sub(r'[^\w\s]','',sentence.lower())

    word_categories = []
    for word in sentence.split(' '):
        belongsto = [word, find_category(word)[0]]
        word_categories.append(belongsto)

    category_scores = []
    for i in word_categories:
        sco = maximum_score(i)
        if sco is not 99:
            category_scores.append((i[1],sco))
    
    cat_sco = {}
    result = []
    for a, b in category_scores: 
        cat_sco.setdefault(a, []).append(b)  
    for key, value in cat_sco.items(): 
        result.append((df["Category"][key], sum(value)/len(category_scores))) 
    return result

In [33]:
print(scoring_code("Very loud! I didn't think noise would ever be an issue but this is WAY louder"))

[('Total function words', 0.39812606986139715), ('First pers singular', 0.0625), ('Insight', 0.0625), ('Discrepancy', 0.0625), ('Articles', 0.0625), ('Conjunctions', 0.0625), ('Present focus', 0.0625)]


## 2. Scoring 20 Amazon Reviews (10 1-Star and 10 5-Star)

In [36]:
import pandas as pd 
ap1 ='We bought a brand new set of AirPods for 159. After using them for a week, I was listening to them and the right air pod went dead. I figured, maybe I just need to charge them. So when I got home I charged them, and the next day I went to use them again and the right air bud was still dead. So I did some research, tried to reset them, and then I couldnt reconnect them to my phone at all. Once I contacted Apple, they tried to give us the run around. They wanted us to let them fully die, which makes sense, so we did that. Then, when they still didnt work we wanted to exchange the faulty pair for a new set (BECAUSE WE PAID 159 DOLLARS AND ONLY USED THEM FOR A WEEK). Keep in mind, they were taken care of, not dropped, no water damage, they were expensive so they were treated delicately. Apple wanted a 180 deposit to get a new set, which is MORE THAN WHAT I ORIGINALLY PAID FOR. I decided to contact Amazon, and they suggested contacting Apple, but once I explained what was going, on Amazon offered to make things right and send out a replacement. Never had an issue with Amazon customer service, but Apple was extremely disappointing. I wont purchase these again.' 
ap2 = "These AirPods are amazing they automatically play audio as soon as you put them in your ears and pause when you take them out. A simple double-tap during music listening will skip forward. To adjust the volume, change the song, make a call, or even get directions, just say Hey Siri to activate your favorite personal assistant. Plus, when youre on a call or talking to Siri, an additional accelerometer works with dual beamforming microphones to filter out background noise and ensure that your voice is transmitted with clarity and consistency. Additionally, they deliver five hours of listening time on a single charge, and theyre made to keep up with you thanks to a charging case that holds multiple additional charges for more than 24 hours of listening time. Just 15 minutes in the case gives you three hours of listening to time or up to two hours of talk time. I would highly recommend it to anyone looking to buy"
                  
airpod_reviews = {'Product Reviewed': ["Apple AirPods with Charging Case", "Apple AirPods with Charging Case"],
        'Sentence':[ap1, ap2], 
                  'Score':[scoring_code(ap1),scoring_code(ap2)]} 
   
amazon_df = pd.DataFrame(airpod_reviews) 

blender1 = "Very loud! I didn't think noise would ever be an issue but this is WAY louder than any blender I’ve owned in the past. The base is also very heavy and suctions to surfaces, so not ideal if you are constantly getting out/putting away (I personally do not like appliances sitting out on the counter when not in use). The smaller sized cups are so tall with just a tiny blade at the bottom, I can never get it to blend my ingredients - so basically they are useless. Sadly will be throwing this away and getting something else ASAP."
blender2 = "This is my third NB purchase and by far the most impressive product. The first two were 600 and 900 watts, which lasted about 3 years each when the motors burned out. I assume this was because of filling too close to the lines. This model has several improvements, including multiple speeds and functions, an actual blender, plus the 32 and 24 oz cups for smoothies; it is quieter and more high tech. The old style cup had rubber gaskets that were inset and fouled if not very carefully cleaned by removing them. The new cups have eliminated this problem. I expect to enjoy this NB more and for it to last longer than my first two."

blender_reviews = [pd.Series(["NutriBullet", blender1, scoring_code(blender1)], index=amazon_df.columns),
                   pd.Series(["NutriBullet", blender2, scoring_code(blender2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(blender_reviews, ignore_index=True)

bar1 = "Tastes terrible and super sweet"
bar2 = "shipped fast, product was in perfect order and shape. flavor GREAT price GOOD Popular with work out crew lots of flavors the guys loved flavors the guys loved Low sugar high protein"

bar_reviews = [pd.Series(["CLIF BAR - Energy Bars", bar1, scoring_code(bar1)], index=amazon_df.columns),
                   pd.Series(["CLIF BAR - Energy Bars", bar2, scoring_code(bar2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(bar_reviews, ignore_index=True)

tp1 = "Rolls are way too narrow and not worth the cost. Basucally Family Mega...translates into better have child size hands. Never buy again!!"
tp2 = "I've been using this toilet paper for several years now. I order a case each month at the much-cheaper Amazon Subscribe & Save price. This paper is thick, very absorbent, doesnt contain any scents or dyed that cause any irritation for me or my family. One case of 18 huge rolls lasts our family with 2 bathrooms for 1 whole month. I've tried most all other brands and this one lasts the longest of any of them. I pray that Amazon never stops carrying this product."

tp_reviews = [pd.Series(["Charmin Ultra Soft Cushiony Touch Toilet Paper", tp1, scoring_code(tp1)], index=amazon_df.columns),
                   pd.Series(["Charmin Ultra Soft Cushiony Touch Toilet Paper", tp2, scoring_code(tp2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(tp_reviews, ignore_index=True)

shampoo1 = "Never been a fan of pantene but read some good reviews and decided to try this shampoo and conditioner set especially because it said it was sulfate free... At first it made my hair shiny that was after the first wash.. Then After the second wash my hair started to fall out in chunks and It makes me mad cause I was already growing it out I was left with my hair being all thinned out and i lost about 2 inches in length..please do not buy this shampoo and conditioner its crap! There are better options out there and cheaper!"
shampoo2 = "My fav, go-to shampoo and conditioner for years. I occasionally add another brand but always go back to Pantene. Products 4x the price don’t work as well! I’ve used Purology, Tea Tree Paul Mitchell, Aveda, etc, always go back to this!"

shampoo_reviews = [pd.Series(["Pantene Moisturizing Shampoo and Conditioner", shampoo1, scoring_code(shampoo1)], index=amazon_df.columns),
                   pd.Series(["Pantene Moisturizing Shampoo and Conditioner", shampoo2, scoring_code(shampoo2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(shampoo_reviews, ignore_index=True)

paint1 = "Half of the colors are dry. I did not realized it after the return policy expired. Very disappointed."
paint2 = "24 small tubes (12ml, or .4 ounces) of a wide variety of colors. I am just trying to get back into some creative projects, and I thought I would refresh my stash of paint. If I knew I would be getting into bigger projects, I would buy larger containers. When I saw that this was a set of small tubes, I thought it would be perfect for me as I didn't want to any to go to waste. It is enough to start dabbling again without spending a lot of money to buy larger amounts. Comes with 3 basic paint brushes, too. All of the tubes in my package look good. I pressed all of them, and they feel soft. I opened up half dozen, and those were all fine. Hopefully remainder are good. I think this is a nice kit to have if you want to dabble, or as an add on to a themed gift."

paint_reviews = [pd.Series(["Acrylic Paint Set", paint1, scoring_code(paint1)], index=amazon_df.columns),
                   pd.Series(["Acrylic Paint Set", paint2, scoring_code(paint2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(paint_reviews, ignore_index=True)

gatorade1 = "There is nothing classic about this vile wretched drink. I purchased these hoping to recapture the classic refreshing, revitalizing, experience from my athletic Highschool days. I am beside myself on the lack of transparency in the advertisement (no mention of small bottles in the title) and i find it misleading to call this product classic. It is clearly smaller than the standard classic gatorade and the flavor is something I would not wish on my worst enemy. This is nothing less than an atrocity."
gatorade2 = "I depend on this drink to keep me hydrated and my body systems in balance. Since I am diabetic, this is a low-calorie, tasty way to keep me going. And this is my most favorite Gatorade flavor"

gatorade_reviews = [pd.Series(["Gatorade Thirst Quencher", gatorade1, scoring_code(gatorade1)], index=amazon_df.columns),
                   pd.Series(["Gatorade Thirst Quencher", gatorade2, scoring_code(gatorade2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(gatorade_reviews, ignore_index=True)

earmuffs1 = "Maybe I’m just not that sharp but I expected men’s Bluetooth earmuffs would have speakers in them and connect to my phone. They’re just earmuffs. They’re nice and comfortable but I could have gotten just earmuffs for less than half of the price of these."

earmuffs2 = "I bought a pair of these in D,C, in November from a street vendor. For a south Texan, D.C. in November was unbearable; it was cold with incessant wind and little sunshine. After I purchased these from the Pentagon City metro stop vendor, my last week in the Capital City was more than bearable. I spent hours on the Mall going from Smithsonian to Smithsonian. I spent hours in Arlington Cemetery as well as walking around Georgetown and the canal district. They are easy to don, although it takes both hands to put on. Doffing these ear muffs is even easier: use one hand to pull back the 'yoke' at the back of your head, fold them using the inherent twist and collapse feature with one hand, and they easily fit in a winter coat pocket without too much bulk. Combine these behind the head muffs with a tight weave winter cap and touch screen winter gloves, and you'll stay as warm as possible without feeling like Randy from 'A Christmas Story' These have been such a great addition to my Winter wardrobe, even in South Texas. that I stuffed them in stockings for my family, albeit after the holiday season. You won't be disappointed. And, as an added bonus for all your gotta-keep-my-coiffure-perfect friends and family members, these muffs sit on the ears, but the suspension rides on the occipital bone. If the temperatures are pleasant enough, you won't muss your hair. But the temperatures require a stocking cap, ymmv, Highly recommended."

earmuffs_reviews = [pd.Series(["180s Fleece Behind-the-Head Earmuffs", earmuffs1, scoring_code(earmuffs1)], index=amazon_df.columns),
                   pd.Series(["180s Fleece Behind-the-Head Earmuffs", earmuffs2, scoring_code(earmuffs2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(earmuffs_reviews, ignore_index=True)

echo1 = "I bought the new echo plus during the black Friday sale this weekend to make use of the Line In feature which I was disappointed that my second generation echo I purchased last year didnt have. I was so excited to see that this changed this year with the new generation... EXCEPT it doest work!! The alexa software that you need to configure it as a 'line in' and not a 'line out' doesnt function, and according to the tech that assisted me, this has been a common problem with the device. I bought it for this reason, as advertised by amazon, and it doest work. The options I were given by the tech was essentially to wait and hope it gets fixed in the future, return it and lose out on the black friday deal if it is fixed in the future and wanted to buy it again at that time once it is, or wait and waste $109 dollars if it isnt fixed in the future once I pass the return window. All for an issue that is 100% AMAZON'S FAULT!! FALSE ADVERTISING and HORRIBLE CUSTOMER SERVICE like I have never experience before from amazon from a problem THEY inflicted and CAN remedy if they choose too! I asked for an extended return window, if they can honor the black friday price if I was to return it and buy it again in the future, and a refund/discount on the product due to it essentially being defective for my purposes, and each request was flat out REJECTED. When I asked to speak to someone else, that wasNOT PERMITTED either. SHAME ON YOU AMAZON, SHAME!!!"
echo2 = "Short & Sweet - For what it is, this device is amazing. Performance is quick, audio is great, and the built-in Zigbee hub works quite well."

echo_reviews = [pd.Series(["Echo Plus (2nd Gen) with Philips Hue Bulb", echo1, scoring_code(echo1)], index=amazon_df.columns),
                   pd.Series(["Echo Plus (2nd Gen) with Philips Hue Bulb ", echo2, scoring_code(echo2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(echo_reviews, ignore_index=True)

adidas1 = "I used to love classic adidas shoes but these HURT my heel! The back goes up so high it drills into my skin when I walk."
adidas2 = "Great running shoe right out of the box! Highly recommended it......love this shoe. I run 40-50 miles a week."

adidas_reviews = [pd.Series(["adidas Women's Grand Court Sneaker", adidas1, scoring_code(adidas1)], index=amazon_df.columns),
                   pd.Series(["adidas Women's Grand Court Sneaker", adidas2, scoring_code(adidas2)], index=amazon_df.columns)]
amazon_df = amazon_df.append(adidas_reviews, ignore_index=True)

In [37]:
amazon_df

Unnamed: 0,Product Reviewed,Sentence,Score
0,Apple AirPods with Charging Case,We bought a brand new set of AirPods for 159. ...,"[(First pers plural, 0.032325493699025404), (T..."
1,Apple AirPods with Charging Case,These AirPods are amazing they automatically p...,"[(Impersonal pronouns, 0.006211180124223602), ..."
2,NutriBullet,Very loud! I didn't think noise would ever be ...,"[(Total function words, 0.361022922058207), (F..."
3,NutriBullet,This is my third NB purchase and by far the mo...,"[(Total function words, 0.32572424341974016), ..."
4,CLIF BAR - Energy Bars,Tastes terrible and super sweet,"[(Total function words, 0.3084179131240047), (..."
5,CLIF BAR - Energy Bars,"shipped fast, product was in perfect order and...","[(Total function words, 0.36854884863779364), ..."
6,Charmin Ultra Soft Cushiony Touch Toilet Paper,Rolls are way too narrow and not worth the cos...,"[(Total function words, 0.26911576188186204), ..."
7,Charmin Ultra Soft Cushiony Touch Toilet Paper,I've been using this toilet paper for several ...,"[(Total function words, 0.40842370000446765), ..."
8,Pantene Moisturizing Shampoo and Conditioner,Never been a fan of pantene but read some good...,"[(Negations, 0.019801980198019802), (Total fun..."
9,Pantene Moisturizing Shampoo and Conditioner,"My fav, go-to shampoo and conditioner for year...","[(Total function words, 0.3026626173355583), (..."


In [47]:
amazon_df.to_csv('amazon_reviews.csv')

In [41]:
negative_reviews_scores = []
positive_reviews_scores = []

for i in range(0,len(amazon_df),2):
    negative_reviews_scores.append(amazon_df["Score"][i])
    
for i in range(1,len(amazon_df),2):
    positive_reviews_scores.append(amazon_df["Score"][i])

In [46]:
import pandas as pd 

products = []
negative_reviews = []
positive_reviews = []
negative_reviews_score = []
positive_reviews_score = []

for i in range(0,len(amazon_df),2):
    products.append(amazon_df["Product Reviewed"][i])
    
for i in range(0,len(amazon_df),2):
    negative_reviews.append(amazon_df["Sentence"][i])
    negative_reviews_score.append(amazon_df["Score"][i])
    
for i in range(1,len(amazon_df),2):
    positive_reviews.append(amazon_df["Sentence"][i])
    positive_reviews_score.append(amazon_df["Score"][i])

reviews = {'Product Reviewed': products,
        '5 Star Review':positive_reviews,
        '5 Star Review (Score)':positive_reviews_score,
        '1 Star Review':negative_reviews,
        '1 Star Review (Score)':negative_reviews_score} 
   
reviews_df = pd.DataFrame(reviews) 
reviews_df


Unnamed: 0,Product Reviewed,5 Star Review,5 Star Review (Score),1 Star Review,1 Star Review (Score)
0,Apple AirPods with Charging Case,These AirPods are amazing they automatically p...,"[(Impersonal pronouns, 0.006211180124223602), ...",We bought a brand new set of AirPods for 159. ...,"[(First pers plural, 0.032325493699025404), (T..."
1,NutriBullet,This is my third NB purchase and by far the mo...,"[(Total function words, 0.32572424341974016), ...",Very loud! I didn't think noise would ever be ...,"[(Total function words, 0.361022922058207), (F..."
2,CLIF BAR - Energy Bars,"shipped fast, product was in perfect order and...","[(Total function words, 0.36854884863779364), ...",Tastes terrible and super sweet,"[(Total function words, 0.3084179131240047), (..."
3,Charmin Ultra Soft Cushiony Touch Toilet Paper,I've been using this toilet paper for several ...,"[(Total function words, 0.40842370000446765), ...",Rolls are way too narrow and not worth the cos...,"[(Total function words, 0.26911576188186204), ..."
4,Pantene Moisturizing Shampoo and Conditioner,"My fav, go-to shampoo and conditioner for year...","[(Total function words, 0.3026626173355583), (...",Never been a fan of pantene but read some good...,"[(Negations, 0.019801980198019802), (Total fun..."
5,Acrylic Paint Set,"24 small tubes (12ml, or .4 ounces) of a wide ...","[(Total function words, 0.36942761724950995), ...",Half of the colors are dry. I did not realized...,"[(Total function words, 0.35958968527664015), ..."
6,Gatorade Thirst Quencher,I depend on this drink to keep me hydrated and...,"[(First pers singular, 0.10810810810810811), (...",There is nothing classic about this vile wretc...,"[(Total function words, 0.36890239674963243), ..."
7,180s Fleece Behind-the-Head Earmuffs,"I bought a pair of these in D,C, in November f...","[(First pers singular, 0.019762845849802372), ...",Maybe I’m just not that sharp but I expected m...,"[(Tentative, 0.022222222222222223), (Total fun..."
8,Echo Plus (2nd Gen) with Philips Hue Bulb,"Short & Sweet - For what it is, this device is...","[(Total function words, 0.28017566563357466), ...",I bought the new echo plus during the black Fr...,"[(First pers singular, 0.043478260869565216), ..."
9,adidas Women's Grand Court Sneaker,Great running shoe right out of the box! Highl...,"[(Total function words, 0.4150659539157915), (...",I used to love classic adidas shoes but these ...,"[(First pers singular, 0.07692307692307693), (..."


In [48]:
reviews_df.to_csv('amazon_reviews_scored.csv')

## 3. Data Visualization (Prepping the Data)

Making graphs on an external tool, Datawrapper (https://www.datawrapper.de/)

In [49]:
liwccategories = []
for ind in df.index:
    liwccategories.append(df["Category"][ind])
print(liwccategories, len(liwccategories))

['Total function words', 'Total pronouns', 'Personal pronouns', 'First pers singular', 'First pers plural', 'Second person', 'Third pers singular', 'Third pers plural', 'Impersonal pronouns', 'Articles', 'Prepositions', 'Auxiliary verbs', 'Common Adverbs', 'Conjunctions', 'Negations', 'Common verbs', 'Common adjectives', 'Comparisons', 'Interrogatives', 'Numbers', 'Quantifiers', 'Affective processes', 'Positive emotion', 'Negative emotion', 'Anxiety', 'Anger', 'Sadness', 'Social processes', 'Family', 'Friends', 'Female references', 'Male references', 'Cognitive processes', 'Insight', 'Causation', 'Discrepancy', 'Tentative', 'Certainty', 'Differentiation', 'Perceptual processes', 'See', 'Hear', 'Feel', 'Biological processes', 'Body', 'Health', 'Sexual', 'Ingestion', 'Affiliation', 'Achievement', 'Power', 'Reward', 'Risk', 'Past focus', 'Present focus', 'Future focus', 'Relativity ', 'Motion', 'Space', 'Time', 'Work', 'Leisure', 'Home', 'Money', 'Religion', 'Death', 'Swear words', 'Netsp

In [53]:
positive_reviews=[[('Impersonal pronouns', 0.006211180124223602), ('Total function words', 0.3710216546676471), ('Present focus', 0.012422360248447204), ('Third pers plural', 0.018633540372670808), ('Prepositions', 0.0610930502120442), ('Future focus', 0.006211180124223602), ('Second person', 0.048576977060678386), ('Total pronouns', 0.012422360248447204), ('Space', 0.012422360248447204), ('Conjunctions', 0.024844720496894408), ('Interrogatives', 0.018633540372670808), ('Reward', 0.006211180124223602), ('Hear', 0.024844720496894408), ('Auxiliary verbs', 0.006211180124223602), ('Time', 0.006211180124223602), ('Social processes', 0.011201012732048913), ('Articles', 0.006211180124223602), ('Quantifiers', 0.006211180124223602), ('Causation', 0.006211180124223602), ('Comparisons', 0.006211180124223602), ('Numbers', 0.006211180124223602), ('First pers singular', 0.006211180124223602), ('Discrepancy', 0.006211180124223602), ('Perceptual processes', 0.00463273943809684)],
          [('Total function words', 0.32572424341974016), ('Present focus', 0.017094017094017096), ('Conjunctions', 0.06837606837606838), ('Prepositions', 0.07692307692307693), ('Quantifiers', 0.02564102564102564), ('Numbers', 0.017094017094017096), ('Articles', 0.017094017094017096), ('Interrogatives', 0.008547008547008548), ('Motion', 0.010243483491488658), ('First pers singular', 0.017094017094017096), ('Causation', 0.02564102564102564), ('Auxiliary verbs', 0.02564102564102564), ('Affiliation', 0.006891297399653198), ('Comparisons', 0.017094017094017096), ('Negations', 0.008547008547008548), ('Total pronouns', 0.008547008547008548), ('Common adjectives', 0.008547008547008548), ('Third pers plural', 0.008547008547008548)]
          ,[('Total function words', 0.40842370000446765), ('First pers plural', 0.016221099412348516), ('Causation', 0.03488372093023256), ('Quantifiers', 0.011627906976744186), ('Present focus', 0.023255813953488372), ('First pers singular', 0.03488372093023256), ('Prepositions', 0.03488372093023256), ('Cognitive processes', 0.011627906976744186), ('Conjunctions', 0.011627906976744186), ('Total pronouns', 0.011627906976744186), ('Negations', 0.011627906976744186), ('Common verbs', 0.011627906976744186)]
          ,[('Total function words', 0.3026626173355583), ('Conjunctions', 0.04878048780487805), ('Causation', 0.024390243902439025), ('First pers singular', 0.024390243902439025), ('Prepositions', 0.0658388125898768), ('Certainty', 0.04878048780487805), ('Motion', 0.04878048780487805), ('Comparisons', 0.024390243902439025), ('First pers plural', 0.010823015738245125)]
          ,[('Total function words', 0.36942761724950995), ('Quantifiers', 0.012738853503184714), ('Prepositions', 0.04712600402783374), ('First pers singular', 0.08280254777070063), ('Auxiliary verbs', 0.012738853503184714), ('Space', 0.01910828025477707), ('Conjunctions', 0.03184713375796178), ('Insight', 0.01910828025477707), ('Discrepancy', 0.025477707006369428), ('Comparisons', 0.01910828025477707), ('Interrogatives', 0.006369426751592357), ('See', 0.006369426751592357), ('Causation', 0.006369426751592357), ('Motion', 0.006369426751592357), ('Present focus', 0.01910828025477707), ('Social processes', 0.006369426751592357), ('Common verbs', 0.004112816280312351), ('Perceptual processes', 0.006369426751592357), ('Positive emotion', 0.01910828025477707), ('Total pronouns', 0.008329725576610015), ('Third pers plural', 0.006369426751592357), ('Impersonal pronouns', 0.006369426751592357), ('Second person', 0.006369426751592357), ('Articles', 0.006369426751592357)]
          ,[('First pers singular', 0.10810810810810811), ('Total function words', 0.34950421950462135), ('Conjunctions', 0.05405405405405406), ('Space', 0.02702702702702703), ('Past focus', 0.02702702702702703), ('Auxiliary verbs', 0.02702702702702703), ('Present focus', 0.05405405405405406), ('Prepositions', 0.02702702702702703), ('Motion', 0.02702702702702703)]
          ,[('First pers singular', 0.019762845849802372), ('Total function words', 0.283445884014915), ('Prepositions', 0.10077848450958396), ('Impersonal pronouns', 0.02766798418972332), ('Space', 0.03162055335968379), ('Causation', 0.011857707509881422), ('Conjunctions', 0.05138339920948617), ('Quantifiers', 0.007905138339920948), ('Comparisons', 0.015810276679841896), ('Third pers plural', 0.011857707509881422), ('Motion', 0.003952569169960474), ('Death', 0.003952569169960474), ('Present focus', 0.011857707509881422), ('Reward', 0.00276377955003522), ('Third pers singular', 0.003952569169960474), ('Body', 0.011857707509881422), ('Time', 0.011857707509881422), ('First pers plural', 0.0034063949830501644), ('Second person', 0.019762845849802372), ('Total pronouns', 0.007905138339920948), ('Feel', 0.003952569169960474), ('Auxiliary verbs', 0.003952569169960474), ('Articles', 0.003952569169960474), ('Assent', 0.0017917894778856226), ('Affiliation', 0.003952569169960474), ('Positive emotion', 0.003952569169960474)]
          ,[('Total function words', 0.28017566563357466), ('Positive emotion', 0.038461538461538464), ('Causation', 0.038461538461538464), ('Interrogatives', 0.038461538461538464), ('Present focus', 0.15384615384615385), ('Conjunctions', 0.038461538461538464), ('Prepositions', 0.038461538461538464), ('Comparisons', 0.038461538461538464)]
          ,[('Total function words', 0.4150659539157915), ('Prepositions', 0.10526315789473684), ('First pers singular', 0.05263157894736842), ('Numbers', 0.05263157894736842)],
                 [('Total function words', 0.4150659539157915), ('Prepositions', 0.10526315789473684), ('First pers singular', 0.05263157894736842), ('Numbers', 0.05263157894736842)]]

negative_reviews= [[('First pers plural', 0.032325493699025404), ('Total function words', 0.31475411575015133), ('Prepositions', 0.054465100510835814), ('Causation', 0.02654867256637168), ('Comparisons', 0.017699115044247787), ('Total pronouns', 0.03982300884955752), ('First pers singular', 0.05752212389380531), ('Hear', 0.004424778761061947), ('Conjunctions', 0.05752212389380531), ('Motion', 0.015396594352023887), ('Time', 0.008849557522123894), ('Tentative', 0.004424778761061947), ('Interrogatives', 0.017699115044247787), ('Past focus', 0.01327433628318584), ('Differentiation', 0.004424778761061947), ('Ingestion', 0.017699115044247787), ('Third pers plural', 0.035398230088495575), ('Articles', 0.01327433628318584), ('Space', 0.004424778761061947), ('Negations', 0.008849557522123894), ('Present focus', 0.004424778761061947), ('Auxiliary verbs', 0.004424778761061947), ('Impersonal pronouns', 0.004424778761061947)],
                   [('Total function words', 0.361022922058207), ('First pers singular', 0.03), ('Insight', 0.01), ('Discrepancy', 0.01), ('Articles', 0.01), ('Conjunctions', 0.07), ('Present focus', 0.05), ('Third pers plural', 0.02), ('Space', 0.02), ('Prepositions', 0.06), ('Negations', 0.04), ('Second person', 0.01), ('Interrogatives', 0.01), ('First pers plural', 0.004668033812497192), ('Sadness', 0.0071390479439136225), ('Auxiliary verbs', 0.01)]
                   ,[('Total function words', 0.3084179131240047), ('Conjunctions', 0.2), ('Positive emotion', 0.2)]
                   ,[('Total function words', 0.26911576188186204), ('Present focus', 0.045454545454545456), ('Conjunctions', 0.045454545454545456), ('Negations', 0.09090909090909091), ('Prepositions', 0.045454545454545456), ('Space', 0.045454545454545456), ('Achievement', 0.045454545454545456), ('Auxiliary verbs', 0.045454545454545456), ('Body', 0.045454545454545456)]
                   ,[('Negations', 0.019801980198019802), ('Total function words', 0.3987497667644932), ('Prepositions', 0.0297029702970297), ('Conjunctions', 0.06930693069306931), ('Positive emotion', 0.009900990099009901), ('Causation', 0.009900990099009901), ('Common adjectives', 0.009900990099009901), ('Comparisons', 0.0297029702970297), ('Numbers', 0.009900990099009901), ('Space', 0.02441857336725205), ('First pers singular', 0.039603960396039604), ('Cognitive processes', 0.009900990099009901), ('Present focus', 0.019801980198019802), ('Motion', 0.009900990099009901), ('Swear words', 0.009900990099009901), ('Achievement', 0.009900990099009901)]
                   ,[('Total function words', 0.35958968527664015), ('Prepositions', 0.1111111111111111), ('Present focus', 0.05555555555555555), ('First pers singular', 0.05555555555555555), ('Past focus', 0.05555555555555555), ('Negations', 0.05555555555555555), ('Comparisons', 0.05555555555555555), ('Time', 0.05555555555555555)]
                   ,[('Total function words', 0.36890239674963243), ('Present focus', 0.047619047619047616), ('First pers singular', 0.047619047619047616), ('Impersonal pronouns', 0.011904761904761904), ('Prepositions', 0.07142857142857142), ('Time', 0.011904761904761904), ('Auxiliary verbs', 0.011904761904761904), ('Total pronouns', 0.011904761904761904), ('Space', 0.023809523809523808), ('Past focus', 0.011904761904761904), ('Quantifiers', 0.011904761904761904), ('Conjunctions', 0.023809523809523808), ('Third pers plural', 0.023809523809523808), ('Discrepancy', 0.011904761904761904), ('Negations', 0.011904761904761904), ('Articles', 0.011904761904761904)]
                   ,[('Tentative', 0.022222222222222223), ('Total function words', 0.3070063152760113), ('Negations', 0.022222222222222223), ('Conjunctions', 0.08888888888888889), ('First pers singular', 0.06666666666666667), ('Discrepancy', 0.044444444444444446), ('Auxiliary verbs', 0.044444444444444446), ('Space', 0.022222222222222223), ('Total pronouns', 0.022222222222222223), ('Positive emotion', 0.022222222222222223), ('Causation', 0.022222222222222223), ('Third pers plural', 0.022222222222222223), ('Prepositions', 0.022222222222222223), ('Impersonal pronouns', 0.022222222222222223)]
                   ,[('First pers singular', 0.043478260869565216), ('Total function words', 0.3925471264735436), ('Prepositions', 0.0920245747549634), ('Interrogatives', 0.007246376811594203), ('First pers plural', 0.0016913165987308668), ('Space', 0.021739130434782608), ('Articles', 0.010869565217391304), ('Numbers', 0.0036231884057971015), ('Auxiliary verbs', 0.010869565217391304), ('Conjunctions', 0.06521739130434782), ('Second person', 0.007246376811594203), ('Negations', 0.007246376811594203), ('Assent', 0.001453907182975311), ('Causation', 0.014492753623188406), ('Cognitive processes', 0.0036231884057971015), ('Present focus', 0.010869565217391304), ('Past focus', 0.0036231884057971015), ('Third pers plural', 0.010869565217391304), ('Impersonal pronouns', 0.0036231884057971015)]
                   ,[('First pers singular', 0.07692307692307693), ('First pers plural', 0.017067063279540388), ('Total function words', 0.2760531864455037), ('Positive emotion', 0.038461538461538464), ('Conjunctions', 0.07692307692307693), ('Impersonal pronouns', 0.038461538461538464), ('Negative emotion', 0.038461538461538464), ('Prepositions', 0.038461538461538464), ('Motion', 0.027951054765315533), ('Space', 0.038461538461538464), ('Interrogatives', 0.038461538461538464)]]

In [70]:
positive_data_df = pd.DataFrame(columns = products, index = liwccategories)

In [75]:
scores = []

def fill_chart(df_name, review_type, product_index, product_name):
    def review_vis(review_type, index): 
        categories = []
        scores = []
        for rev in review_type[index]:
            category, score = rev[0], rev[1]
            categories.append(category)
            scores.append(score)
        return categories, scores

    categories, scores = review_vis(review_type, product_index)

    for ind in categories:
        index = categories.index(ind)
        df_name[product_name][ind] = scores[index]
        
for prod in products:
    fill_chart(positive_data_df, positive_reviews, products.index(prod), prod)

In [76]:
positive_data_df.to_csv("positive_reviews_breakdown.csv")
positive_data_df

Unnamed: 0,Apple AirPods with Charging Case,NutriBullet,CLIF BAR - Energy Bars,Charmin Ultra Soft Cushiony Touch Toilet Paper,Pantene Moisturizing Shampoo and Conditioner,Acrylic Paint Set,Gatorade Thirst Quencher,180s Fleece Behind-the-Head Earmuffs,Echo Plus (2nd Gen) with Philips Hue Bulb,adidas Women's Grand Court Sneaker
Total function words,0.371022,0.325724,0.408424,0.302663,0.369428,0.349504,0.283446,0.280176,0.415066,0.415066
Total pronouns,0.0124224,0.00854701,0.0116279,,0.00832973,,0.00790514,,,
Personal pronouns,,,,,,,,,,
First pers singular,0.00621118,0.017094,0.0348837,0.0243902,0.0828025,0.108108,0.0197628,,0.0526316,0.0526316
First pers plural,,,0.0162211,0.010823,,,0.00340639,,,
...,...,...,...,...,...,...,...,...,...,...
Swear words,,,,,,,,,,
Netspeak,,,,,,,,,,
Assent,,,,,,,0.00179179,,,
Nonfluencies,,,,,,,,,,


In [78]:
negative_data_df = pd.DataFrame(columns = products, index = liwccategories)
for prod in products:
    fill_chart(negative_data_df, negative_reviews, products.index(prod), prod)

In [80]:
negative_data_df.to_csv("negative_reviews_breakdown.csv")
negative_data_df

Unnamed: 0,Apple AirPods with Charging Case,NutriBullet,CLIF BAR - Energy Bars,Charmin Ultra Soft Cushiony Touch Toilet Paper,Pantene Moisturizing Shampoo and Conditioner,Acrylic Paint Set,Gatorade Thirst Quencher,180s Fleece Behind-the-Head Earmuffs,Echo Plus (2nd Gen) with Philips Hue Bulb,adidas Women's Grand Court Sneaker
Total function words,0.314754,0.361023,0.308418,0.269116,0.39875,0.35959,0.368902,0.307006,0.392547,0.276053
Total pronouns,0.039823,,,,,,0.0119048,0.0222222,,
Personal pronouns,,,,,,,,,,
First pers singular,0.0575221,0.03,,,0.039604,0.0555556,0.047619,0.0666667,0.0434783,0.0769231
First pers plural,0.0323255,0.00466803,,,,,,,0.00169132,0.0170671
...,...,...,...,...,...,...,...,...,...,...
Swear words,,,,,0.00990099,,,,,
Netspeak,,,,,,,,,,
Assent,,,,,,,,,0.00145391,
Nonfluencies,,,,,,,,,,
