In [2]:
#System Operations
import os 
import sys
from datetime import datetime, date
from collections import Counter 
import operator

#Data Structure Operations
import pandas as pd 
import numpy as np 
import re
import glob
import json

#NLP
from nltk.tokenize import word_tokenize, sent_tokenize

#HuggingFace ML Models for Sentiment Analysis
from transformers import pipeline


### Read Datasets

In [4]:
### Alexa Subreddit Posts (Submissions: Title, Body, Comments) ###

alexa_1 = pd.read_csv('./data/alexa_agg_clean_token_df_19-May-2023_23-04-57.csv', sep='\t', dtype='unicode', encoding='utf-8', index_col='index')
alexa_2 = pd.read_csv('./data/alexa_agg_clean_token_df_20-May-2023_01-33-37.csv', sep='\t', dtype='unicode', encoding='utf-8', index_col='index')


In [72]:
### Examine the Data ###

alexa_1.head(3)

Unnamed: 0_level_0,created_unix_utc,created_datetime_pst,title,author,score,id,subreddit,url,body,num_comments,comments,commenter_list,commenter_count,comment_list,cleaned_title,cleaned_body,cleaned_comments
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,1682903186.0,30-Apr-2023 18:06:26,Spotify on Alexa,CelticNomadd,1,1348j93,alexa,https://www.reddit.com/r/alexa/comments/1348j9...,Amazon should add an option to just view the s...,0,[],[],0,['NaN'],spotify on alexa,amazon should add option to just view song pla...,
1,1682901623.0,30-Apr-2023 17:40:23,Can Echo Show automatically connect to iPhone ...,dsupreme99,1,1347z0d,alexa,https://www.reddit.com/r/alexa/comments/1347z0...,"Thinking of getting Alexa device, can the echo...",0,[],[],0,['NaN'],echoshow automatically connect to iphone hotsp...,think of get alexa device echoshow automatical...,
2,1682898187.0,30-Apr-2023 16:43:07,I want to bring my smart bulb on holiday. Do I...,Minimum-Lemon-402,3,1346p2a,alexa,https://www.reddit.com/r/alexa/comments/1346p2...,Heading to visit the inlaws in another country...,3,['CYPH3R_22: They have to be on the same netwo...,"['CYPH3R_22', 'freebubbleup', 'CYPH3R_22']",3,"[""'CYPH3R_22: They have to be on the same netw...",want to bring smartlight on holiday need to br...,head to visit inlaw in another country go to b...,to on same network ask alexa what voltage in c...


### Text Pre-Processing Task

In [3]:
def remove_usernames(text): 
    '''
    Required python pkgs: 
    - re (import re)
    
    Function: Remove usernames from aggregated df.comments column.
    Intended to be used on {subreddit-scraper.py} and {subreddit-search-scraper.py} generated dataframes.
    
    Input Arguments: 
    text: input type is list of (str) items.
    
    '''
    text = [re.sub(r'\b([A-Za-z0-9\_\-]+: )', '', sentence) for sentence in text]
    return text 


def remove_hyperlinks(text):
    '''
    Required python pkgs: 
    - re (import re)
    
    Function: Remove hyperlinks from text.
    
    Input Arguments: 
    text: input type is list of (str) items.
        
    '''
    #remove hyperlinks.
    text = [re.sub(r'(\(?http\S+\)?)', '', str(sentence)) for sentence in text]
    text = [re.sub(r'(!\[.*\]\(.*\))', '', sentence) for sentence in text]
    text = [re.sub(r'(\(\/.*\))', '', sentence) for sentence in text]
    return text


def expand_contractions(text):
    '''
    Required python pkgs: 
    - re (import re)
    
    Function: Expand contractions in text.
    
    Input Arguments: 
    text: input type is list of (str) items.
        
    '''
    # specific patterns
    text = [re.sub(r"\b(i\.e\.\,?)\s?", "", sentence) for sentence in text]
    text = [re.sub(r"\btheyre\b", "they are", sentence) for sentence in text]
    text = [re.sub(r"\b(e\.g\.\,?)\s?", "", sentence) for sentence in text]
    text = [re.sub(r" \'em ", " them ", sentence) for sentence in text]
    text = [re.sub(r"(won('|\’)t|wont)", "will not", sentence) for sentence in text]
    text = [re.sub(r"((C|c)an('|\’)t|(C|c)ant)\b", "cannot", sentence) for sentence in text]
    text = [re.sub(r"\bdidnt\b", "did not", sentence) for sentence in text]
    text = [re.sub(r"\bhavnt\b", "have not", sentence) for sentence in text]
    text = [re.sub(r"\bhasnt\b", "has not", sentence) for sentence in text]
    text = [re.sub(r"\bdoesnt\b", "does not", sentence) for sentence in text]
    text = [re.sub(r"\bisnt\b", "is not", sentence) for sentence in text]
    text = [re.sub(r"\bive\b", "i have", sentence) for sentence in text]
    text = [re.sub(r"\bdont\b", "do not", sentence) for sentence in text]
    text = [re.sub(r"\bu\b", "you", sentence) for sentence in text]
    text = [re.sub(r"\bhavign\b", "having", sentence) for sentence in text]
    text = [re.sub(r"\bhavethe\b", "have the", sentence) for sentence in text]
    text = [re.sub(r"\bwiuld\b", "would", sentence) for sentence in text]
    text = [re.sub(r"\blet\'s", "let us", sentence) for sentence in text]
    text = [re.sub(r"\b(T|t)here\'s\b", "there is", sentence) for sentence in text]
    text = [re.sub(r"\b(H|h)here\'s\b", "here is", sentence) for sentence in text]
    # general patterns
    text = [re.sub(r"(?<=[a-z])n(\\\'|\'|\\\’|\’)t", " not", sentence) for sentence in text]
    text = [re.sub(r"(\\\'|\'|\’)re\b", " are", sentence) for sentence in text]
    #text = [re.sub(r"(\\'|\'|\’)s\b", "", sentence) for sentence in text]
    text = [re.sub(r"(?<=[a-z]|I)(\\\'|\'|\’)d\b", " would", sentence) for sentence in text]
    text = [re.sub(r"(?<=[a-z]|I)(\\\'|\'|\’)ll", " will", sentence) for sentence in text]
    text = [re.sub(r"(?<=[a-z]|I)(\'|\’)ve\b", " have", sentence) for sentence in text]
    text = [re.sub(r"\b(I|i)(\\\'m|\'m|\’m|m)\b", "i am", sentence) for sentence in text]
    text = [re.sub(r"\b((I|i)(\'|\’)ll|(I|i)ll)\b", "i will", sentence) for sentence in text]
    #text = [re.sub(r"\+", "plus", sentence) for sentence in text]
    return text


def remove_amazon_numbers(text): 
    '''
    Required python pkgs: 
    - re (import re)
    
    Function: Remove numbers from text data.
    
    Input Arguments: 
    text: input type is list of (str) items.
    
    '''
    text = [re.sub(r"((1st|1|first)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (1st|1|(O|o)ne)", "firstgen", sentence) for sentence in text] 
    text = [re.sub(r"((2nd|2|second)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (2nd|2|(T|t)wo)", "secondgen", sentence) for sentence in text] 
    text = [re.sub(r"((3rd|3|third)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (3rd|3|(T|t)hree)", "thirdgen", sentence) for sentence in text] 
    text = [re.sub(r"((4th|4|fourth)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (4th|4|(F|f)our)", "fourthgen", sentence) for sentence in text] 
    text = [re.sub(r"((5th|5|fifth)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (5th|5|(F|f)ive)", "fifthgen", sentence) for sentence in text] 
    text = [re.sub(r"((6th|6|sixth)( (G|g)eneration| (G|g)en))|(( G|g)eneration|( G|g)en) (6th|6|(S|s)ix)", "sixthgen", sentence) for sentence in text] 
    text = [re.sub(r"((7th|7)( (G|g)eneration| (G|g)en))", "seventhgen", sentence) for sentence in text] 
    text = [re.sub(r"((8th|8)( (G|g)eneration| (G|g)en))", "eightgen", sentence) for sentence in text] 
    text = [re.sub(r"((9th|9)( (G|g)eneration| (G|g)en))", "ninthgen", sentence) for sentence in text] 
    text = [re.sub(r"((10th|10)( (G|g)eneration| (G|g)en))", "tenthgen", sentence) for sentence in text] 
    text = [re.sub(r"\b1st(-(P|p)arty| (P|p)arty)\b", "firstparty", sentence) for sentence in text]
    text = [re.sub(r"\b3rd(-(P|p)arty| (P|p)arty)\b", "thirdparty", sentence) for sentence in text]
    text = [re.sub(r"(1x)('?s)?", "one_time", sentence) for sentence in text] 
    text = [re.sub(r"(2x)('?s)?", "two_times", sentence) for sentence in text] 
    text = [re.sub(r"(3x)('?s)?", "three_times", sentence) for sentence in text] 
    text = [re.sub(r"(4x)('?s)?", "four_times", sentence) for sentence in text] 
    text = [re.sub(r"(5x)('?s)?", "five_times", sentence) for sentence in text] 
    text = [re.sub(r"\b1st\b", "first", sentence) for sentence in text] 
    text = [re.sub(r"\b2nd\b", "second", sentence) for sentence in text] 
    text = [re.sub(r"\b3rd\b", "third", sentence) for sentence in text]
    text = [re.sub(r'\b([0-9](th|st|rd|nd))\b', '', sentence) for sentence in text]
    text = [re.sub(r'\b([0-9]\-)([0-9]x)\b', '', sentence) for sentence in text]
    text = [re.sub(r'\b([0-9]\.)([0-9]x[0-9]+\"?)?', '', sentence) for sentence in text]
    text = [re.sub(r'\s([0-9]+.?[0-9]+)([a-z]+)\b', '', sentence) for sentence in text]
    text = [re.sub(r'\b([A-Za-z]+?[0-9]+[A-Za-z]+[0-9]+[A-Za-z]+)\b', '', sentence) for sentence in text]
    text = [re.sub(r'\b([A-Za-z]+)?([0-9]+)([A-Za-z]+)?\b', '', sentence) for sentence in text] #n32w
    #text = [re.sub(r'\b([A-Za-z]+[0-9]+)([A-Za-z]+)? \b', '', sentence) for sentence in text] #new
    text = [re.sub(r'([A-Za-z]+[0-9]+)\b', '', sentence) for sentence in text]
    text = [re.sub(r'([A-Z]+[0-9])\w+', '', sentence) for sentence in text]
    text = [re.sub(r'([$]?[0-9]+[%a-z]+?\.? )\b', '', sentence) for sentence in text]
    text = [re.sub(r'\s([0-9]+[\.,:\-\_]?)([0-9]+)?([a-z]+)?', '', sentence) for sentence in text]
    text = [re.sub(r'(?<=[A-Za-z]|\_|\-)([0-9]+[\.,:\-\_]?)([0-9]+)?([a-z]+)?( )?\b', '', sentence) for sentence in text]
    #text = [re.sub(r'\s[a-z]\b', '', sentence) for sentence in text]
    return text


def amazon_ner_reddit(text): 
    '''        
    Required python pkgs: 
    - re (import re)
    
    Function: Regularize select items from text data (Amazon-specific).

    Input Arguments: 
    text: input type is list of (str) items.
       
    '''
    ## General (Abbreviations, Disfluencies)
    text = [re.sub(r"\b(R|r) and (D|d)\b", "research and development", sentence) for sentence in text]
    text = [re.sub(r"e v e r y t i m e", "everytime", sentence) for sentence in text]
    text = [re.sub(r"\b(USA|usa|U\.S\.|US)\b", " united_states ", sentence) for sentence in text]
    text = [re.sub(r"\b(U\.K\.|UK)\b", " uk ", sentence) for sentence in text]
    text = [re.sub(r" (r\/|\/r\/)", " subreddit ", sentence) for sentence in text]
    text = [re.sub(r"\b(O|o)(K|k)\b", "okay", sentence) for sentence in text]
    text = [re.sub(r"\bhelp u\b", "help you", sentence) for sentence in text]
    text = [re.sub(r"\byr\b", "year", sentence) for sentence in text]
    text = [re.sub(r"\be-mail\b", "email", sentence) for sentence in text]
    text = [re.sub(r"\b(pre-made)\b", "premade", sentence) for sentence in text]
    text = [re.sub(r"\b(re-used)\b", "reused", sentence) for sentence in text]
    text = [re.sub(r"\bsoooo\b", "so", sentence) for sentence in text]
    text = [re.sub(r"\bslooowwww\b", "slow", sentence) for sentence in text]
    text = [re.sub(r"\b(thx|thnx|tysm|ty|thank you|thank u|thank)\b", "thanks", sentence) for sentence in text]
    text = [re.sub(r"\bgooood\b", "good", sentence) for sentence in text]
    text = [re.sub(r"\bbyyyyye\b", "bye", sentence) for sentence in text]
    text = [re.sub(r"\b(A|a)$(A|a)(P|p) (R|r)ocky\b", "asap rocky", sentence) for sentence in text]
    text = [re.sub(r"\bshut the f up\b", "shut the fuck up", sentence) for sentence in text]
    text = [re.sub(r"\bspammed\b", "spam", sentence) for sentence in text]
    text = [re.sub(r"\b(B|b)ricked\b", "brick", sentence) for sentence in text]
    ## Sounds 
    text = [re.sub(r"\bwhite noise\b", "whitenoise", sentence) for sentence in text]
    text = [re.sub(r"\bbrown noise\b", "brownnoise", sentence) for sentence in text]
    text = [re.sub(r"\bpink noise\b", "pinknoise", sentence) for sentence in text]
    text = [re.sub(r"\bsleep sounds?\b", "sleepsounds", sentence) for sentence in text]
    text = [re.sub(r"\bthunderstorm sounds?\b", "thunderstormsounds", sentence) for sentence in text]
    text = [re.sub(r"\brain sounds?\b", "rainsounds", sentence) for sentence in text]
    text = [re.sub(r"\b(ambient (noise|sounds?))\b", "ambientsounds", sentence) for sentence in text]
    text = [re.sub(r"\b(S|s)leep (J|j)ar(s|(\'|\’)s)?\b", "sleepjar", sentence) for sentence in text]
    ## Settings/Prompts
    text = [re.sub(r"\b(pre-recorded)\b", "prerecorded", sentence) for sentence in text]
    text = [re.sub(r"\b(home-screen|home screen)s?\b", "homescreen", sentence) for sentence in text]
    text = [re.sub(r"\b(home-screen|home screen)s?\b", "homescreen", sentence) for sentence in text]
    text = [re.sub(r"\btemp\b", "temperature", sentence) for sentence in text]
    text = [re.sub(r"\bre\-added\b", "readded", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-pair)", "repair", sentence) for sentence in text]
    text = [re.sub(r"\b(re(\-| )purposing)\b", "repurpose", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-enabl)", "reenabl", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-creat)", "recreat", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-register)", "reregister", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-sync)", "resync", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-link)", "relink", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-setting)", "resetting", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-install)", "reinstall", sentence) for sentence in text]
    text = [re.sub(r"\b(re\-ask)", "reask", sentence) for sentence in text]
    text = [re.sub(r"\bvol\b", "volume", sentence) for sentence in text]
    text = [re.sub(r"\binstalling\b", "install", sentence) for sentence in text]
    ## Locations 
    text = [re.sub(r"\bdining room\b", "diningroom", sentence) for sentence in text]
    text = [re.sub(r"\b(multi-room|multi room)\b", "multiroom", sentence) for sentence in text]
    text = [re.sub(r"\bliving room\b", "livingroom", sentence) for sentence in text]
    text = [re.sub(r"\bbed room\b", "bedroom", sentence) for sentence in text]
    ## Radio Stations  
    text = [re.sub(r"\b(SXM|sxm)\b", "siriusxm", sentence) for sentence in text]
    text = [re.sub(r"\b(R|r)adio 1\b", "radioone", sentence) for sentence in text]
    text = [re.sub(r"\b(R|r)adio 2\b", "radiotwo", sentence) for sentence in text]
    text = [re.sub(r"\b(R|r)adio 3\b", "radiothree", sentence) for sentence in text]
    text = [re.sub(r"\b(R|r)adio 4\b", "radiofour", sentence) for sentence in text]
    text = [re.sub(r"\b(H|h)eart (FM|fm)\b", "heartfm", sentence) for sentence in text]
    text = [re.sub(r"\bi(H|h)eart( (R|r)adio)?\b", "iheartradio ", sentence) for sentence in text]
    text = [re.sub(r"\b(i |i\-|i)?(H|h)eart( FM| fm|FM|fm)\b", "iheartradio", sentence) for sentence in text]
    text = [re.sub(r"\b(BBC|Bbc|bbc) ((N|n)ews)\b", "bbcnews", sentence) for sentence in text]
    text = [re.sub(r"\b(C|c)(\-| )(S|s)pan\b", "cspan", sentence) for sentence in text]
    ## Apps
    text = [re.sub(r"\b(P|p)aramount(\+| \+)\b", "paramountplus", sentence) for sentence in text]
    text = [re.sub(r"\b(H|h)(B|b)(O|o) (M|m)ax\b", "hbomax", sentence) for sentence in text]
    text = [re.sub(r"\b(D|d)isney(\+| \+| (P|p)lus)\b", "disneyplus", sentence) for sentence in text]
    text = [re.sub(r"\b(F|f)lash (B|b)riefing(s)?\b", "flashbrief", sentence) for sentence in text]
    text = [re.sub(r"\b(F|f)lash (N|n)ews (B|b)riefing(s)?\b", "flashbrief", sentence) for sentence in text]
    ## Smart Devices 
    #### General
    text = [re.sub(r"\b((W|w)i-(F|f)i|(W|w)i (F|f)i)\b", "wifi", sentence) for sentence in text]
    text = [re.sub(r"\bsound bar\b", "soundbar", sentence) for sentence in text]
    text = [re.sub(r"\bvac\b", "vacuum", sentence) for sentence in text]
    text = [re.sub(r"\bdimmer switch\b", "dimmerswitch", sentence) for sentence in text]
    text = [re.sub(r"\b((S|s)mart )(((L|l)ights?)|((B|b)ulbs?))\b", "smartlight", sentence) for sentence in text] 
    text = [re.sub(r"\b((S|s)mart )(P|p)lugs?\b", "smartplug", sentence) for sentence in text] 
    text = [re.sub(r"\b((M|m)otion )(S|s)ensors?\b", "motionsensor", sentence) for sentence in text] 
    text = [re.sub(r"\b((S|s)mart )(T\|t)(V|v)\b", "smarttv", sentence) for sentence in text] 
    text = [re.sub(r"\b(S|s)mart (H|h)ome\b", "smarthome", sentence) for sentence in text] 
    text = [re.sub(r"\bhome assistant\b", "homeassistant", sentence) for sentence in text]
    text = [re.sub(r"\b((M|m)icro)( )?(SDs?)( cards?)?\b", "microsdcard", sentence) for sentence in text]
    text = [re.sub(r"\b(S|s)(D|d) cards?\b", "sdcard", sentence) for sentence in text]
    text = [re.sub(r"\b(I|i)(R|r) (?=(lightbulb|filter|light|option|microwave|dim|reflection|camera|LED|led|(night vision))(s?)\b)", "infrared", sentence) for sentence in text]
    text = [re.sub(r"\b(I|i)(R|r)\b", "infrared", sentence) for sentence in text]
    text = [re.sub(r"\b(C|c) (wire)\b", "cwire", sentence) for sentence in text]
    #### Amazon-owned 
    text = [re.sub(r"\b((T|t)(P|p) (L|l)ink (K|k)asa)\b", "tplinkkassa", sentence) for sentence in text]
    text = [re.sub(r"\b(K|k)asa\b", "tplinkkassa", sentence) for sentence in text]
    text = [re.sub(r"\b(((F|f)ire( TV| tv|TV|tv) (C|c)ubes?)|((F|f)ire (C|c)ubes?)|((F|f)ire(C|c)ubes?))\b", "firecube", sentence) for sentence in text]
    text = [re.sub(r"\b((F|f)ire )(TV|tv)( OS| os)?\b", "firetv", sentence) for sentence in text] 
    text = [re.sub(r"\b(((F|f)ire )(TV|tv) (S|s)tick|((F|f)ire )(S|s)tick)\b", "firetvstick", sentence) for sentence in text] 
    text = [re.sub(r"\b(E|e)ero mesh\b", "eero", sentence) for sentence in text] 
    text = [re.sub(r"\b(B|b)link( doorbell)? cam(era)?s?\b", "blinksecurity", sentence) for sentence in text] 
    text = [re.sub(r"\b(E|e)ufy(( (S|s)olo)?cam(era)?s?( E20)?)?\b", "eufysecurity", sentence) for sentence in text] 
    #### Specific Brands
    text = [re.sub(r"\b((P|p)hill?ips )?(H|h)ue\b", "philipshue", sentence) for sentence in text]
    text = [re.sub(r"\bphilipshue (((L|l)ight(bulb| bulb)?s?)|smartlight)\b", "huesmartlight", sentence) for sentence in text]
    text = [re.sub(r"\b(J|j)(B|b)(L|l) speakers?\b", "jblspeaker", sentence) for sentence in text]
    text = [re.sub(r"\b(L|l)(G|g) ((S|s)mart)?(T|t)(V|v)\b", "lgsmarttv ", sentence) for sentence in text] 
    text = [re.sub(r"\b(R|r)oku (TV|tv|smarttv)\b", "rokutv ", sentence) for sentence in text] 
    text = [re.sub(r"\b(S|s)amsung (TV|tv|smarttv)\b", "samsungtv ", sentence) for sentence in text] 
    text = [re.sub(r"\b(I|i)nsignia (TV|tv|smarttv)\b", "insigniatv ", sentence) for sentence in text] 
    text = [re.sub(r"\b(((L|l)ight(B|b| B| b)ulbs?)|(B|b)ulbs?)\b", "lightbulb", sentence) for sentence in text]
    text = [re.sub(r"\b(D|d)link (S|s)ensors?\b", "dlinksensor", sentence) for sentence in text]
    text = [re.sub(r"\b(W|w)emo (P|p)lugs?\b", "wemoplug", sentence) for sentence in text]
    text = [re.sub(r"\b(NETGEAR|(N|N)etgear) (N|n)ighthawk\b", "netgearnighthawk", sentence) for sentence in text]
    text = [re.sub(r"\b((L|l)(G|g) (T|t)hin(Q|q|k)|(T|t)hin(Q|q|k) (skill|app))\b", "lgthinqskill", sentence) for sentence in text]
    text = [re.sub(r"\b(X|x)(B|b)ox (S|s)eries (S|s|x|X)\b", "xbox", sentence) for sentence in text]
    text = [re.sub(r"\b(S|s)onos ((O|o)ne|(B|b)eam)\b", "sonos", sentence) for sentence in text]
    text = [re.sub(r"\b(M|m)ac (I|i)?(O|o)(S|s)\b", "macos", sentence) for sentence in text]
    text = [re.sub(r"\b(I|i)phone (I|i)?(O|o)(S|s)\b", "iphoneos", sentence) for sentence in text]
    text = [re.sub(r"\bpi\-hole\b", "pihole", sentence) for sentence in text]
    ## Prime Video Music
    text = [re.sub(r"\b(((AMAZON|Amazon|amazon) (PRIME|Prime|prime) (M|m)usic (U|u)nlimited)|((AMAZON|Amazon|amazon) (PRIME|Prime|prime) (M|m)usic)|((PRIME|Prime|prime) (M|m)usic (U|u)nlimited)|((PRIME|Prime|prime) (M|m)usic))\b", "primemusic", sentence) for sentence in text]
    text = [re.sub(r"\b(AMAZON|Amazon|amazon) (M|m)usic (U|u)nlimited\b", "primemusic", sentence) for sentence in text]
    text = [re.sub(r"\b(M|m)usic (U|u)nlimited\b", "primemusic", sentence) for sentence in text]
    text = [re.sub(r"\b((AMAZON|Amazon|amazon) (PRIME|Prime|prime) (V|v)ideo|(PRIME|Prime|prime) (V|v)ideo)\b", "primevideo", sentence) for sentence in text]
    text = [re.sub(r"\b(AMAZON|Amazon|amazon) (PRIME|Prime|prime)\b|\b(PRIME|Prime|prime)\b", "amazonprime", sentence) for sentence in text]
    ## Amazon
    text = [re.sub(r"\b(A|a)mazon (B|b)asics?\b", "amazonbasics", sentence) for sentence in text] 
    text = [re.sub(r"\b(A|a)mazon (S|s)idewalk\b", "amazonsidewalk", sentence) for sentence in text]
    text = [re.sub(r"\b(A|a)mazon (S|s)hopping app?\b", "shoppingapp", sentence) for sentence in text]
    ## Alexa App
    text = [re.sub(r"\b(A|a)mazon (K|k)ids\+?\b", "amazonkids", sentence) for sentence in text]
    text = [re.sub(r"\b(A|a)lexa(s|\'s)\b", "alexa", sentence) for sentence in text] 
    text = [re.sub(r"\b(A|a)lexa ios? app\b", "alexaapp", sentence) for sentence in text]
    text = [re.sub(r"\b(R|r)outines?\b", "routine", sentence) for sentence in text] 
    text = [re.sub(r"\b(A|a)lexa (phone app|app)\b", "alexaapp", sentence) for sentence in text]
    ## Echo Dot
    text = [re.sub(r"\b((A|a)mazon (E|e)cho (D|d)ots?|(E|e)cho (D|d)ots?)\b", "echodot", sentence) for sentence in text] 
    text = [re.sub(r"\bDots?\b", "echodot", sentence) for sentence in text] 
    ## Echo Show
    text = [re.sub(r"\b(A|a)lexa (S|s)hows?\b", "echoshow", sentence) for sentence in text] 
    text = [re.sub(r"\b((E|e)cho )(S|s)how(s| 5| 8| 10| 15)?\b", "echoshow", sentence) for sentence in text] 
    text = [re.sub(r"\bShow(s|((\'|\’)s)| 5| 8| 10| 15)\b", "echoshow", sentence) for sentence in text] 
    ## Echo Flex 
    text = [re.sub(r"\b(E|e)cho flex\b", "echoflex", sentence) for sentence in text] 
    ## Echo Soundbar 
    text = [re.sub(r"\b(E|e)cho (subwoofer|sub)\b", "echosoundbar", sentence) for sentence in text] 
    ## Echo
    text = [re.sub(r"\b(A|a)mazon (E|e)cho devices?\b", "amazonecho", sentence) for sentence in text] 
    text = [re.sub(r"\b((A|a)mazon (E|e)cho (A|a)uto|(E|e)cho (A|a)uto)\b", "echoauto", sentence) for sentence in text]
    text = [re.sub(r"\b((A|a)mazon (A|a)lexa) (E|e)cho(s|\'s)?\b", "amazonecho", sentence) for sentence in text] 
    text = [re.sub(r"\b((A|a)lexa)\/(E|e)cho(s|\'s)?\b", "amazonecho", sentence) for sentence in text] 
    text = [re.sub(r"\b(E|e)cho devices?\b", "amazonecho", sentence) for sentence in text] 
    text = [re.sub(r"\b(E|e)chos?\b", "amazonecho", sentence) for sentence in text] 
    ## Alexa Features 
    text = [re.sub(r"\b(W|w)hisper mode\b", "whispermode", sentence) for sentence in text] 
    text = [re.sub(r"\b(W|w)ake words?\b", "wakeword", sentence) for sentence in text]
    text = [re.sub(r"\b(K|k)ids(\+| \+)\b", "kidsplus", sentence) for sentence in text]
    text = [re.sub(r"\b(D|d)rop (in|into)\b", "dropin", sentence) for sentence in text]
    text = [re.sub(r"\b(D|d)rop\-(in)\b", "dropin", sentence) for sentence in text]
    text = [re.sub(r"\bon\/off\b", "onoff", sentence) for sentence in text]
    text = [re.sub(r"\b(B|b)\.?t\.?w\b", "bytheway", sentence) for sentence in text]
    text = [re.sub(r"\b((B|b)y the way)\b", "bytheway", sentence) for sentence in text]
    ## Filter 
    text = [re.sub(r"\b(etc)(\.?\,?)", "", sentence) for sentence in text]
    text = [re.sub(r" \& ", " and ", sentence) for sentence in text]
    text = [re.sub(r"\&", " and ", sentence) for sentence in text]
    #text = [re.sub(r"\b(radio station|station|radio)\b", "radiostation", sentence) for sentence in text]
    return text


def tokenize_text(text): 
    '''
    Required python pkgs: 
     - nltk (from nltk.tokenize import word_tokenize, sent_tokenize)
     
    Function: Transform text into token form.
    
    Input Arguments: 
    text: input type is list of (str) items.
    
    '''
    token_ls = []
    for sentence in text:
        token_text = ' '.join(word_tokenize(sentence))
        token_ls.append(token_text)
    return token_ls


def reformat_praw_comments(praw_df): 
    comments = []
    tmp = []
    comment_col = praw_df.comments
    
    for i in comment_col: 
        comments.append(i)
    for j in comments: 
        commentors = re.findall(r"((?<=\'|\")[A-Za-z0-9_-]+(?=\:))",j)
        starts_with = re.findall(r"(\'|\")(?=[A-Za-z0-9_-]+\:)",j)
        tmp.append([commentors,len(commentors), starts_with,j])
    tmp_df = pd.DataFrame(tmp, columns=['commenters', 'commenter_count', 'starts_w', 'comment_text'])

    comment_list = []
    rgx_1 = r'(?<=\')\, (?=\'[A-Za-z0-9_-]+\:)'
    rgx_2 = r'(?<=\")\, (?=\"[A-Za-z0-9_-]+\:)'

    for i in range(len(tmp_df.comment_text)): 
        input = "".join(list(set(tmp_df.starts_w[i])))
        txt = tmp_df.comment_text[i]
        if input == "'": 
            txt = re.sub(r"(\\\'|\'|\\\’|\’)","\'", txt)
            txt = re.sub(r'\[|\]', "", txt)
            output = re.split(rgx_1, txt)
            comment_list.append(output)
        elif input == "\"": 
            txt = re.sub(r'\[|\]', "",txt)
            output = re.split(rgx_2, txt)
            comment_list.append(output)
        elif input == "\'\"": 
            txt = re.sub(r'\[|\]', "",txt)
            tmp_txt = re.sub(r"(\')(?=([A-Za-z0-9_-]+: )(.*?)\')", '\"', txt)
            tmp_txt = re.sub(r"(\')(?=$|\, (\"|\\\"))",'\"',tmp_txt)
            output = re.split(rgx_2, tmp_txt)
            comment_list.append(output)
        else: 
            comment_list.append(['NaN'])
    tmp_df['comment_list'] = comment_list
    praw_df["commenter_list"] = tmp_df.commenters
    praw_df["commenter_count"] = tmp_df.commenter_count
    praw_df['comment_list'] = comment_list
    return praw_df


def clean_amazon_text(text, usrnme, tokenize):
    text = remove_hyperlinks(text)
    if usrnme == 'yes':
        text = remove_usernames(text)
    text = expand_contractions(text)
    text = amazon_ner_reddit(text)
    text = remove_amazon_numbers(text)
    if tokenize == 'yes':
        text = tokenize_text(text)
    return text


def clean_amazon_comments(df, comments_column): 
    comments_list = []
    for index, row in df.iterrows(): 
        item = row[comments_column]
        text = clean_amazon_text(item, usrnme='yes', tokenize='no')
        text = [re.sub(r'(\\\')', "'", sentence) for sentence in text]
        text = [re.sub(r'(\-|\^|\>|\<|\%|\:|\*|\;|\(|\)|\[|\]|\/|\$|\#| \\| \- )', " ", sentence) for sentence in text]
        text = [re.sub(r'(\_|\~|\\ )', "", sentence) for sentence in text]
        text = [re.sub(r'( \!)', "!", sentence) for sentence in text]
        text = [re.sub(r'( \, )', ", ", sentence) for sentence in text]
        text = [re.sub(r'( \,\,+ )', " ", sentence) for sentence in text]
        text = [re.sub(r'(\.\.\.|\.\.)', ".", sentence) for sentence in text]
        text = [re.sub(r'(\'s )', "'s ", sentence) for sentence in text]
        text = [re.sub(r'( \. |\.   |  \. )', ". ", sentence) for sentence in text]
        text = [re.sub(r'( \.)', ".", sentence) for sentence in text]
        text = [re.sub(r'(\+)', "plus", sentence) for sentence in text]
        text = [re.sub(r'(\\\\n\\\\n|\\n\\n|\\n| \· )', " ", sentence) for sentence in text]
        out = [re.sub(r"\s\s+", ' ', str(i)) for i in text]
        comments_list.append(out)
    df['comments_list_filtered']= comments_list
    return df


def clean_txt(textdoc): 
    textdoc = [re.sub(r"(and\/or)", 'and or', i) for i in textdoc]
    textdoc = [re.sub(r"(\[|\]|\(|\)|\*|\^|\$|\#|\%|\`)", ' ', i) for i in textdoc]
    textdoc = [re.sub(r"(\[|\]|\(|\)|\*|\^|\>|\<|\$|\#|\-|\_|\;|\%|\||\>|\: )", ' ', i) for i in textdoc]
    textdoc = [re.sub(r"(\.\.\. )", '. ', i) for i in textdoc]
    textdoc = [re.sub(r"( \'\' )", ' ', i) for i in textdoc]
    textdoc = [re.sub(r"( \’ )", "'", i) for i in textdoc]
    textdoc = [re.sub(r"( \“ )", " ", i) for i in textdoc]
    textdoc = [re.sub(r"( \\ )", " ", i) for i in textdoc]
    textdoc = [re.sub(r"(\\ )", ' ', str(i)) for i in textdoc]
    textdoc = [re.sub(r"(\\)", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"\\n", ' ', str(i)) for i in textdoc]
    textdoc = [re.sub(r"\\\\n", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"( \/ )", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"\/", ' ', str(i)) for i in textdoc]
    textdoc = [re.sub(r" (?=(\!|\,|\.|\!|\\|\/|\?|\'|\\\.|\‘) ?)", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"(\,\,+)", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"( \— )", " ", str(i)) for i in textdoc]
    textdoc = [re.sub(r"( (\”|\") )", " ", str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b(TL DR|TLDR)\b", '', str(i)) for i in textdoc]
    textdoc = [re.sub(r"( \'s)", "'s", str(i)) for i in textdoc]
    textdoc = [re.sub(r"(\.\.+ )", ". ", str(i)) for i in textdoc]
    textdoc = [re.sub(r"( (\”|\"))", " ", str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b(A, B, C, D, E, and F)\b ?", "", str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b(A, B, C, D, E, F)\b ?", "", str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b(A\, B)\b", "", str(i)) for i in textdoc]
    textdoc = [re.sub(r"(\? \?\? \?)", "????", str(i)) for i in textdoc]
    textdoc = [re.sub(r"( \?\.)", "?", str(i)) for i in textdoc]
    textdoc = [re.sub(r"( \, )", ", ", str(i)) for i in textdoc]
    textdoc = [re.sub(r"\s\s+", ' ', str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b(aaannnd|aannnnddddd|aaand|aaaaaaand|aaaand)\b", 'and', str(i)) for i in textdoc]
    textdoc = [re.sub(r"\b2four\b", 'twentyfour', str(i)) for i in textdoc]
    txt = [re.sub(r"\baamazon\b", 'amazon', str(i)) for i in textdoc]
    return txt




### Prepare Data for Sentiment Analysis Tasks 

#### Concatenate DataFrames

- Concatenate datasets. 
- Drop duplicate rows. 


In [5]:
### Drop Duplicates From Independent DataFrames ### 
alexa_1_dd = alexa_1.drop_duplicates()
alexa_2_dd = alexa_2.drop_duplicates()

### Concatenate DataFrames ###
alexa_concat = pd.concat([alexa_1_dd, alexa_2_dd])
print(f" Before Deduplication: {alexa_concat.shape}")

### Drop Duplicates From Concatenated DataFrame ###
alexa_concat_dd = alexa_concat.drop_duplicates(subset=['created_unix_utc', 'author', 'id', 'commenter_list'], keep='last').reset_index(drop=True)
print(f" After Deduplication: {alexa_concat_dd.shape}")

### Drop Empty Column ### 
alexa_concat_dd = alexa_concat_dd.drop('created_pst', axis=1)

alexa_concat_dd.head(3)


 Before Deduplication: (9867, 18)
 After Deduplication: (5811, 18)


Unnamed: 0,created_unix_utc,created_datetime_pst,title,author,score,id,subreddit,url,body,num_comments,comments,commenter_list,commenter_count,comment_list,cleaned_title,cleaned_body,cleaned_comments
0,1682903186.0,30-Apr-2023 18:06:26,Spotify on Alexa,CelticNomadd,1,1348j93,alexa,https://www.reddit.com/r/alexa/comments/1348j9...,Amazon should add an option to just view the s...,0,[],[],0,['NaN'],spotify on alexa,amazon should add option to just view song pla...,
1,1682901623.0,30-Apr-2023 17:40:23,Can Echo Show automatically connect to iPhone ...,dsupreme99,1,1347z0d,alexa,https://www.reddit.com/r/alexa/comments/1347z0...,"Thinking of getting Alexa device, can the echo...",0,[],[],0,['NaN'],echoshow automatically connect to iphone hotsp...,think of get alexa device echoshow automatical...,
2,1682898187.0,30-Apr-2023 16:43:07,I want to bring my smart bulb on holiday. Do I...,Minimum-Lemon-402,3,1346p2a,alexa,https://www.reddit.com/r/alexa/comments/1346p2...,Heading to visit the inlaws in another country...,3,['CYPH3R_22: They have to be on the same netwo...,"['CYPH3R_22', 'freebubbleup', 'CYPH3R_22']",3,"[""'CYPH3R_22: They have to be on the same netw...",want to bring smartlight on holiday need to br...,head to visit inlaw in another country go to b...,to on same network ask alexa what voltage in c...


#### Pre-Process Text Columns

In [6]:
### Body ### 
alexa_concat_dd["body_filtered"] = clean_amazon_text(alexa_concat_dd.body, usrnme='no', tokenize='yes')
alexa_concat_dd["body_filtered"] = clean_txt(alexa_concat_dd.body_filtered)

### Comments ### 
alexa_concat_dd = reformat_praw_comments(alexa_concat_dd)
alexa_concat_dd = clean_amazon_comments(alexa_concat_dd, comments_column='comment_list')


## Sentiment and Emotion Analysis Tasks 


In [7]:
def sentiment_model(model_name): 
    sentiment_classifier = pipeline("sentiment-analysis", model=model_name, tokenizer=model_name, max_length=512, truncation=True)
    return sentiment_classifier


def emotion_model(model_name): 
    if model_name == "arpanghoshal/EmoRoBERTa":
        tokenizer = RobertaTokenizerFast.from_pretrained(model_name)
        model = TFRobertaForSequenceClassification.from_pretrained(model_name)
        emotion_classifier = pipeline('sentiment-analysis', model=model_name)
    else: 
        emotion_classifier = pipeline("text-classification", model=model_name, max_length=512, truncation=True, return_all_scores=True)
    return emotion_classifier


def sentiment_analysis(df, column_name, model_type, classifier_model, comments='no'): 
    
    if comments == 'no':
        #Transform data to list.
        text_column = df[column_name].to_list()
        #Truncate text into 512 words. 
        text_list = []
        for item in text_column: 
            if len(item.split()) >= 512: 
                text = item.split()
                text_output = " ".join(text[0:512])
                text_list.append(text_output)
            else: 
                text_list.append(item)
        #Execute classification task.
        sentiment_results = classifier_model(text_list)
        #Write results as column to input df.
        df[f'{column_name}_{model_type}'] = sentiment_results

    if comments == 'yes': 
        #Transform data to list.
        sentiment_comments_list = []
        for index, row in df.iterrows(): 
            #print(f"Evaluating row {index}...")
            text_list = row[column_name]
            #Execute classification task.
            sentiment_results = classifier_model(text_list)
            sentiment_comments_list.append(sentiment_results) # []
        #Write results as column to input df.
        df[f'{column_name}_{model_type}'] = sentiment_comments_list
        
    return df



### Sentiment Analysis

Sentiment Analysis using HuggingFace: https://huggingface.co/tasks/text-classification

Models: 
- sentiment-roberta-large-english (https://huggingface.co/siebert/sentiment-roberta-large-english)
- bertweet-base-sentiment-analysis (https://huggingface.co/finiteautomata/bertweet-base-sentiment-analysis)
- twitter-xlm-roberta-base-sentiment (https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment)

In [8]:
## Sentiment Classifier Models ##

bertweet_model = "finiteautomata/bertweet-base-sentiment-analysis"
roberta_model= "siebert/sentiment-roberta-large-english"
cardiffnlp_model = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
citizenlab_model = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"


roberta_sentiment_classifier = sentiment_model(roberta_model)
bertweet_sentiment_classifier = sentiment_model(bertweet_model)
cardiffnlp_sentiment_classifier = sentiment_model(cardiffnlp_model)
citizenlab_sentiment_classifier = emotion_model(citizenlab_model) #classification = "text-classification"; using emotion_model path.


### Emotion Classification 

Emotion Classification using HuggingFace: https://huggingface.co/tasks/text-classification
    
Models: 
- emotion-english-distilroberta-base (https://huggingface.co/j-hartmann/emotion-english-distilroberta-base)
- distilbert-base-uncased-emotion (https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
- EmoRoBERTa (https://huggingface.co/arpanghoshal/EmoRoBERTa)

In [9]:
## Emotion Classifier Models ##

distilroberta_model = "j-hartmann/emotion-english-distilroberta-base"
distilbert_model = "bhadresh-savani/distilbert-base-uncased-emotion"
emoroberta_model = "arpanghoshal/EmoRoBERTa"


distilroberta_emotion_classifier = emotion_model(distilroberta_model)
distilbert_emotion_classifier = emotion_model(distilbert_model)
emoroberta_emotion_classifier = emotion_model(emoroberta_model)


#### Execute Sentiment Analysis Tasks

In [64]:
## Title ##

### Sentiment ### 
sentiment_analysis(alexa_concat_dd, 'title', model_type='sentiment', classifier_model=roberta_sentiment_classifier)
sentiment_analysis(alexa_concat_dd, 'title', model_type='emotion_bertweet', classifier_model=bertweet_sentiment_classifier)

### Emotion ###
sentiment_analysis(alexa_concat_dd, 'title', model_type='emotion', classifier_model=distilroberta_emotion_classifier)
sentiment_analysis(alexa_concat_dd, 'title', model_type='emotion_bert', classifier_model=distilbert_emotion_classifier)


Unnamed: 0,created_unix_utc,created_datetime_pst,title,author,score,id,subreddit,url,body,num_comments,...,comment_list,cleaned_title,cleaned_body,cleaned_comments,body_filtered,comments_list_filtered,title_sentiment,title_emotion_bertweet,title_emotion,title_emotion_bert
0,1682903186.0,30-Apr-2023 18:06:26,Spotify on Alexa,CelticNomadd,1,1348j93,alexa,https://www.reddit.com/r/alexa/comments/1348j9...,Amazon should add an option to just view the s...,0,...,[NaN],spotify on alexa,amazon should add option to just view song pla...,,Amazon should add an option to just view the s...,[NaN],"{'label': 'POSITIVE', 'score': 0.987148106098175}","{'label': 'NEU', 'score': 0.8330541849136353}","[{'label': 'anger', 'score': 0.005384087096899...","[{'label': 'sadness', 'score': 0.0236624572426..."
1,1682901623.0,30-Apr-2023 17:40:23,Can Echo Show automatically connect to iPhone ...,dsupreme99,1,1347z0d,alexa,https://www.reddit.com/r/alexa/comments/1347z0...,"Thinking of getting Alexa device, can the echo...",0,...,[NaN],echoshow automatically connect to iphone hotsp...,think of get alexa device echoshow automatical...,,"Thinking of getting Alexa device, can the echo...",[NaN],"{'label': 'NEGATIVE', 'score': 0.9968199729919...","{'label': 'POS', 'score': 0.599865734577179}","[{'label': 'anger', 'score': 0.012488813139498...","[{'label': 'sadness', 'score': 0.0076527502387..."
2,1682898187.0,30-Apr-2023 16:43:07,I want to bring my smart bulb on holiday. Do I...,Minimum-Lemon-402,3,1346p2a,alexa,https://www.reddit.com/r/alexa/comments/1346p2...,Heading to visit the inlaws in another country...,3,...,['CYPH3R_22: They have to be on the same netwo...,want to bring smartlight on holiday need to br...,head to visit inlaw in another country go to b...,to on same network ask alexa what voltage in c...,Heading to visit the inlaws in another country...,"['They have to be on the same network', 'I wou...","{'label': 'NEGATIVE', 'score': 0.9632630348205...","{'label': 'NEU', 'score': 0.8540923595428467}","[{'label': 'anger', 'score': 0.016276195645332...","[{'label': 'sadness', 'score': 0.0004231125058..."
3,1682874868.0,30-Apr-2023 10:14:28,I don't get it where on the decade of ai so wh...,animestar1234,0,133xgwk,alexa,https://www.reddit.com/r/alexa/comments/133xgw...,I mean chat gpt is becoming so innovative and ...,6,...,[NaN],not get where on decade of ai why hasny alexa ...,mean chat gpt become innovative people start t...,aw launch bedrock to enable more llm developme...,I mean chat gpt is becoming so innovative and ...,[NaN],"{'label': 'NEGATIVE', 'score': 0.9958089590072...","{'label': 'NEU', 'score': 0.7140888571739197}","[{'label': 'anger', 'score': 0.011244812980294...","[{'label': 'sadness', 'score': 0.0052124224603..."
4,1682872224.0,30-Apr-2023 09:30:24,Is there a way to make calls between two Echoe...,kerfuffle_dood,5,133wear,alexa,https://www.reddit.com/r/alexa/comments/133wea...,"I know that there's the Drop-In, but as far as...",5,...,['NikkiRoxi: DROP IN is not just for use betwe...,way to make call between two echo with differe...,know dropin far know only between device with ...,dropin not just for use between same account u...,"I know that there is the Drop In, but as far a...",['DROP IN is not just for use between the same...,"{'label': 'NEGATIVE', 'score': 0.9950237274169...","{'label': 'NEU', 'score': 0.9725497364997864}","[{'label': 'anger', 'score': 0.008503718301653...","[{'label': 'sadness', 'score': 0.0275614112615..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5806,1677763725.0,02-Mar-2023 05:28:45,She keeps going off in the middle of the night...,JuliaJune96,3,11g2sbv,alexa,https://www.reddit.com/r/alexa/comments/11g2sb...,Occasionally I get this problem. She turns off...,13,...,[NaN],keep go off in middle of night while play ocea...,occasionally get problem turn off after short ...,sleepjar skill free version last for hour loop...,Occasionally I get this problem. She turns off...,[NaN],"{'label': 'NEGATIVE', 'score': 0.9994957447052...","{'label': 'NEU', 'score': 0.6331915855407715}","[{'label': 'anger', 'score': 0.094623029232025...","[{'label': 'sadness', 'score': 0.0036189923994..."
5807,1673970206.0,17-Jan-2023 07:43:26,Is the entire Alexa system falling apart? (Any...,PrivatePilot9,93,10efca9,alexa,https://www.reddit.com/r/alexa/comments/10efca...,"I'm nearing my wits end, especially with how i...",123,...,[NaN],entire alexa system fall apart anyone else,near wit end especially with how integral alex...,ask one of device to pull up one of connected ...,"i am nearing my wits end, especially with how ...",[NaN],"{'label': 'NEGATIVE', 'score': 0.9994725584983...","{'label': 'NEG', 'score': 0.798569917678833}","[{'label': 'anger', 'score': 0.088975846767425...","[{'label': 'sadness', 'score': 0.4629723429679..."
5808,1673702745.0,14-Jan-2023 05:25:45,Is it possible to revert the Alexa app (not th...,beyondthegraceofgod,5,10boe9l,alexa,https://www.reddit.com/r/alexa/comments/10boe9...,"In other words, I know how to reset my Echo (4...",9,...,[NaN],possible to revert alexaapp not device back to...,in other word know how to reset amazonecho fou...,actually go alexaamazoncom delete all device w...,"In other words, I know how to reset my amazone...",[NaN],"{'label': 'NEGATIVE', 'score': 0.996651828289032}","{'label': 'NEU', 'score': 0.9657372236251831}","[{'label': 'anger', 'score': 0.012898885644972...","[{'label': 'sadness', 'score': 0.0230021029710..."
5809,1673300204.0,09-Jan-2023 13:36:44,Just got a “pop-up” advertisement notification...,menicknick,7,107qud0,alexa,https://www.reddit.com/r/alexa/comments/107qud...,Anyone else get these? First time here. If i...,14,...,[NaN],just get pop up advertisement notification on ...,anyone else get first time here happen again i...,alexa setting notifications amazon shop thing ...,Anyone else get these? First time here. If it ...,[NaN],"{'label': 'POSITIVE', 'score': 0.9955065250396...","{'label': 'NEU', 'score': 0.8244529366493225}","[{'label': 'anger', 'score': 0.010887555778026...","[{'label': 'sadness', 'score': 0.0053306692279..."


In [66]:
## Body ## 

### Sentiment ### 
sentiment_analysis(alexa_concat_dd, 'body_filtered', model_type='sentiment', classifier_model=roberta_sentiment_classifier)
sentiment_analysis(alexa_concat_dd, 'body_filtered', model_type='sentiment_cardiff', classifier_model=cardiffnlp_sentiment_classifier)

### Emotion ###
sentiment_analysis(alexa_concat_dd, 'body_filtered', model_type='emotion', classifier_model=distilroberta_emotion_classifier)
sentiment_analysis(alexa_concat_dd, 'body_filtered', model_type='emotion_bert', classifier_model=distilbert_emotion_classifier)



Unnamed: 0,created_unix_utc,created_datetime_pst,title,author,score,id,subreddit,url,body,num_comments,...,body_filtered,comments_list_filtered,title_sentiment,title_emotion_bertweet,title_emotion,title_emotion_bert,body_filtered_sentiment,body_filtered_sentiment_cardiff,body_filtered_emotion,body_filtered_emotion_bert
0,1682903186.0,30-Apr-2023 18:06:26,Spotify on Alexa,CelticNomadd,1,1348j93,alexa,https://www.reddit.com/r/alexa/comments/1348j9...,Amazon should add an option to just view the s...,0,...,Amazon should add an option to just view the s...,[NaN],"{'label': 'POSITIVE', 'score': 0.987148106098175}","{'label': 'NEU', 'score': 0.8330541849136353}","[{'label': 'anger', 'score': 0.005384087096899...","[{'label': 'sadness', 'score': 0.0236624572426...","{'label': 'POSITIVE', 'score': 0.9841079711914...","{'label': 'neutral', 'score': 0.5693414807319641}","[{'label': 'anger', 'score': 0.014921673573553...","[{'label': 'sadness', 'score': 0.0083957826718..."
1,1682901623.0,30-Apr-2023 17:40:23,Can Echo Show automatically connect to iPhone ...,dsupreme99,1,1347z0d,alexa,https://www.reddit.com/r/alexa/comments/1347z0...,"Thinking of getting Alexa device, can the echo...",0,...,"Thinking of getting Alexa device, can the echo...",[NaN],"{'label': 'NEGATIVE', 'score': 0.9968199729919...","{'label': 'POS', 'score': 0.599865734577179}","[{'label': 'anger', 'score': 0.012488813139498...","[{'label': 'sadness', 'score': 0.0076527502387...","{'label': 'POSITIVE', 'score': 0.9968852400779...","{'label': 'neutral', 'score': 0.6567432880401611}","[{'label': 'anger', 'score': 0.009094883687794...","[{'label': 'sadness', 'score': 0.0095788342878..."
2,1682898187.0,30-Apr-2023 16:43:07,I want to bring my smart bulb on holiday. Do I...,Minimum-Lemon-402,3,1346p2a,alexa,https://www.reddit.com/r/alexa/comments/1346p2...,Heading to visit the inlaws in another country...,3,...,Heading to visit the inlaws in another country...,"['They have to be on the same network', 'I wou...","{'label': 'NEGATIVE', 'score': 0.9632630348205...","{'label': 'NEU', 'score': 0.8540923595428467}","[{'label': 'anger', 'score': 0.016276195645332...","[{'label': 'sadness', 'score': 0.0004231125058...","{'label': 'POSITIVE', 'score': 0.9955968260765...","{'label': 'neutral', 'score': 0.5459811687469482}","[{'label': 'anger', 'score': 0.010565615259110...","[{'label': 'sadness', 'score': 0.0015707103302..."
3,1682874868.0,30-Apr-2023 10:14:28,I don't get it where on the decade of ai so wh...,animestar1234,0,133xgwk,alexa,https://www.reddit.com/r/alexa/comments/133xgw...,I mean chat gpt is becoming so innovative and ...,6,...,I mean chat gpt is becoming so innovative and ...,[NaN],"{'label': 'NEGATIVE', 'score': 0.9958089590072...","{'label': 'NEU', 'score': 0.7140888571739197}","[{'label': 'anger', 'score': 0.011244812980294...","[{'label': 'sadness', 'score': 0.0052124224603...","{'label': 'NEGATIVE', 'score': 0.9915388226509...","{'label': 'negative', 'score': 0.7868428826332...","[{'label': 'anger', 'score': 0.016349205747246...","[{'label': 'sadness', 'score': 0.7200033068656..."
4,1682872224.0,30-Apr-2023 09:30:24,Is there a way to make calls between two Echoe...,kerfuffle_dood,5,133wear,alexa,https://www.reddit.com/r/alexa/comments/133wea...,"I know that there's the Drop-In, but as far as...",5,...,"I know that there is the Drop In, but as far a...",['DROP IN is not just for use between the same...,"{'label': 'NEGATIVE', 'score': 0.9950237274169...","{'label': 'NEU', 'score': 0.9725497364997864}","[{'label': 'anger', 'score': 0.008503718301653...","[{'label': 'sadness', 'score': 0.0275614112615...","{'label': 'NEGATIVE', 'score': 0.9938561320304...","{'label': 'neutral', 'score': 0.7338858246803284}","[{'label': 'anger', 'score': 0.004683145787566...","[{'label': 'sadness', 'score': 0.0135180046781..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5806,1677763725.0,02-Mar-2023 05:28:45,She keeps going off in the middle of the night...,JuliaJune96,3,11g2sbv,alexa,https://www.reddit.com/r/alexa/comments/11g2sb...,Occasionally I get this problem. She turns off...,13,...,Occasionally I get this problem. She turns off...,[NaN],"{'label': 'NEGATIVE', 'score': 0.9994957447052...","{'label': 'NEU', 'score': 0.6331915855407715}","[{'label': 'anger', 'score': 0.094623029232025...","[{'label': 'sadness', 'score': 0.0036189923994...","{'label': 'NEGATIVE', 'score': 0.9994725584983...","{'label': 'negative', 'score': 0.7942632436752...","[{'label': 'anger', 'score': 0.179894521832466...","[{'label': 'sadness', 'score': 0.0450465418398..."
5807,1673970206.0,17-Jan-2023 07:43:26,Is the entire Alexa system falling apart? (Any...,PrivatePilot9,93,10efca9,alexa,https://www.reddit.com/r/alexa/comments/10efca...,"I'm nearing my wits end, especially with how i...",123,...,"i am nearing my wits end, especially with how ...",[NaN],"{'label': 'NEGATIVE', 'score': 0.9994725584983...","{'label': 'NEG', 'score': 0.798569917678833}","[{'label': 'anger', 'score': 0.088975846767425...","[{'label': 'sadness', 'score': 0.4629723429679...","{'label': 'NEGATIVE', 'score': 0.9995005130767...","{'label': 'negative', 'score': 0.5074052214622...","[{'label': 'anger', 'score': 0.085705302655696...","[{'label': 'sadness', 'score': 0.9497105479240..."
5808,1673702745.0,14-Jan-2023 05:25:45,Is it possible to revert the Alexa app (not th...,beyondthegraceofgod,5,10boe9l,alexa,https://www.reddit.com/r/alexa/comments/10boe9...,"In other words, I know how to reset my Echo (4...",9,...,"In other words, I know how to reset my amazone...",[NaN],"{'label': 'NEGATIVE', 'score': 0.996651828289032}","{'label': 'NEU', 'score': 0.9657372236251831}","[{'label': 'anger', 'score': 0.012898885644972...","[{'label': 'sadness', 'score': 0.0230021029710...","{'label': 'POSITIVE', 'score': 0.9978129863739...","{'label': 'negative', 'score': 0.854820191860199}","[{'label': 'anger', 'score': 0.040406759828329...","[{'label': 'sadness', 'score': 0.2714154422283..."
5809,1673300204.0,09-Jan-2023 13:36:44,Just got a “pop-up” advertisement notification...,menicknick,7,107qud0,alexa,https://www.reddit.com/r/alexa/comments/107qud...,Anyone else get these? First time here. If i...,14,...,Anyone else get these? First time here. If it ...,[NaN],"{'label': 'POSITIVE', 'score': 0.9955065250396...","{'label': 'NEU', 'score': 0.8244529366493225}","[{'label': 'anger', 'score': 0.010887555778026...","[{'label': 'sadness', 'score': 0.0053306692279...","{'label': 'NEGATIVE', 'score': 0.9993822574615...","{'label': 'negative', 'score': 0.9104802012443...","[{'label': 'anger', 'score': 0.761510372161865...","[{'label': 'sadness', 'score': 0.0208881143480..."


In [16]:
## Comments ## 

### Sentiment ### 
sentiment_analysis(alexa_concat_dd, 'comments_list_filtered', model_type='sentiment', classifier_model=roberta_sentiment_classifier, comments='yes')
sentiment_analysis(alexa_concat_dd, 'comments_list_filtered', model_type='sentiment_cardiff', classifier_model=cardiffnlp_sentiment_classifier, comments='yes')

### Emotion ###
sentiment_analysis(alexa_concat_dd, 'comments_list_filtered', model_type='emotion', classifier_model=distilroberta_emotion_classifier, comments='yes')
sentiment_analysis(alexa_concat_dd, 'comments_list_filtered', model_type='emotion_bert', classifier_model=distilbert_emotion_classifier, comments='yes')



Unnamed: 0,created_unix_utc,created_datetime_pst,title,author,score,id,subreddit,url,body,num_comments,...,comment_list,cleaned_title,cleaned_body,cleaned_comments,body_filtered,comments_list_filtered,comments_list_filtered_sentiment,comments_list_filtered_sentiment_cardiff,comments_list_filtered_emotion,comments_list_filtered_emotion_bert
0,1682903186.0,30-Apr-2023 18:06:26,Spotify on Alexa,CelticNomadd,1,1348j93,alexa,https://www.reddit.com/r/alexa/comments/1348j9...,Amazon should add an option to just view the s...,0,...,[NaN],spotify on alexa,amazon should add option to just view song pla...,,Amazon should add an option to just view the s...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
1,1682901623.0,30-Apr-2023 17:40:23,Can Echo Show automatically connect to iPhone ...,dsupreme99,1,1347z0d,alexa,https://www.reddit.com/r/alexa/comments/1347z0...,"Thinking of getting Alexa device, can the echo...",0,...,[NaN],echoshow automatically connect to iphone hotsp...,think of get alexa device echoshow automatical...,,"Thinking of getting Alexa device, can the echo...",[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
2,1682898187.0,30-Apr-2023 16:43:07,I want to bring my smart bulb on holiday. Do I...,Minimum-Lemon-402,3,1346p2a,alexa,https://www.reddit.com/r/alexa/comments/1346p2...,Heading to visit the inlaws in another country...,3,...,['CYPH3R_22: They have to be on the same netwo...,want to bring smartlight on holiday need to br...,head to visit inlaw in another country go to b...,to on same network ask alexa what voltage in c...,Heading to visit the inlaws in another country...,"['They have to be on the same network', 'I wou...","[{'label': 'NEGATIVE', 'score': 0.978289425373...","[{'label': 'neutral', 'score': 0.6682137250900...","[[{'label': 'anger', 'score': 0.01139766629785...","[[{'label': 'sadness', 'score': 0.160842120647..."
3,1682874868.0,30-Apr-2023 10:14:28,I don't get it where on the decade of ai so wh...,animestar1234,0,133xgwk,alexa,https://www.reddit.com/r/alexa/comments/133xgw...,I mean chat gpt is becoming so innovative and ...,6,...,[NaN],not get where on decade of ai why hasny alexa ...,mean chat gpt become innovative people start t...,aw launch bedrock to enable more llm developme...,I mean chat gpt is becoming so innovative and ...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
4,1682872224.0,30-Apr-2023 09:30:24,Is there a way to make calls between two Echoe...,kerfuffle_dood,5,133wear,alexa,https://www.reddit.com/r/alexa/comments/133wea...,"I know that there's the Drop-In, but as far as...",5,...,['NikkiRoxi: DROP IN is not just for use betwe...,way to make call between two echo with differe...,know dropin far know only between device with ...,dropin not just for use between same account u...,"I know that there is the Drop In, but as far a...",['DROP IN is not just for use between the same...,"[{'label': 'POSITIVE', 'score': 0.997146189212...","[{'label': 'neutral', 'score': 0.5218997001647...","[[{'label': 'anger', 'score': 0.01492923498153...","[[{'label': 'sadness', 'score': 0.014556899666..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5806,1677763725.0,02-Mar-2023 05:28:45,She keeps going off in the middle of the night...,JuliaJune96,3,11g2sbv,alexa,https://www.reddit.com/r/alexa/comments/11g2sb...,Occasionally I get this problem. She turns off...,13,...,[NaN],keep go off in middle of night while play ocea...,occasionally get problem turn off after short ...,sleepjar skill free version last for hour loop...,Occasionally I get this problem. She turns off...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
5807,1673970206.0,17-Jan-2023 07:43:26,Is the entire Alexa system falling apart? (Any...,PrivatePilot9,93,10efca9,alexa,https://www.reddit.com/r/alexa/comments/10efca...,"I'm nearing my wits end, especially with how i...",123,...,[NaN],entire alexa system fall apart anyone else,near wit end especially with how integral alex...,ask one of device to pull up one of connected ...,"i am nearing my wits end, especially with how ...",[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
5808,1673702745.0,14-Jan-2023 05:25:45,Is it possible to revert the Alexa app (not th...,beyondthegraceofgod,5,10boe9l,alexa,https://www.reddit.com/r/alexa/comments/10boe9...,"In other words, I know how to reset my Echo (4...",9,...,[NaN],possible to revert alexaapp not device back to...,in other word know how to reset amazonecho fou...,actually go alexaamazoncom delete all device w...,"In other words, I know how to reset my amazone...",[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."
5809,1673300204.0,09-Jan-2023 13:36:44,Just got a “pop-up” advertisement notification...,menicknick,7,107qud0,alexa,https://www.reddit.com/r/alexa/comments/107qud...,Anyone else get these? First time here. If i...,14,...,[NaN],just get pop up advertisement notification on ...,anyone else get first time here happen again i...,alexa setting notifications amazon shop thing ...,Anyone else get these? First time here. If it ...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198..."


### Create Text-Specific DataFrames

In [32]:
## Titles ##
title_ratings = alexa_concat_dd[['title','title_sentiment','title_emotion_bertweet','title_emotion','title_emotion_bert']].copy()

## Body ##
body_ratings = alexa_concat_dd[['body', 'body_filtered', 'body_filtered_sentiment', 'body_filtered_sentiment_cardiff', 'body_filtered_emotion', 'body_filtered_emotion_bert']].copy()

## Comments ##
comment_ratings = alexa_concat_dd[['comments', 'comments_list_filtered', 'comments_list_filtered_sentiment', 'comments_list_filtered_sentiment_cardiff', 'comments_list_filtered_emotion', 'comments_list_filtered_emotion_bert']].copy()


### Pairwise Comparison of Sentiment Classification Results 

In [33]:
## Sentiment (Positive, Negative, Neutral) ## 

def compare_sentiment(df, sentiment_column1, sentiment_column2):
    
    sentiment_ls = []

    negative = ['NEG', 'negative', 'NEGATIVE']
    positive = ['POS', 'positive', 'POSITIVE']
    neutral = ['NEU', 'neutral', 'NEUTRAL']
    
    df = df.reset_index()
    for index, row in df.iterrows(): 
 
        sentiment_a = row[sentiment_column1]
        sentiment_b = row[sentiment_column2]
        
        if sentiment_a['label'] == 'NEGATIVE' and sentiment_b['label'] in negative: 
            sentiment_ls.append('NEG')
        if sentiment_a['label'] == 'POSITIVE' and  sentiment_b['label'] in positive: 
            sentiment_ls.append('POS')
        if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in neutral:
            sentiment_ls.append('NEU')
        if sentiment_a['label'] == 'POSITIVE' and  sentiment_b['label'] in neutral:
            sentiment_ls.append('POS-NEU')
        if sentiment_a['label'] == 'NEGATIVE' and  sentiment_b['label'] in neutral:
            sentiment_ls.append('NEG-NEU')
        if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in positive:
            sentiment_ls.append('NEU-POS')
        if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in negative:
            sentiment_ls.append('NEU-NEG')
        if sentiment_a['label'] == 'NEGATIVE' and  sentiment_b['label'] in positive:
            sentiment_ls.append('NEG-POS')
        if sentiment_a['label'] == 'POSITIVE' and sentiment_b['label'] in negative:
            sentiment_ls.append('POS-NEG')

    df[f'{sentiment_column1}_comparison'] = sentiment_ls
    
    return df


def compare_sentiment_comments(df, sentiment_column1, sentiment_column2):
    
    sentiment_comments_ls = []
    
    negative = ['NEG', 'negative', 'NEGATIVE']
    positive = ['POS', 'positive', 'POSITIVE']
    neutral = ['NEU', 'neutral', 'NEUTRAL']

    df = df.reset_index()
    for index, row in df.iterrows(): 
        list_a = row[sentiment_column1]
        list_b = row[sentiment_column2]
        
        sentiment_ls = []
        for i in range(len(list_a)):
            sentiment_a = list_a[i]
            sentiment_b = list_b[i]

            if sentiment_a['label'] == 'NEGATIVE' and sentiment_b['label'] in negative: 
                sentiment_ls.append('NEG')
            if sentiment_a['label'] == 'POSITIVE' and  sentiment_b['label'] in positive: 
                sentiment_ls.append('POS')
            if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in neutral:
                sentiment_ls.append('NEU')
            if sentiment_a['label'] == 'POSITIVE' and  sentiment_b['label'] in neutral:
                sentiment_ls.append('POS-NEU')
            if sentiment_a['label'] == 'NEGATIVE' and  sentiment_b['label'] in neutral:
                sentiment_ls.append('NEG-NEU')
            if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in positive:
                sentiment_ls.append('NEU-POS')
            if sentiment_a['label'] == 'NEUTRAL' and  sentiment_b['label'] in negative:
                sentiment_ls.append('NEU-NEG')
            if sentiment_a['label'] == 'NEGATIVE' and  sentiment_b['label'] in positive:
                sentiment_ls.append('NEG-POS')
            if sentiment_a['label'] == 'POSITIVE' and sentiment_b['label'] in negative:
                sentiment_ls.append('POS-NEG')
        sentiment_comments_ls.append(sentiment_ls)

    df[f'{sentiment_column1}_comparison'] = sentiment_comments_ls

    return df




## Emotion Categories ## 

def compare_emotion(df, emotion_column1, emotion_column2): 
    
    emo_list_a = []
    emo_list_b = []
    emo_list = []
    
    for index, row in df.iterrows():
        
        emotion_a = row[emotion_column1]
        emotion_b = row[emotion_column2]
        
        emotion_a_rank = sorted(emotion_a, key=operator.itemgetter('score'), reverse=True)
        emotion_b_rank = sorted(emotion_b, key=operator.itemgetter('score'), reverse=True)
        
        emo_list_a.append([emotion_a_rank[0],emotion_a_rank[1]])
        emo_list_b.append([emotion_b_rank[0],emotion_b_rank[1]])
        
        if emotion_b_rank[0]['label'] == emotion_a_rank[0]['label']: 
            emo_list.append(['match', [emotion_a_rank[0], emotion_b_rank[0]]])
        elif emotion_a_rank[0]['label'] == emotion_b_rank[1]['label']: 
            if emotion_b_rank[1]['score'] > 0.2: 
                emo_list.append(['match', [emotion_a_rank[0], emotion_b_rank[1]]])
            else: 
                emo_list.append(['second_match', [emotion_a_rank[0], emotion_b_rank[1]]])
        elif emotion_b_rank[0]['label'] == emotion_a_rank[1]['label']: 
            if emotion_a_rank[1]['score'] > 0.2: 
                emo_list.append(['match', [emotion_a_rank[1], emotion_b_rank[0]]])
            else: 
                emo_list.append(['second_match', [emotion_a_rank[1], emotion_b_rank[0]]])
        elif emotion_a_rank[1]['label'] == emotion_b_rank[1]['label']: 
            if emotion_a_rank[1]['score'] > 0.2 and emotion_b_rank[1]['score'] > 0.2: 
                emo_list.append(['match', [emotion_a_rank[1], emotion_b_rank[1]]])
            else: 
                emo_list.append(['second_match', [emotion_a_rank[1], emotion_b_rank[1]]])
        else: 
            emo_list.append(['no_match', [emotion_a_rank[0]['label'], emotion_a_rank[1]['label']], [emotion_b_rank[0]['label'], emotion_b_rank[1]['label']]])

    df[f'{emotion_column1}_sorted'] = emo_list_a
    df[f'{emotion_column2}_sorted'] = emo_list_b          
    df[f'{emotion_column1}_comparison'] = emo_list
    
    return df 



def compare_emotion_comments(df, emotion_column1, emotion_column2): 
    
    emo_list_all = []
    emo_list_a_all = []
    emo_list_b_all = []
    for index, row in df.iterrows():
        
        list_a = row[emotion_column1]
        list_b = row[emotion_column2]
        
        emo_list = []
        emo_list_a = []
        emo_list_b = []
        for i in range(len(list_a)):
            emotion_a = list_a[i]
            emotion_b = list_b[i]
        
            emotion_a_rank = sorted(emotion_a, key=operator.itemgetter('score'), reverse=True)
            emotion_b_rank = sorted(emotion_b, key=operator.itemgetter('score'), reverse=True)
        
            emo_list_a.append([emotion_a_rank[0],emotion_a_rank[1]])
            emo_list_b.append([emotion_b_rank[0],emotion_b_rank[1]])

            if emotion_b_rank[0]['label'] == emotion_a_rank[0]['label']: 
                emo_list.append(['match', [emotion_a_rank[0], emotion_b_rank[0]]])
            elif emotion_a_rank[0]['label'] == emotion_b_rank[1]['label']: 
                if emotion_b_rank[1]['score'] > 0.2: 
                    emo_list.append(['match', [emotion_a_rank[0], emotion_b_rank[1]]])
                else: 
                    emo_list.append(['second_match', [emotion_a_rank[0], emotion_b_rank[1]]])
            elif emotion_b_rank[0]['label'] == emotion_a_rank[1]['label']: 
                if emotion_a_rank[1]['score'] > 0.2: 
                    emo_list.append(['match', [emotion_a_rank[1], emotion_b_rank[0]]])
                else: 
                    emo_list.append(['second_match', [emotion_a_rank[1], emotion_b_rank[0]]])
            elif emotion_a_rank[1]['label'] == emotion_b_rank[1]['label']: 
                if emotion_a_rank[1]['score'] > 0.2 and emotion_b_rank[1]['score'] > 0.2: 
                    emo_list.append(['match', [emotion_a_rank[1], emotion_b_rank[1]]])
                else: 
                    emo_list.append(['second_match', [emotion_a_rank[1], emotion_b_rank[1]]])
            else: 
                emo_list.append(['no_match', [emotion_a_rank[0]['label'], emotion_a_rank[1]['label']], [emotion_b_rank[0]['label'], emotion_b_rank[1]['label']]])
        emo_list_a_all.append(emo_list_a)
        emo_list_b_all.append(emo_list_b)
        emo_list_all.append(emo_list)
    df[f'{emotion_column1}_sorted'] = emo_list_a_all
    df[f'{emotion_column2}_sorted'] = emo_list_b_all          
    df[f'{emotion_column1}_comparison'] = emo_list_all
    
    return df 


In [74]:
title_ratings = compare_sentiment(title_ratings, sentiment_column1='title_sentiment', sentiment_column2='title_emotion_bertweet')
title_ratings = compare_emotion(title_ratings, emotion_column1='title_emotion', emotion_column2='title_emotion_bert')
title_ratings

Unnamed: 0,index,title,title_sentiment,title_emotion_bertweet,title_emotion,title_emotion_bert,title_sentiment_comparison,title_emotion_sorted,title_emotion_bert_sorted,title_emotion_comarison
0,0,Spotify on Alexa,"{'label': 'POSITIVE', 'score': 0.987148106098175}","{'label': 'NEU', 'score': 0.8330541849136353}","[{'label': 'anger', 'score': 0.005384087096899...","[{'label': 'sadness', 'score': 0.0236624572426...",POS-NEU,"[{'label': 'neutral', 'score': 0.8934263586997...","[{'label': 'joy', 'score': 0.8973285555839539}...","[no_match, [neutral, surprise], [joy, anger]]"
1,1,Can Echo Show automatically connect to iPhone ...,"{'label': 'NEGATIVE', 'score': 0.9968199729919...","{'label': 'POS', 'score': 0.599865734577179}","[{'label': 'anger', 'score': 0.012488813139498...","[{'label': 'sadness', 'score': 0.0076527502387...",NEG-POS,"[{'label': 'neutral', 'score': 0.7688468098640...","[{'label': 'joy', 'score': 0.896157443523407},...","[no_match, [neutral, surprise], [joy, fear]]"
2,2,I want to bring my smart bulb on holiday. Do I...,"{'label': 'NEGATIVE', 'score': 0.9632630348205...","{'label': 'NEU', 'score': 0.8540923595428467}","[{'label': 'anger', 'score': 0.016276195645332...","[{'label': 'sadness', 'score': 0.0004231125058...",NEG-NEU,"[{'label': 'sadness', 'score': 0.4714848101139...","[{'label': 'joy', 'score': 0.997992992401123},...","[no_match, [sadness, neutral], [joy, anger]]"
3,3,I don't get it where on the decade of ai so wh...,"{'label': 'NEGATIVE', 'score': 0.9958089590072...","{'label': 'NEU', 'score': 0.7140888571739197}","[{'label': 'anger', 'score': 0.011244812980294...","[{'label': 'sadness', 'score': 0.0052124224603...",NEG-NEU,"[{'label': 'surprise', 'score': 0.878579437732...","[{'label': 'joy', 'score': 0.9873307347297668}...","[no_match, [surprise, neutral], [joy, sadness]]"
4,4,Is there a way to make calls between two Echoe...,"{'label': 'NEGATIVE', 'score': 0.9950237274169...","{'label': 'NEU', 'score': 0.9725497364997864}","[{'label': 'anger', 'score': 0.008503718301653...","[{'label': 'sadness', 'score': 0.0275614112615...",NEG-NEU,"[{'label': 'neutral', 'score': 0.8794296979904...","[{'label': 'anger', 'score': 0.786774158477783...","[no_match, [neutral, surprise], [anger, fear]]"
...,...,...,...,...,...,...,...,...,...,...
5806,5806,She keeps going off in the middle of the night...,"{'label': 'NEGATIVE', 'score': 0.9994957447052...","{'label': 'NEU', 'score': 0.6331915855407715}","[{'label': 'anger', 'score': 0.094623029232025...","[{'label': 'sadness', 'score': 0.0036189923994...",NEG-NEU,"[{'label': 'surprise', 'score': 0.567658662796...","[{'label': 'fear', 'score': 0.7064389586448669...","[no_match, [surprise, neutral], [fear, anger]]"
5807,5807,Is the entire Alexa system falling apart? (Any...,"{'label': 'NEGATIVE', 'score': 0.9994725584983...","{'label': 'NEG', 'score': 0.798569917678833}","[{'label': 'anger', 'score': 0.088975846767425...","[{'label': 'sadness', 'score': 0.4629723429679...",NEG,"[{'label': 'surprise', 'score': 0.442749947309...","[{'label': 'sadness', 'score': 0.4629723429679...","[no_match, [surprise, neutral], [sadness, anger]]"
5808,5808,Is it possible to revert the Alexa app (not th...,"{'label': 'NEGATIVE', 'score': 0.996651828289032}","{'label': 'NEU', 'score': 0.9657372236251831}","[{'label': 'anger', 'score': 0.012898885644972...","[{'label': 'sadness', 'score': 0.0230021029710...",NEG-NEU,"[{'label': 'neutral', 'score': 0.8982800245285...","[{'label': 'joy', 'score': 0.7190351486206055}...","[no_match, [neutral, surprise], [joy, fear]]"
5809,5809,Just got a “pop-up” advertisement notification...,"{'label': 'POSITIVE', 'score': 0.9955065250396...","{'label': 'NEU', 'score': 0.8244529366493225}","[{'label': 'anger', 'score': 0.010887555778026...","[{'label': 'sadness', 'score': 0.0053306692279...",POS-NEU,"[{'label': 'neutral', 'score': 0.8453476428985...","[{'label': 'joy', 'score': 0.7792708277702332}...","[no_match, [neutral, surprise], [joy, fear]]"


In [75]:
body_ratings = compare_sentiment(body_ratings, sentiment_column1='body_filtered_sentiment', sentiment_column2='body_filtered_sentiment_cardiff')
body_ratings = compare_emotion(body_ratings, emotion_column1='body_filtered_emotion', emotion_column2='body_filtered_emotion_bert')
body_ratings

Unnamed: 0,index,body,body_filtered,body_filtered_sentiment,body_filtered_sentiment_cardiff,body_filtered_emotion,body_filtered_emotion_bert,body_filtered_sentiment_comparison,body_filtered_emotion_sorted,body_filtered_emotion_bert_sorted,body_filtered_emotion_comarison
0,0,Amazon should add an option to just view the s...,Amazon should add an option to just view the s...,"{'label': 'POSITIVE', 'score': 0.9841079711914...","{'label': 'neutral', 'score': 0.5693414807319641}","[{'label': 'anger', 'score': 0.014921673573553...","[{'label': 'sadness', 'score': 0.0083957826718...",POS-NEU,"[{'label': 'neutral', 'score': 0.9365915060043...","[{'label': 'joy', 'score': 0.8664062023162842}...","[second_match, [{'label': 'anger', 'score': 0...."
1,1,"Thinking of getting Alexa device, can the echo...","Thinking of getting Alexa device, can the echo...","{'label': 'POSITIVE', 'score': 0.9968852400779...","{'label': 'neutral', 'score': 0.6567432880401611}","[{'label': 'anger', 'score': 0.009094883687794...","[{'label': 'sadness', 'score': 0.0095788342878...",POS-NEU,"[{'label': 'neutral', 'score': 0.7449046969413...","[{'label': 'joy', 'score': 0.8240172863006592}...","[no_match, [neutral, surprise], [joy, fear]]"
2,2,Heading to visit the inlaws in another country...,Heading to visit the inlaws in another country...,"{'label': 'POSITIVE', 'score': 0.9955968260765...","{'label': 'neutral', 'score': 0.5459811687469482}","[{'label': 'anger', 'score': 0.010565615259110...","[{'label': 'sadness', 'score': 0.0015707103302...",POS-NEU,"[{'label': 'neutral', 'score': 0.7907505631446...","[{'label': 'joy', 'score': 0.9898409247398376}...","[no_match, [neutral, surprise], [joy, anger]]"
3,3,I mean chat gpt is becoming so innovative and ...,I mean chat gpt is becoming so innovative and ...,"{'label': 'NEGATIVE', 'score': 0.9915388226509...","{'label': 'negative', 'score': 0.7868428826332...","[{'label': 'anger', 'score': 0.016349205747246...","[{'label': 'sadness', 'score': 0.7200033068656...",NEG,"[{'label': 'surprise', 'score': 0.749803543090...","[{'label': 'sadness', 'score': 0.7200033068656...","[no_match, [surprise, neutral], [sadness, joy]]"
4,4,"I know that there's the Drop-In, but as far as...","I know that there is the Drop In, but as far a...","{'label': 'NEGATIVE', 'score': 0.9938561320304...","{'label': 'neutral', 'score': 0.7338858246803284}","[{'label': 'anger', 'score': 0.004683145787566...","[{'label': 'sadness', 'score': 0.0135180046781...",NEG-NEU,"[{'label': 'neutral', 'score': 0.8958482146263...","[{'label': 'anger', 'score': 0.611914396286010...","[no_match, [neutral, surprise], [anger, joy]]"
...,...,...,...,...,...,...,...,...,...,...,...
5806,5806,Occasionally I get this problem. She turns off...,Occasionally I get this problem. She turns off...,"{'label': 'NEGATIVE', 'score': 0.9994725584983...","{'label': 'negative', 'score': 0.7942632436752...","[{'label': 'anger', 'score': 0.179894521832466...","[{'label': 'sadness', 'score': 0.0450465418398...",NEG,"[{'label': 'surprise', 'score': 0.495322525501...","[{'label': 'fear', 'score': 0.6328150033950806...","[no_match, [surprise, neutral], [fear, anger]]"
5807,5807,"I'm nearing my wits end, especially with how i...","i am nearing my wits end, especially with how ...","{'label': 'NEGATIVE', 'score': 0.9995005130767...","{'label': 'negative', 'score': 0.5074052214622...","[{'label': 'anger', 'score': 0.085705302655696...","[{'label': 'sadness', 'score': 0.9497105479240...",NEG,"[{'label': 'surprise', 'score': 0.596627533435...","[{'label': 'sadness', 'score': 0.9497105479240...","[no_match, [surprise, neutral], [sadness, anger]]"
5808,5808,"In other words, I know how to reset my Echo (4...","In other words, I know how to reset my amazone...","{'label': 'POSITIVE', 'score': 0.9978129863739...","{'label': 'negative', 'score': 0.854820191860199}","[{'label': 'anger', 'score': 0.040406759828329...","[{'label': 'sadness', 'score': 0.2714154422283...",POS-NEG,"[{'label': 'surprise', 'score': 0.439128875732...","[{'label': 'anger', 'score': 0.718148887157440...","[no_match, [surprise, neutral], [anger, sadness]]"
5809,5809,Anyone else get these? First time here. If i...,Anyone else get these? First time here. If it ...,"{'label': 'NEGATIVE', 'score': 0.9993822574615...","{'label': 'negative', 'score': 0.9104802012443...","[{'label': 'anger', 'score': 0.761510372161865...","[{'label': 'sadness', 'score': 0.0208881143480...",NEG,"[{'label': 'anger', 'score': 0.761510372161865...","[{'label': 'joy', 'score': 0.44632869958877563...","[match, [{'label': 'anger', 'score': 0.7615103..."


In [34]:
comment_ratings = compare_sentiment_comments(comment_ratings, sentiment_column1='comments_list_filtered_sentiment', sentiment_column2='comments_list_filtered_sentiment_cardiff')
comment_ratings = compare_emotion_comments(comment_ratings, emotion_column1='comments_list_filtered_emotion', emotion_column2='comments_list_filtered_emotion_bert')
comment_ratings

Unnamed: 0,index,comments,comments_list_filtered,comments_list_filtered_sentiment,comments_list_filtered_sentiment_cardiff,comments_list_filtered_emotion,comments_list_filtered_emotion_bert,comments_list_filtered_sentiment_comparison,comments_list_filtered_emotion_sorted,comments_list_filtered_emotion_bert_sorted,comments_list_filtered_emotion_comarison
0,0,[],[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
1,1,[],[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
2,2,['CYPH3R_22: They have to be on the same netwo...,"['They have to be on the same network', 'I wou...","[{'label': 'NEGATIVE', 'score': 0.978289425373...","[{'label': 'neutral', 'score': 0.6682137250900...","[[{'label': 'anger', 'score': 0.01139766629785...","[[{'label': 'sadness', 'score': 0.160842120647...","[NEG-NEU, NEG-NEU, NEG]","[[{'label': 'neutral', 'score': 0.903585433959...","[[{'label': 'anger', 'score': 0.49706757068634...","[[no_match, [neutral, surprise], [anger, joy]]..."
3,3,['halcyon918: They are.\nAWS has launched [Be...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
4,4,['NikkiRoxi: DROP IN is not just for use betwe...,['DROP IN is not just for use between the same...,"[{'label': 'POSITIVE', 'score': 0.997146189212...","[{'label': 'neutral', 'score': 0.5218997001647...","[[{'label': 'anger', 'score': 0.01492923498153...","[[{'label': 'sadness', 'score': 0.014556899666...","[POS-NEU, NEG-NEU, POS-NEU, NEG-NEU, NEG]","[[{'label': 'neutral', 'score': 0.943244040012...","[[{'label': 'joy', 'score': 0.8855955600738525...","[[no_match, [neutral, surprise], [joy, anger]]..."
...,...,...,...,...,...,...,...,...,...,...,...
5806,5806,['Riquende: Is it the Sleep Jar skill? The fre...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
5807,5807,['Konarkanuck: I asked one of my devices to pu...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
5808,5808,['josh4587: You can actually go the alexa.Amaz...,[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"
5809,5809,"[""ChunkyLaFunga: Alexa Settings > Notification...",[NaN],"[{'label': 'POSITIVE', 'score': 0.981534481048...","[{'label': 'neutral', 'score': 0.4124576449394...","[[{'label': 'anger', 'score': 0.02691297046840...","[[{'label': 'sadness', 'score': 0.105926208198...",[POS-NEU],"[[{'label': 'neutral', 'score': 0.804418623447...","[[{'label': 'anger', 'score': 0.50405991077423...","[[no_match, [neutral, sadness], [anger, fear]]]"


In [35]:
title_ratings.to_csv('./data/alexa_title_ratings_praw_23-May-2023.csv', sep='\t', index=False)
body_ratings.to_csv('./data/alexa_body_ratings_praw_23-May-2023.csv', sep='\t', index=False)
comment_ratings.to_csv('./data/alexa_comment_ratings_praw_23-May-2023.csv', sep='\t', index=False)