# Sentiment Analysis

## Import statements

In [1]:
import pandas as pd
import re
from functools import reduce
from textblob import TextBlob

## Load the Excel files from the data-cleaning stage into DataFrames and name the timestamp column

In [2]:
# Load each brand's workbook and, in the same step, relabel the column that
# pandas named 'Unnamed: 0' as 'timestamp'.
nike_og = pd.read_excel('nike.xlsx').rename(
    columns={'Unnamed: 0': 'timestamp'}
)
starbucks_og = pd.read_excel('starbucks.xlsx').rename(
    columns={'Unnamed: 0': 'timestamp'}
)
target_og = pd.read_excel('target.xlsx').rename(
    columns={'Unnamed: 0': 'timestamp'}
)

## Define a getPolarity function that uses the TextBlob library to score the sentiment of the comments

In [3]:
def getPolarity(text):
    """Return the TextBlob polarity score of ``text``.

    Args:
        text: The string to analyse.

    Returns:
        The ``polarity`` component of ``TextBlob(text).sentiment``.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

## Create new DataFrames with the cleaned strings, then use TextBlob to assign each comment a polarity score

In [4]:
# Clean the comments with pandas' vectorised string methods rather than a
# manual append loop: lowercase first, then remove every run of characters
# that is not an ASCII letter, digit or whitespace.
# NOTE: the ASCII-only pattern also strips accented/non-Latin letters and emoji.
# Missing comments (NaN) are replaced with '' so the scoring step below always
# receives a string; the previous str.lower(x) call raised TypeError on them.
nike_list = (
    nike_og['comments']
    .str.lower()
    .str.replace(r'[^a-zA-Z0-9\s]+', '', regex=True)
    .fillna('')
    .tolist()
)

# adds the nike_list to a new dataframe
nike = pd.DataFrame(nike_list, columns=['comments'])

# adds the timestamps to the new dataframe (join aligns rows on the index)
nike = nike.join(nike_og['timestamp'])

# performs sentiment analysis on the cleaned comments of each row
nike['polarity_score'] = nike['comments'].apply(getPolarity)

# rearrange column order
nike = nike[['timestamp', 'polarity_score', 'comments']]

# Display the frame indexed by timestamp. set_index returns a new DataFrame
# and the result is not assigned, so `nike` keeps 'timestamp' as a column.
nike.set_index('timestamp')

Unnamed: 0_level_0,polarity_score,comments
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-10-25,0.450000,simplynahj hopefully a spring collection soon...
2023-10-21,0.600000,cxrson317 enek wong keweden ndango ndelik nen...
2023-10-23,0.641667,motazazaiza kwzzedsxrrscda sdwu f tnis feio...
2023-10-20,0.060000,viva cada momento da tua vida como se fosse ...
2023-10-19,0.404928,2x champion 2x finals mvp 2x league mvp gener...
...,...,...
2023-05-12,0.075758,idk about you guys but we need them buckle sho...
2023-05-09,0.149028,you cute hi nike love your shoes sos help om...
2023-07-27,0.403125,she cool snhe insnsuper amazing niceeeeeeee...
2023-07-25,0.311538,solid can u sell the shirts of all the players...


In [5]:
# Clean the comments with pandas' vectorised string methods rather than a
# manual append loop: lowercase first, then remove every run of characters
# that is not an ASCII letter, digit or whitespace.
# NOTE: the ASCII-only pattern also strips accented/non-Latin letters and emoji.
# Missing comments (NaN) are replaced with '' so the scoring step below always
# receives a string; the previous str.lower(x) call raised TypeError on them.
star_list = (
    starbucks_og['comments']
    .str.lower()
    .str.replace(r'[^a-zA-Z0-9\s]+', '', regex=True)
    .fillna('')
    .tolist()
)

# adds the star_list to a new dataframe
starbucks = pd.DataFrame(star_list, columns=['comments'])

# adds the timestamps to the new dataframe (join aligns rows on the index)
starbucks = starbucks.join(starbucks_og['timestamp'])

# performs sentiment analysis on the cleaned comments of each row
starbucks['polarity_score'] = starbucks['comments'].apply(getPolarity)

# rearrange column order
starbucks = starbucks[['timestamp', 'polarity_score', 'comments']]

# Display the frame indexed by timestamp. set_index returns a new DataFrame
# and the result is not assigned, so `starbucks` keeps 'timestamp' as a column.
starbucks.set_index('timestamp')

Unnamed: 0_level_0,polarity_score,comments
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-10-20,0.126531,byeeeee steelenutritiondietitian we love to h...
2023-10-23,0.600000,hey starbucks we can do you your coffee at ...
2023-10-24,0.364444,psl hailadams we hope you enjoyed every si...
2023-10-18,0.541667,que perfeio de caf alm de lindo deve ser muito...
2023-10-19,0.700000,allaha kr hi icmemisim starbucks l mejor caf ...
...,...,...
2023-04-26,0.182011,charging for no ice or light ice is crazy if i...
2023-04-12,0.066667,i needed bring back the smores frap can you m...
2023-09-08,0.246667,day 10 of asking for a melanie martinez drink ...
2023-08-30,0.370130,thats really nice of you day 9 of asking for ...


In [6]:
# Clean the comments with pandas' vectorised string methods rather than a
# manual append loop: lowercase first, then remove every run of characters
# that is not an ASCII letter, digit or whitespace.
# NOTE: the ASCII-only pattern also strips accented/non-Latin letters and emoji.
# Missing comments (NaN) are replaced with '' so the scoring step below always
# receives a string; the previous str.lower(x) call raised TypeError on them.
target_list = (
    target_og['comments']
    .str.lower()
    .str.replace(r'[^a-zA-Z0-9\s]+', '', regex=True)
    .fillna('')
    .tolist()
)

# adds the target_list to a new dataframe
target = pd.DataFrame(target_list, columns=['comments'])

# adds the timestamps to the new dataframe (join aligns rows on the index)
target = target.join(target_og['timestamp'])

# performs sentiment analysis on the cleaned comments of each row
target['polarity_score'] = target['comments'].apply(getPolarity)

# rearrange column order
target = target[['timestamp', 'polarity_score', 'comments']]

# Display the frame indexed by timestamp. set_index returns a new DataFrame
# and the result is not assigned, so `target` keeps 'timestamp' as a column.
target.set_index('timestamp')

Unnamed: 0_level_0,polarity_score,comments
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-09-28,0.096996,target donated 10 million to blm do not use ta...
2023-10-24,0.379378,creative and loving it wowww vwebb84 this ha...
2023-10-21,0.137720,how yall not include aaliyahjay shes thee ba...
2023-10-18,0.325082,yes to every single look love to see it i w...
2023-10-17,0.231656,oh wow this is fun imfermoraless dang where is...
...,...,...
2023-05-19,0.223730,can we be friends can you give me free tns fir...
2023-05-18,0.186389,may i get a free pair please your my fav shoe...
2023-05-17,0.181212,sos help nike can you send me some shoes hey n...
2023-05-11,0.107060,you cute hi nike love your shoes sos help lo...
