In [12]:
import pandas as pd
import nltk

# Load the NRC lexicon
def load_nrc_lexicon(path='NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'):
    """Read a word-level NRC Emotion Lexicon file into a nested dictionary.

    Every data line must contain exactly three tab-separated fields:
    the word, an emotion/sentiment label, and an integer score.
    Blank lines and lines starting with ``#`` are ignored.

    Args:
        path: Location of the lexicon file. Defaults to the file name this
            notebook has always used, resolved against the current working
            directory.

    Returns:
        dict: ``{word: {label: score}}`` with ``score`` converted to ``int``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data line does not have exactly three
            tab-separated fields, or its score is not an integer.
    """
    lexicon = {}
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # A blank line would split into a single empty field and break
            # the three-way unpack below, so skip it along with comments.
            if not line or line.startswith('#'):
                continue
            word, emotion, score = line.split('\t')
            lexicon.setdefault(word, {})[emotion] = int(score)
    return lexicon

# with an input dataframe and the column name, output a dataframe with the appended sentiment scores
def perform_sentiment_analysis(df, text_column, lexicon=None):
    """Label every row of ``df`` as 'Positive', 'Negative' or 'Neutral'.

    Each entry of ``df[text_column]`` is tokenised with
    ``nltk.word_tokenize`` and lower-cased. For every token found in the
    lexicon, its 'positive' and 'negative' scores are added to two running
    totals; the larger total decides the label and a tie (including the
    case of no matching tokens) gives 'Neutral'.

    Args:
        df: DataFrame containing the text. A 'Sentiment' column is added to
            (or overwritten on) this same object.
        text_column: Name of the column holding the text to score.
        lexicon: Optional pre-loaded mapping ``{word: {label: score}}``.
            When omitted, the lexicon is read with ``load_nrc_lexicon()``.

    Returns:
        The same ``df`` object, now carrying the 'Sentiment' column.
    """
    # Allow callers to reuse an already-loaded lexicon instead of re-reading
    # the file on every call.
    if lexicon is None:
        lexicon = load_nrc_lexicon()

    # create a placeholder list for the sentiment scores
    sentiment_scores = []

    # iterate through the entries in the specified column
    for text in df[text_column]:
        if isinstance(text, str):
            raw_tokens = nltk.word_tokenize(text)
        elif pd.api.types.is_scalar(text) and pd.isna(text):
            # Missing values (None / NaN / pd.NA) are not strings and would
            # make the tokenizer raise; treat them as having no tokens so
            # the row falls through to 'Neutral'.
            raw_tokens = []
        else:
            # Any other non-string value is scored on its text form.
            raw_tokens = nltk.word_tokenize(str(text))

        # convert to lowercase
        tokens = [token.lower() for token in raw_tokens]

        # we can change the baseline for scores here
        positive_score = 0
        negative_score = 0

        # accumulate the positive / negative scores of every known token
        for token in tokens:
            entry = lexicon.get(token)
            if entry is None:
                continue
            positive_score += entry.get('positive', 0)
            negative_score += entry.get('negative', 0)

        # append result to the sentiment list
        if positive_score > negative_score:
            sentiment_scores.append('Positive')
        elif positive_score < negative_score:
            sentiment_scores.append('Negative')
        else:
            sentiment_scores.append('Neutral')

    # append the sentiment column to the dataframe
    df['Sentiment'] = sentiment_scores
    return df

In [13]:
# Load the reviews CSV and label the 'text' column in one step
df = perform_sentiment_analysis(pd.read_csv("indeed_reviews_ca2.csv"), 'text')
print(df.head())

# Write the labelled reviews out to a new CSV file
df.to_csv('indeed_reviews_ca_sentiment.csv')

                 id                         title  \
0  1glainscfj5ns800                   great place   
1  1gl83rlfbj5ns802             Fun place to work   
2  1gkvl3j0vjoq4802                Scooting along   
3  1gkon2j5cis9m801  Terrible company to work for   
4  1gk2693uollkn800                Flexible hours   

                                                text  rating  \
0  flexible and low key.  Drive your own car and ...       5   
1  Working for Uber has been a fun experience. I ...       4   
2  Good for rise and gig work. Unless of course y...       4   
3  Worked for UBER for 2 years and all they do is...       1   
4  You have to work your butt off in order to mak...       1   

                 reviewer                location  \
0                  Driver     California City, CA   
1        Uber Eats Driver  Los Angeles County, CA   
2  Director of Operations         Los Angeles, CA   
3          Driver-Partner         Los Angeles, CA   
4                  Driver      