Sentiment analysis of sentences containing keywords from the emotion and technology lists

In [2]:
import re
import os
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm
tqdm.pandas()
from afinn import Afinn

In [None]:
# Load the corpus table (pipe-separated CSV file)
input_dir = Path.cwd() / '../data/csv_files'  # input directory
df = pd.read_csv(input_dir.joinpath('text_data230826.csv'), sep='|')


# Select one work by title and take the cleaned text of its first matching row
work = 'La Cousine Bette'
text = df.loc[df['title'] == work, 'clean_text'].iloc[0]


# split the text in sentences
def sent_tokenizer(text):
    """Split *text* into sentences on every full stop.

    The full stops themselves are removed and leading whitespace is stripped
    from each piece. Trailing whitespace is kept, and empty pieces (for
    example the one after a final full stop) are kept as well.

    Args:
        text: The text to split.

    Returns:
        A list of sentence strings, in their original order.
    """
    # Raw string: '\.' in a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    pieces = re.split(r'\.', text)
    return [piece.lstrip() for piece in pieces]

sentence_list = sent_tokenizer(text)


#### Read two lists of keywords - technology and emotion keywords ####

# path to keyword lists directory
input_dir = Path.cwd() / '../data/key_word_lists'


# Each file holds one keyword per line. Blank lines (including the empty entry
# produced by a trailing newline) are dropped: an empty keyword is a substring
# of every sentence and would therefore match all of them.

# get the tech words
key_word_file_name = 'technology_list.txt'
with open(input_dir / key_word_file_name, 'r', encoding='utf-8-sig') as file:
    tech_key_words = [word for word in file.read().split('\n') if word.strip()]

# get the emo words
key_word_file_name = 'emotion_list.txt'
with open(input_dir / key_word_file_name, 'r', encoding='utf-8-sig') as file:
    emo_key_words = [word for word in file.read().split('\n') if word.strip()]
    
    
#####################
# add two word lists and run to get a sentiment score
####################
def sentiment_analysis(word_list1 = emo_key_words, word_list2 = tech_key_words):
    """Score every sentence that contains a keyword from both lists.

    Scans the module-level ``sentence_list``. For each sentence containing a
    keyword from ``word_list1`` and a keyword from ``word_list2``, one row is
    recorded per (word1, word2) pair, together with the sentence's AFINN
    score.

    Matching is a plain substring test (``word in sentence``), so it is
    case-sensitive and a keyword also matches inside longer words. Empty or
    whitespace-only keywords are ignored, because an empty string is a
    substring of every sentence.

    Args:
        word_list1: Keywords reported in the ``word1`` column
            (defaults to the emotion keywords).
        word_list2: Keywords reported in the ``word2`` column
            (defaults to the technology keywords).

    Returns:
        A DataFrame with the columns ``word1``, ``word2``,
        ``sentiment_score`` and ``sentence``. Rows are ordered by word1,
        then by sentence, then by word2.
    """
    from_word_list_one = []
    from_word_list_two = []
    sentiment_scores = []
    sentences = []

    # Build the scorer once; constructing it inside the loops repeats the
    # setup work for every single match.
    # NOTE(review): Afinn() is created with its default settings. If the
    # corpus is not in the lexicon's default language, pass the matching
    # `language=` argument - confirm against the text being analysed.
    afinn = Afinn()

    # Blank keywords would match every sentence; filter them out up front.
    second_keywords = [word for word in word_list2 if word.strip()]

    for word1 in word_list1:
        if not word1.strip():
            continue
        for sent in sentence_list:
            if word1 not in sent:
                continue
            matches = [word2 for word2 in second_keywords if word2 in sent]
            if not matches:
                continue
            # Score the sentence once and reuse it for every matching pair.
            sent_score = afinn.score(str(sent))
            for word2 in matches:
                from_word_list_one.append(word1)
                from_word_list_two.append(word2)
                sentiment_scores.append(sent_score)
                sentences.append(sent)

    senti_dataframe = pd.DataFrame({'word1': from_word_list_one, 'word2': from_word_list_two,
                                    'sentiment_score': sentiment_scores, 'sentence': sentences})

    return senti_dataframe

# add positive, negative, or neutral category
def apply_sentiment_cat(row):
    """Map a sentiment score to its polarity label.

    Args:
        row: A single numeric sentiment score.

    Returns:
        'pos' for a score above zero, 'neg' for a score below zero, 'neu'
        for exactly zero, and None when no comparison holds (e.g. NaN).
    """
    if row > 0:
        return 'pos'
    if row < 0:
        return 'neg'
    if row == 0:
        return 'neu'
    # Reached only when the value compares false to everything, e.g. NaN.
    return None
                
# Run the analysis on both keyword lists, then label each score's polarity
sentiment_dataframe = sentiment_analysis(word_list1=emo_key_words, word_list2=tech_key_words)
sentiment_dataframe['sentiment_cat'] = sentiment_dataframe['sentiment_score'].apply(apply_sentiment_cat)

# Display the result as the notebook cell output
sentiment_dataframe