In [234]:
import pandas as pd
import numpy as np
import transformers
from tqdm import tqdm
import pysentiment2 as ps

In [235]:
data = pd.read_csv('EarningCallData/output.csv')

In [236]:
model_name = 'ProsusAI/finbert'
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForSequenceClassification.from_pretrained(model_name)

test = " NVIDIA stock is going very bad, I am very sad"
tokenized = tokenizer(test, return_tensors='pt')
output = model(**tokenized)
# scores softmax
scores = output.logits.softmax(dim=1).detach().numpy()
scores[0]

array([0.01014265, 0.95804197, 0.03181531], dtype=float32)

In [237]:
def get_sentiment_sentence_bert(text):
    sentences = text.split('.')
    list_sentiment = []
    for sentence in sentences:
        tokenized = tokenizer(sentence, return_tensors='pt')
        output = model(**tokenized)
        scores = output.logits.softmax(dim=1).detach().numpy()
        list_sentiment.append(scores[0])
    return np.mean(list_sentiment, axis=0)

In [238]:
def get_sentiment_bert(data, col='text'):
    texts = data[col]
    positives = []
    negatives = []
    neutrals = []
    polarities = []
    for i in tqdm(range(len(data))):
        text = texts[i]
        positive, negative, neutral = get_sentiment_sentence_bert(text)
        positives.append(positive)
        negatives.append(negative)
        neutrals.append(neutral)
        polarity = (positive - negative) / (positive + negative + neutral)
        polarities.append(polarity)

    
    # new column for sentiment
    data['positive_sentiment_bert'] = positives
    data['negative_sentiment_bert'] = negatives
    data['neutral_sentiment_bert'] = neutrals
    data['polarity_bert'] = polarities
    return data

In [239]:
get_sentiment_sentence_bert('''Good day, and welcome to the Apple Inc. Second Quarter Fiscal Year 2019 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, I would like to turn the call over to Nancy Paxton, Senior Director of Investor Relations. Please go ahead.''')

array([0.11321773, 0.06799968, 0.81878257], dtype=float32)

In [240]:
get_sentiment_sentence_bert('''Thank you. Good afternoon, and thanks to everyone for joining us today. Speaking first is Apple's CEO, Tim Cook; and he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts.
Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook. Actual results or trends could differ materially from our forecast. For more information, please refer to the risk factors discussed in Apple's most recently filed periodic reports on Form 10-K and Form 10-Q and the Form 8-K filed with the SEC today along with the associated press release. Apple assumes no obligation to update any forward-looking statements or information, which speak as of their respective dates.
I'd now like to turn the call over to Tim for introductory remarks.''')

array([0.15933189, 0.0639887 , 0.77667946], dtype=float32)

In [241]:
words = ['margin', 'cost', 'revenue', 'earnings', 'growth', 'debt', 'dividend', 'cashflow']

def get_sentiment_topic(data, words):
    texts = data['text'].apply(lambda x: x.lower())
    positives = {}
    negatives = {}
    neutrals = {}
    polarities = {}
    for word in words:
        positives[word] = []
        negatives[word] = []
        neutrals[word] = []
        polarities[word] = []
    for i in tqdm(range(len(data))):
        text = texts[i]
        for word in words:

            if word in text:
                positive, negative, neutral = get_sentiment_sentence_bert(text)
                polarity = (positive - negative) / (positive + negative + neutral)
            else:
                positive, negative, neutral = -1, -1, -1
                polarity = -1
            positives[word].append(positive)
            negatives[word].append(negative)
            neutrals[word].append(neutral)
            polarities[word].append(polarity)
    for word in words:
        data[f'positive_sentiment_bert_{word}'] = positives[word]
        data[f'negative_sentiment_bert_{word}'] = negatives[word]
        data[f'neutral_sentiment_bert_{word}'] = neutrals[word]
        data[f'polarity_bert_{word}'] = polarities[word]
    return data

In [242]:
data = get_sentiment_bert(data)
data = get_sentiment_topic(data, words)

100%|██████████| 3537/3537 [21:21<00:00,  2.76it/s]  
100%|██████████| 3537/3537 [41:39<00:00,  1.42it/s]  


In [243]:
data.to_csv('EarningCallData/output_sentiment.csv')

### Merging

In [256]:
data = pd.read_csv('EarningCallData/output_sentiment.csv')

In [257]:
data.columns

Index(['Unnamed: 0', 'speaker', 'text', 'type', 'speaker_type',
       'speaker_company', 'speaker_role', 'company_name', 'date',
       'positive_sentiment_bert', 'negative_sentiment_bert',
       'neutral_sentiment_bert', 'polarity_bert',
       'positive_sentiment_bert_margin', 'negative_sentiment_bert_margin',
       'neutral_sentiment_bert_margin', 'polarity_bert_margin',
       'positive_sentiment_bert_cost', 'negative_sentiment_bert_cost',
       'neutral_sentiment_bert_cost', 'polarity_bert_cost',
       'positive_sentiment_bert_revenue', 'negative_sentiment_bert_revenue',
       'neutral_sentiment_bert_revenue', 'polarity_bert_revenue',
       'positive_sentiment_bert_earnings', 'negative_sentiment_bert_earnings',
       'neutral_sentiment_bert_earnings', 'polarity_bert_earnings',
       'positive_sentiment_bert_growth', 'negative_sentiment_bert_growth',
       'neutral_sentiment_bert_growth', 'polarity_bert_growth',
       'positive_sentiment_bert_debt', 'negative_sentiment_b

In [258]:
global_sentiment_cols = ['positive_sentiment_bert', 'negative_sentiment_bert', 'neutral_sentiment_bert', 'polarity_bert']

topic_sentiment_cols = [f'positive_sentiment_bert_{word}' for word in words] + [f'negative_sentiment_bert_{word}' for word in words] + [f'neutral_sentiment_bert_{word}' for word in words] + [f'polarity_bert_{word}' for word in words]

In [259]:
def mean_company_sentiment(col):
    return col[data['speaker_type'] == 'Corporate Participant'].mean()

def mean_analyst_sentiment(col):
    return col[data['speaker_type'] == 'Conference Participant'].mean()

def mean_presentation_sentiment(col):
    return col[data['type'] == 'presentation'].mean()

def mean_qa_sentiment(col):
    return col[data['type'] == 'qna'].mean()

def mean_topic_sentiment(col):
    if len(col[col != -1]) == 0:
        return -1
    return col[col != -1].mean()

In [248]:
mean_topic_sentiment(data['positive_sentiment_bert'])

0.24863737945066441

In [260]:
# for each transcript, average global sentiment, and average sentiment per section and per speaker

aggregations = dict()
for col in global_sentiment_cols:
    aggregations[col] = ['mean', 'std', mean_company_sentiment, mean_analyst_sentiment, mean_presentation_sentiment, mean_qa_sentiment]

for col in topic_sentiment_cols:
    aggregations[col] = [mean_topic_sentiment]

data = data.groupby(['company_name', 'date'])[global_sentiment_cols + topic_sentiment_cols].agg(
    aggregations,
)


In [261]:
data.columns = ['_'.join(col).strip() for col in data.columns.values]


In [262]:
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,positive_sentiment_bert_mean,positive_sentiment_bert_std,positive_sentiment_bert_mean_company_sentiment,positive_sentiment_bert_mean_analyst_sentiment,positive_sentiment_bert_mean_presentation_sentiment,positive_sentiment_bert_mean_qa_sentiment,negative_sentiment_bert_mean,negative_sentiment_bert_std,negative_sentiment_bert_mean_company_sentiment,negative_sentiment_bert_mean_analyst_sentiment,...,neutral_sentiment_bert_dividend_mean_topic_sentiment,neutral_sentiment_bert_cashflow_mean_topic_sentiment,polarity_bert_margin_mean_topic_sentiment,polarity_bert_cost_mean_topic_sentiment,polarity_bert_revenue_mean_topic_sentiment,polarity_bert_earnings_mean_topic_sentiment,polarity_bert_growth_mean_topic_sentiment,polarity_bert_debt_mean_topic_sentiment,polarity_bert_dividend_mean_topic_sentiment,polarity_bert_cashflow_mean_topic_sentiment
company_name,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL.OQ,2019-Apr-30,0.275219,0.146902,0.35008,0.223716,0.290696,0.27307,0.116967,0.151545,0.072156,0.188138,...,0.486832,-1,0.093242,0.114081,0.123101,0.278946,0.419899,0.381646,0.385235,-1
AAPL.OQ,2019-Jan-29,0.21379,0.123928,0.270128,0.144346,0.285989,0.20042,0.133582,0.130856,0.085612,0.222916,...,0.494469,-1,0.160213,0.114658,0.126732,0.192974,0.107132,0.338674,0.338674,-1
AAPL.OQ,2019-Jul-30,0.239877,0.132675,0.321407,0.151821,0.315715,0.229343,0.09551,0.09079,0.078204,0.110595,...,0.48404,-1,0.243646,0.341727,0.228,0.206572,0.292085,0.368815,0.368815,-1
AAPL.OQ,2019-Oct-30,0.25927,0.124101,0.300267,0.22418,0.325788,0.251351,0.083133,0.05719,0.071861,0.085602,...,0.460355,-1,0.223295,0.44294,0.22848,0.213371,0.229426,0.383407,0.383407,-1
AAPL.OQ,2020-Apr-30,0.265942,0.120974,0.308174,0.239274,0.294049,0.262339,0.10872,0.084167,0.104589,0.117725,...,0.424189,-1,0.057587,0.277642,0.274699,0.1755,0.362306,0.307184,0.307184,-1


In [263]:
data.to_csv('EarningCallData/output_sentiment_aggregated.csv')

# Topic modeling

In [174]:
output = pd.read_csv('EarningCallData/output.csv')

In [175]:
output.reset_index(inplace = True)
output.rename(columns = {'index': 'ID'}, inplace = True)
output.head()

Unnamed: 0,ID,speaker,text,type,speaker_type,speaker_company,speaker_role,company_name,date
0,0,Operator,"Good day, and welcome to the Apple Inc. Second...",presentation,Operator,,,AAPL.OQ,2019-Apr-30
1,1,Nancy Paxton,"Thank you. Good afternoon, and thanks to every...",presentation,Corporate Participant,Apple Inc.,Senior Director of IR and Treasury,AAPL.OQ,2019-Apr-30
2,2,Timothy D. Cook,"Thanks, Nancy. Good afternoon, and thanks to a...",presentation,Corporate Participant,Apple Inc.,CEO & Director,AAPL.OQ,2019-Apr-30
3,3,Luca Maestri,"Thank you, Tim. Good afternoon, everyone.\nRev...",presentation,Corporate Participant,Apple Inc.,CFO & Senior VP,AAPL.OQ,2019-Apr-30
4,4,Nancy Paxton,"Thank you, Luca. (Operator Instructions) Opera...",presentation,Corporate Participant,Apple Inc.,Senior Director of IR and Treasury,AAPL.OQ,2019-Apr-30


In [170]:
sentence_topics = pd.read_csv('EarningCallData/sentence_with_topics.csv')

In [171]:
len(sentence_topics)

29481

In [173]:
sentence_topics['Topic'].value_counts()

Topic
-1     14461
 0      2754
 1      2263
 2      1862
 3      1619
 4      1502
 5       933
 6       361
 7       313
 8       311
 9       281
 10      264
 11      226
 12      218
 13      215
 14      208
 15      193
 16      184
 17      183
 18      177
 19      172
 20      158
 21      138
 22      128
 23      125
 24      117
 25      115
Name: count, dtype: int64

In [178]:
sentence_topics = get_sentiment_bert(sentence_topics, col='Original_Sentence')

100%|██████████| 29481/29481 [29:00<00:00, 16.94it/s] 


In [188]:
sentence_topics.to_csv('EarningCallData/sentence_with_topics_sentiment.csv')
sentence_topics.head()

Unnamed: 0,ID,Sentence,Original_Sentence,Topic,Topic_Name,positive_sentiment_bert,negative_sentiment_bert,neutral_sentiment_bert,polarity_bert,20_positive,...,15_neutral,14_positive,14_negative,14_neutral,21_positive,21_negative,21_neutral,23_positive,23_negative,23_neutral
0,1,Thank you.,Thank you.,20,20_thank_you_contact_408,0.259784,0.119282,0.620935,0.140502,0.259784,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,1,"After that, we'll open the call to questions f...","After that, we'll open the call to questions f...",-1,-1_and_the_to_of,0.210997,0.116091,0.672913,0.094906,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,1,"Good [TIME], and thanks to everyone for joinin...","Good afternoon, and thanks to everyone for joi...",-1,-1_and_the_to_of,0.575572,0.115308,0.309121,0.460264,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,1,Please note that some of the information you'l...,Please note that some of the information you'l...,-1,-1_and_the_to_of,0.198082,0.117955,0.683963,0.080128,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,1,I'd now like to turn the call over to [PERSON]...,I'd now like to turn the call over to Tim for ...,-1,-1_and_the_to_of,0.210971,0.114845,0.674184,0.096126,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [None]:
sentence_topics = pd.read_csv('EarningCallData/sentence_with_topics_sentiment.csv')

In [201]:
topics = sentence_topics['Topic'].unique()

# Initialize new columns for each topic with default values -1
for topic in topics:
    sentence_topics[f'Topic_{topic}_positive'] = -1
    sentence_topics[f'Topic_{topic}_negative'] = -1
    sentence_topics[f'Topic_{topic}_neutral'] = -1

# Populate the columns based on the topic of the sentence
for index, row in sentence_topics.iterrows():
    topic = row['Topic']
    if topic != -1:
        sentence_topics.at[index, f'Topic_{topic}_positive'] = row['positive_sentiment_bert']
        sentence_topics.at[index, f'Topic_{topic}_negative'] = row['negative_sentiment_bert']
        sentence_topics.at[index, f'Topic_{topic}_neutral'] = row['neutral_sentiment_bert']


  sentence_topics.at[index, f'Topic_{topic}_positive'] = row['positive_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_negative'] = row['negative_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_neutral'] = row['neutral_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_positive'] = row['positive_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_negative'] = row['negative_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_neutral'] = row['neutral_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_positive'] = row['positive_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_negative'] = row['negative_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_neutral'] = row['neutral_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_positive'] = row['positive_sentiment_bert']
  sentence_topics.at[index, f'Topic_{topic}_negative'] = row['negative_sentiment_bert']
  sentence_topics.at[index, f'Topic_{t

In [202]:
sentence_topics.head()

Unnamed: 0,ID,Sentence,Original_Sentence,Topic,Topic_Name,positive_sentiment_bert,negative_sentiment_bert,neutral_sentiment_bert,polarity_bert,Topic_20_positive,...,Topic_15_neutral,Topic_14_positive,Topic_14_negative,Topic_14_neutral,Topic_21_positive,Topic_21_negative,Topic_21_neutral,Topic_23_positive,Topic_23_negative,Topic_23_neutral
0,1,Thank you.,Thank you.,20,20_thank_you_contact_408,0.259784,0.119282,0.620935,0.140502,0.259784,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,1,"After that, we'll open the call to questions f...","After that, we'll open the call to questions f...",-1,-1_and_the_to_of,0.210997,0.116091,0.672913,0.094906,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,1,"Good [TIME], and thanks to everyone for joinin...","Good afternoon, and thanks to everyone for joi...",-1,-1_and_the_to_of,0.575572,0.115308,0.309121,0.460264,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,1,Please note that some of the information you'l...,Please note that some of the information you'l...,-1,-1_and_the_to_of,0.198082,0.117955,0.683963,0.080128,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,1,I'd now like to turn the call over to [PERSON]...,I'd now like to turn the call over to Tim for ...,-1,-1_and_the_to_of,0.210971,0.114845,0.674184,0.096126,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [221]:
new_sentence_topics = sentence_topics.drop(['positive_sentiment_bert', 'negative_sentiment_bert', 'neutral_sentiment_bert', 'polarity_bert'], axis=1)
# drop rows with topic -1
new_sentence_topics = new_sentence_topics[new_sentence_topics['Topic'] != -1]

In [222]:
new_sentence_topics = pd.merge(new_sentence_topics, output, on='ID', how='left')

In [223]:
topics_columns = [f'Topic_{topic}_positive' for topic in topics] + [f'Topic_{topic}_negative' for topic in topics] + [f'Topic_{topic}_neutral' for topic in topics]

In [224]:
def mean_topic_sentiment(col):
    if len(col[col != -1]) == 0:
        return -1
    return col[col != -1].mean()

new_sentence_topics = new_sentence_topics.groupby(['company_name', 'date'])[topics_columns].agg(mean_topic_sentiment)

In [225]:
new_sentence_topics = new_sentence_topics.reset_index()

In [226]:
new_sentence_topics = new_sentence_topics.drop(['Topic_-1_positive', 'Topic_-1_negative', 'Topic_-1_neutral'], axis=1)

In [227]:
len(new_sentence_topics)

80

In [228]:
new_sentence_topics.to_csv('EarningCallData/sentence_with_topics_sentiment_aggregated.csv')

# Merge everything

In [264]:
sentiment1 = pd.read_csv('EarningCallData/output_sentiment_aggregated.csv')
sentiment2 = pd.read_csv('EarningCallData/sentence_with_topics_sentiment_aggregated.csv')

In [265]:
total = pd.merge(sentiment1, sentiment2, on=['company_name', 'date'], how='left')

In [266]:
total

Unnamed: 0,company_name,date,positive_sentiment_bert_mean,positive_sentiment_bert_std,positive_sentiment_bert_mean_company_sentiment,positive_sentiment_bert_mean_analyst_sentiment,positive_sentiment_bert_mean_presentation_sentiment,positive_sentiment_bert_mean_qa_sentiment,negative_sentiment_bert_mean,negative_sentiment_bert_std,...,Topic_18_neutral,Topic_17_neutral,Topic_11_neutral,Topic_19_neutral,Topic_16_neutral,Topic_24_neutral,Topic_15_neutral,Topic_14_neutral,Topic_21_neutral,Topic_23_neutral
0,AAPL.OQ,2019-Apr-30,0.275219,0.146902,0.350080,0.223716,0.290696,0.273070,0.116967,0.151545,...,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000
1,AAPL.OQ,2019-Jan-29,0.213790,0.123928,0.270128,0.144346,0.285989,0.200420,0.133582,0.130856,...,0.603240,-1.000000,0.254838,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000
2,AAPL.OQ,2019-Jul-30,0.239877,0.132675,0.321407,0.151821,0.315715,0.229343,0.095510,0.090790,...,0.506289,0.623594,0.249058,0.674872,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000
3,AAPL.OQ,2019-Oct-30,0.259270,0.124101,0.300267,0.224180,0.325788,0.251351,0.083133,0.057190,...,0.498152,-1.000000,-1.000000,-1.000000,0.664564,-1.000000,-1.000000,-1.000000,-1.0,-1.000000
4,AAPL.OQ,2020-Apr-30,0.265942,0.120974,0.308174,0.239274,0.294049,0.262339,0.108720,0.084167,...,-1.000000,-1.000000,-1.000000,0.357446,-1.000000,0.420143,0.572193,-1.000000,-1.0,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,NVDA.OQ,2022-May-25,0.226214,0.104665,0.257631,0.232044,0.230841,0.225817,0.111464,0.097230,...,-1.000000,0.631307,0.607124,0.392175,0.577913,0.341674,-1.000000,0.557476,-1.0,-1.000000
76,NVDA.OQ,2023-Aug-23,0.216853,0.101165,0.253289,0.194056,0.228734,0.215805,0.066790,0.040552,...,0.602606,0.609860,-1.000000,0.290254,0.553353,0.444915,-1.000000,0.567004,-1.0,-1.000000
77,NVDA.OQ,2023-May-24,0.212134,0.108861,0.236323,0.200305,0.212777,0.212079,0.069089,0.037818,...,0.514402,-1.000000,0.645941,0.395099,0.444133,0.653376,-1.000000,0.589666,-1.0,0.678692
78,NVDA.OQ,2023-Nov-21,0.202270,0.102265,0.230417,0.175290,0.236167,0.198763,0.075533,0.053616,...,0.434166,0.568382,-1.000000,0.487907,0.600183,-1.000000,-1.000000,0.605516,-1.0,-1.000000


In [267]:
total.to_csv('EarningCallData/total_sentiment.csv')

In [269]:
total.shape

(80, 137)

## TEST

In [82]:
# pysentiment analysis

# using HIV-4
def get_sentiment_text_hiv4(text):
    sentences = text.split('.')
    list_sentiment = []
    for sentence in sentences:
        hiv4 = ps.HIV4()
        tokens = hiv4.tokenize(sentence)
        score = list(hiv4.get_score(tokens).values())
        list_sentiment.append(score)
    return np.mean(list_sentiment, axis=0)
        

# using loughran mcdonald
def get_sentiment_text_lm(text):
    sentences = text.split('.')
    list_sentiment = []
    for sentence in sentences:
        lm = ps.LM()
        tokens = lm.tokenize(sentence)
        score = list(lm.get_score(tokens).values())
        list_sentiment.append(score)
    return np.mean(list_sentiment, axis=0)

In [83]:
# def get_sentiment_pysentiment(data):
#     texts = data['text']
#     positives_hiv4 = []
#     negatives_hiv4 = []
#     polarities_hiv4 = []
#     subjectivities_hiv4 = []
#     positives_lm = []
#     negatives_lm = []
#     polarities_lm = []
#     subjectivities_lm = []
#     for i in tqdm(range(len(data))):
#         text = texts[i]
#         positive_hiv4, negative_hiv4, polarity_hiv4, subjectivity_hiv4 = get_sentiment_text_hiv4(text)
#         positive_lm, negative_lm, polarity_lm, subjectivity_lm = get_sentiment_text_lm(text)
#         positives_hiv4.append(positive_hiv4)
#         negatives_hiv4.append(negative_hiv4)
#         polarities_hiv4.append(polarity_hiv4)
#         subjectivities_hiv4.append(subjectivity_hiv4)
#         positives_lm.append(positive_lm)
#         negatives_lm.append(negative_lm)
#         polarities_lm.append(polarity_lm)
#         subjectivities_lm.append(subjectivity_lm)
#     data['positive_sentiment_hiv4'] = positives_hiv4
#     data['negative_sentiment_hiv4'] = negatives_hiv4
#     data['polarity_hiv4'] = polarities_hiv4
#     data['subjectivity_hiv4'] = subjectivities_hiv4
#     data['positive_sentiment_lm'] = positives_lm
#     data['negative_sentiment_lm'] = negatives_lm
#     data['polarity_lm'] = polarities_lm
#     data['subjectivity_lm'] = subjectivities_lm
#     return data