In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the ABSA model and tokenizer
model_name = "yangheng/deberta-v3-base-absa-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

In [6]:
import pandas as pd
df = pd.read_excel('combined_news.xlsx')
#drop empty data
df = df.dropna(subset=['Content'])
df = df.dropna(subset=['Title'])

In [7]:
# Create a reverse mapping dictionary to get related strings for a stock ticker
ticker_mapping = {
    "META": "Meta",
    "AAPL": "Apple",
    "GOOGL": "Google",
    "MSFT": "Microsoft",
    "TSM": "Taiwan Semiconductor Manufacturing",
    "BRK": "Berkshire Hathaway",
    "TSLA": "Tesla",
    "NVDA": "Nvidia",
    "AMZN": "Amazon",
    # Add more mappings as needed
}

# Function to get related strings for a stock ticker
def get_ticker_name(ticker, mapping):
    return mapping.get(ticker, [ticker])

In [9]:
import ast
import nltk
def compute_absa_sentiment(text, aspect, method="avg"):
    if method == "avg":
        try:
            chunks = nltk.sent_tokenize(text)
            #print(chunks,len(chunks))
            sentiments = []
            for chunk in chunks:
                result = classifier(chunk,  text_pair=aspect)
                # print(result)
                label = result[0]['label']
                score = result[0]['score']
                # 将标签转换为情感分数并添加到列表中
                if label == 'Positive':
                    sentiments.append(score*1)
                elif label == 'Neutral':
                    sentiments.append(0)
                elif label == 'Negative':
                    sentiments.append(score*-1)
            score = sum(sentiments) / len(sentiments)
            if score > 0:
                positive_values = [val for val in sentiments if val > 0]
                print(sum(positive_values) / len(positive_values))
                return  sum(positive_values) / len(positive_values)
            elif score < 0:
                negative_values = [val for val in sentiments if val < 0]
                return sum(negative_values) / len(negative_values)
            else:
                return 0
        except Exception as e:
            print(f"Error processing text: {e}")
        return 0.0
    if method == "max":
        try:
            chunks = nltk.sent_tokenize(text)
            #print(chunks,len(chunks))
            sentiments = []
            for chunk in chunks:
                result = classifier(chunk,  text_pair=aspect)
                # print(result)
                label = result[0]['label']
                score = result[0]['score']
                # 将标签转换为情感分数并添加到列表中
                if label == 'Positive':
                    sentiments.append(score*1)
                elif label == 'Neutral':
                    sentiments.append(0)
                elif label == 'Negative':
                    sentiments.append(score*-1)
            score = sum(sentiments) / len(sentiments)
            if score > 0:
                return max(sentiments)
            elif score < 0:
                return min(sentiments)
            else:
                return 0
        except Exception as e:
            print(f"Error processing text: {e}")
        return 0.0

In [10]:
for index, row in df.iterrows():
    tags_str = row['Tag']
    tags = ast.literal_eval(tags_str)
    for tag in tags:
        aspect = get_ticker_name(tag, ticker_mapping)
        column_content = f'{tag}_sentiment_content'
        column_title = f'{tag}_sentiment_title'
        df.loc[index, column_content] = compute_absa_sentiment(row['Content'], [aspect], method="avg")
        df.loc[index, column_title] = compute_absa_sentiment(row['Title'], [aspect], method="avg")
df

['Lucy Yu wasn’t sure if she had smoke in her lungs or was having an anxiety attack.', 'She needed fresh air.'] 2
['A fire left Lucy Yu’s literary hub in Chinatown gutted.', 'She was determined to rebuild it.'] 2
['Lucy Yu wasn’t sure if she had smoke in her lungs or was having an anxiety attack.', 'She needed fresh air.'] 2
['A fire left Lucy Yu’s literary hub in Chinatown gutted.', 'She was determined to rebuild it.'] 2
['When Frank Pine searched Google for a link to a news article two months ago, he encountered paragraphs generated by artificial intelligence about the topic at the top of his results.', 'To see what he wanted, he had to scroll past them.'] 2
['Since Google overhauled its search engine, publishers have tried to assess the danger to their brittle business models while calling for government intervention.'] 1
['When Frank Pine searched Google for a link to a news article two months ago, he encountered paragraphs generated by artificial intelligence about the topic at th

Unnamed: 0,Title,Tag,Created Date,url,Content,GOOGL_sentiment_content,GOOGL_sentiment_title,AMZN_sentiment_content,AMZN_sentiment_title,MSFT_sentiment_content,...,TSLA_sentiment_content,TSLA_sentiment_title,META_sentiment_content,META_sentiment_title,NVDA_sentiment_content,NVDA_sentiment_title,BRK_sentiment_content,BRK_sentiment_title,TSM_sentiment_content,TSM_sentiment_title
0,A fire left Lucy Yu’s literary hub in Chinatow...,"['GOOGL', 'AMZN']",2024-06-01T09:01:40+0000,https://www.nytimes.com/2024/06/01/business/yu...,Lucy Yu wasn’t sure if she had smoke in her lu...,0.0,0.000000,0.0,0.0,,...,,,,,,,,,,
1,"Since Google overhauled its search engine, pub...","['GOOGL', 'MSFT']",2024-06-01T09:02:28+0000,https://www.nytimes.com/2024/06/01/technology/...,When Frank Pine searched Google for a link to ...,0.0,0.000000,,,0.0,...,,,,,,,,,,
2,Bhutanese Americans are the newest citizens of...,['AAPL'],2024-06-01T09:02:59+0000,https://www.nytimes.com/2024/06/01/us/asian-am...,When Dinesh Nepal first arrived in the United ...,,,,,,...,,,,,,,,,,
3,She co-starred in the Apple TV+ show with Coli...,['AAPL'],2024-06-01T09:03:39+0000,https://www.nytimes.com/2024/06/01/arts/televi...,When Kirby accepted the role of Ruby — a Bondi...,,,,,,...,,,,,,,,,,
4,"“The Boys,” “Orphan Black: Echoes,” “The Bear”...","['AMZN', 'AAPL']",2024-06-01T09:03:58+0000,https://www.nytimes.com/2024/06/01/arts/televi...,"Every month, streaming services add movies and...",,,0.0,0.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
890,"Hear triumphant tracks from John Williams, Whi...",['AAPL'],2024-07-30T18:00:02+0000,https://www.nytimes.com/2024/07/30/arts/music/...,"I just got out my thermometer to confirm and, ...",,,,,,...,,,,,,,,,,
891,The Amazon Labor Union has been divided over s...,['AMZN'],2024-07-30T19:47:43+0000,https://www.nytimes.com/2024/07/30/business/ec...,A dissident group has won control of the Amazo...,,,0.0,0.0,,...,,,,,,,,,,
892,"The tech giant’s revenue also grew 15 percent,...","['GOOGL', 'MSFT']",2024-07-30T20:51:51+0000,https://www.nytimes.com/2024/07/30/technology/...,Microsoft closed its first full fiscal year of...,0.0,0.805221,,,0.0,...,,,,,,,,,,
893,The city’s leaders are forged in a mix of weal...,['AAPL'],2024-07-30T22:29:22+0000,https://www.nytimes.com/2024/07/30/us/politics...,Good evening. The presidential race has become...,,,,,,...,,,,,,,,,,


In [11]:
df.to_csv('output/absa_avg.csv', index=False, encoding='utf-8-sig')

In [None]:
for index, row in df.iterrows():
    print(index)
    tags_str = row['Tag']
    tags = ast.literal_eval(tags_str)
    for tag in tags:
        aspect = get_ticker_name(tag, ticker_mapping)
        column_content = f'{tag}_sentiment_content'
        column_title = f'{tag}_sentiment_title'
        df.loc[index, column_content] = compute_absa_sentiment(row['Content'], [aspect], method="max")
        df.loc[index, column_title] = compute_absa_sentiment(row['Title'], [aspect], method="max")
df

In [13]:
df.to_csv('output/absa_max.csv', index=False, encoding='utf-8-sig')