### Packages

In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import Dataset 
from functools import partial

import transformers
from transformers import BertTokenizer, Trainer, BertForSequenceClassification, TrainingArguments, BertConfig
from transformers import PreTrainedModel, PreTrainedTokenizer, PretrainedConfig, AdamW

import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.metrics import auc, precision_recall_curve, average_precision_score, roc_curve, f1_score

  from .autonotebook import tqdm as notebook_tqdm
2024-04-18 02:08:42.838340: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-18 02:08:43.111604: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Import pretrained model

In [2]:
tuned_bert_model = BertForSequenceClassification.from_pretrained('BERT_fine_tuned_1', id2label={0: 'Neutral', 1: 'Positive', 2: 'Negative'})
senti_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [3]:
tuned_bert_model.config.id2label

{0: 'Neutral', 1: 'Positive', 2: 'Negative'}

### Load data

In [7]:
raw = pd.read_csv('tsla_merged_final.csv', encoding='utf-8', names=['Date', 'News_Headline'])
raw = raw.drop(raw.index[0])
raw.dropna(inplace=True)
raw

Unnamed: 0,Date,News_Headline
1,2020-12-31,Tesla short sellers lost billions more on the ...
2,2020-12-31,Enphase joins Tesla as the latest clean tech f...
3,2020-12-29,"In 2021, the breadth of the competitive attack..."
4,2020-12-29,"In 2021, the breadth of the competitive attack..."
5,2020-12-28,"In 2021, the breadth of the competitive attack..."
...,...,...
12908,2015-02-13,Apple has assigned 100s of staffers to an elec...
12909,2015-01-28,"Apple, Microsoft and Tesla shops are helping h..."
12910,2015-01-17,Elon Musk says Tesla won't be profitable until...
12911,2015-01-14,Elon Musk says Tesla won't be profitable until...


In [8]:
print(len(raw))

12912


In [10]:
from tqdm import tqdm

In [15]:
neutral = list()
positive = list()
negative = list()

for i in tqdm(range(len(raw)), desc="Processing"):
    inputs = senti_tokenizer(raw.iloc[i, 1], padding=True, truncation=True, max_length=512, return_tensors='pt')  # tokenize text to be sent to model
    outputs = tuned_bert_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    neutral.append(predictions[0][0].tolist())
    positive.append(predictions[0][1].tolist())
    negative.append(predictions[0][2].tolist())

Processing: 100%|██████████| 12912/12912 [1:39:06<00:00,  2.17it/s]  


In [16]:
df_array = np.array(raw)
df_list = list(df_array[:, 0])
table = {'News_Headline':df_list, "Positive":positive, "Negative":negative, "Neutral":neutral}      
result_ft_BERT = pd.DataFrame(table, columns = ["News_Headline", "Positive", "Negative", "Neutral"])
result_ft_BERT['Prediction'] = result_ft_BERT[['Positive', 'Negative', 'Neutral']].idxmax(axis=1)
# result_ft_BERT['label'] = raw['label'].copy()
result_ft_BERT

Unnamed: 0,News_Headline,Positive,Negative,Neutral,Prediction
0,2020-12-31,0.000794,0.998949,0.000257,Negative
1,2020-12-31,0.000421,0.000207,0.999372,Neutral
2,2020-12-29,0.999180,0.000609,0.000211,Positive
3,2020-12-29,0.999180,0.000609,0.000211,Positive
4,2020-12-28,0.999180,0.000609,0.000211,Positive
...,...,...,...,...,...
12907,2015-02-13,0.000232,0.000263,0.999505,Neutral
12908,2015-01-28,0.000266,0.000278,0.999456,Neutral
12909,2015-01-17,0.000336,0.000867,0.998797,Neutral
12910,2015-01-14,0.000327,0.000743,0.998930,Neutral


In [17]:
neutral_count = (result_ft_BERT['Prediction'] == 'Neutral').sum()
print('Neutral的數量:', neutral_count)

Neutral的數量: 7532


### Save to csv

In [18]:
type(result_ft_BERT)

pandas.core.frame.DataFrame

In [19]:
result_ft_BERT.to_csv('news_title_sentimentscore.csv', index=False)