### Packages

In [2]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import Dataset 
from functools import partial

import transformers
from transformers import BertTokenizer, Trainer, BertForSequenceClassification, TrainingArguments, BertConfig
from transformers import PreTrainedModel, PreTrainedTokenizer, PretrainedConfig, AdamW

import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.metrics import auc, precision_recall_curve, average_precision_score, roc_curve, f1_score

  from .autonotebook import tqdm as notebook_tqdm
2024-05-09 12:24:57.453306: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-09 12:24:57.494942: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Import pretrained model

In [3]:
# Class-index -> sentiment-label mapping used to interpret the classifier's
# output positions everywhere below.
SENTIMENT_ID2LABEL = {0: 'Neutral', 1: 'Positive', 2: 'Negative'}

# Load the fine-tuned sequence classifier. Both directions of the label
# mapping are passed so that config.id2label and config.label2id agree;
# overriding only id2label leaves whatever label2id was saved in the
# checkpoint's config untouched.
tuned_bert_model = BertForSequenceClassification.from_pretrained(
    'BERT_fine_tuned_1',
    id2label=SENTIMENT_ID2LABEL,
    label2id={label: idx for idx, label in SENTIMENT_ID2LABEL.items()},
)

# NOTE(review): the tokenizer is loaded from the base 'bert-base-uncased'
# checkpoint rather than from 'BERT_fine_tuned_1' — confirm this is the
# tokenizer that was used during fine-tuning.
senti_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [4]:
# Display the class-index -> label mapping stored on the loaded model's config.
tuned_bert_model.config.id2label

{0: 'Neutral', 1: 'Positive', 2: 'Negative'}

### Load data

In [5]:
# Read the headline file with explicit column names. Because `names` is given,
# pandas treats the file's first line as a data row (presumably the file's own
# header line — TODO confirm), so that row is removed below.
# NOTE(review): the file name is spelled 'News_Titile.csv' — confirm it matches
# the file on disk.
headlines_all_rows = pd.read_csv(
    'News_Titile.csv',
    encoding='utf-8',
    names=['Date', 'News_Headline'],
)

# Drop the first row, then discard any rows containing missing values.
raw = headlines_all_rows.drop(index=headlines_all_rows.index[0]).dropna()
raw

Unnamed: 0,Date,News_Headline
1,2024-04-29,Musk makes surprise visit to Beijing as Tesla’...
2,2024-04-28,How working for Big Tech lost ‘dream job’ status
3,2024-04-28,Elon Musk visits China as Tesla seeks self-dri...
4,2024-04-27,Nvidia jumps more than 15% this week. A key re...
5,2024-04-27,Federal regulator finds Tesla Autopilot has ‘c...
...,...,...
9414,2010-01-30,Electric Sportscar Maker Tesla Plans Public Of...
9415,2008-10-17,Tesla Says It Will Lay Off Employees and Delay...
9416,2008-10-16,Tesla Stalls--Layoffs And CEO Change Coming (U...
9417,2008-05-31,Get Ready for Tesla!


In [6]:
# Number of headlines remaining after cleaning.
print(raw.shape[0])

9418


In [7]:
from tqdm import tqdm

In [8]:
# Score every headline with the fine-tuned classifier and collect the softmax
# probability of each class. Logit positions follow the id2label mapping given
# when the model was loaded: 0 -> Neutral, 1 -> Positive, 2 -> Negative.
neutral = list()
positive = list()
negative = list()

# Put the model in evaluation mode so dropout is disabled during scoring.
tuned_bert_model.eval()

# This is inference only: without no_grad() every forward pass records an
# autograd graph, which costs memory and time across thousands of headlines.
with torch.no_grad():
    for i in tqdm(range(len(raw)), desc="Processing"):
        # Column 1 of `raw` holds the headline text; tokenize it for the model.
        inputs = senti_tokenizer(raw.iloc[i, 1], padding=True, truncation=True, max_length=512, return_tensors='pt')
        outputs = tuned_bert_model(**inputs)
        # Convert logits to class probabilities; the batch holds one headline.
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        neutral.append(predictions[0][0].tolist())
        positive.append(predictions[0][1].tolist())
        negative.append(predictions[0][2].tolist())

Processing: 100%|██████████| 9418/9418 [09:13<00:00, 17.02it/s]  


In [13]:
# Assemble the per-headline results: the two source columns (taken by position
# from `raw`) alongside the three class probabilities collected above.
score_columns = ["Positive", "Negative", "Neutral"]
raw_values = raw.to_numpy()

result_ft_BERT = pd.DataFrame(
    {
        "Date": list(raw_values[:, 0]),
        "News_Title": list(raw_values[:, 1]),
        "Positive": positive,
        "Negative": negative,
        "Neutral": neutral,
    },
    columns=["Date", "News_Title", *score_columns],
)

# The predicted label is the name of the score column with the highest probability.
result_ft_BERT["Prediction"] = result_ft_BERT[score_columns].idxmax(axis=1)
# Disabled: result_ft_BERT['label'] = raw['label'].copy()
result_ft_BERT

Unnamed: 0,Date,News_Title,Positive,Negative,Neutral,Prediction
0,2024-04-29,Musk makes surprise visit to Beijing as Tesla’...,0.000791,0.000181,0.999028,Neutral
1,2024-04-28,How working for Big Tech lost ‘dream job’ status,0.000679,0.995537,0.003784,Negative
2,2024-04-28,Elon Musk visits China as Tesla seeks self-dri...,0.000325,0.000226,0.999449,Neutral
3,2024-04-27,Nvidia jumps more than 15% this week. A key re...,0.999188,0.000620,0.000192,Positive
4,2024-04-27,Federal regulator finds Tesla Autopilot has ‘c...,0.000554,0.999161,0.000285,Negative
...,...,...,...,...,...,...
9413,2010-01-30,Electric Sportscar Maker Tesla Plans Public Of...,0.000202,0.000218,0.999580,Neutral
9414,2008-10-17,Tesla Says It Will Lay Off Employees and Delay...,0.000335,0.990678,0.008987,Negative
9415,2008-10-16,Tesla Stalls--Layoffs And CEO Change Coming (U...,0.000364,0.998993,0.000643,Negative
9416,2008-05-31,Get Ready for Tesla!,0.000263,0.000243,0.999493,Neutral


In [14]:
# Count how many headlines were classified as Neutral.
is_neutral = result_ft_BERT['Prediction'].eq('Neutral')
neutral_count = is_neutral.sum()
print('Neutral的數量:', neutral_count)

Neutral的數量: 5546


In [5]:
# Disabled exploratory check, kept for reference: reload a saved score file and
# select rows predicted 'Neutral' whose Neutral probability is below 0.75.
# NOTE(review): the first entry of `names` fuses 'News_Headline' and 'Positive'
# into a single string — confirm the intended column list before re-enabling.
# import pandas as pd
# raw = pd.read_csv('news_title_sentimentscore.csv', encoding='utf-8', names=['News_Headline	Positive','Negative','Neutral','Prediction'])
# filtered_df = raw[(raw['Prediction'] == 'Neutral') & (raw['Neutral'] < 0.75)]

### Save to csv

In [11]:
# Check the type of the results object before it is written to disk.
type(result_ft_BERT)

pandas.core.frame.DataFrame

In [15]:
# Write the scored headlines to CSV, leaving out the DataFrame index.
result_ft_BERT.to_csv(path_or_buf='News_Title.csv', index=False)