In [1]:
import pandas as pd
from glob import glob
import datetime
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [3]:
# Load the raw article table. The path uses a forward slash: the original
# 'Data_original\C...' relied on the invalid escape sequence '\C' (a
# SyntaxWarning on recent Python versions) and only resolved on Windows.
df = pd.read_csv('Data_original/CoinText_final.csv')
# Drop the leftover index column that was written out with the CSV.
df = df.drop(columns='Unnamed: 0')
df.shape

(15566, 2)

In [6]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,CoinText,CoinDate
0,"Last week, there was a small uproar in the Web...",2022/01/19
1,Crypto asset manager Valkyrie Investments want...,2022/12/30
2,The mining industry started 2022 off strong wi...,2022/12/30
3,Investment giant BlackRock (BLK) has committed...,2022/12/30
4,"For many people, myself included, who were dra...",2022/12/30


## 情緒分數轉換

In [8]:
# Collect the input files. Forward slash instead of a backslash: the original
# literal contained the invalid escape '\C' and only resolved on Windows.
files = glob('Data_original/CoinText_final.csv')
if not files:
    # pd.concat on an empty sequence raises an opaque "No objects to
    # concatenate" error; fail early with the actual cause instead.
    raise FileNotFoundError("No input CSV matched 'Data_original/CoinText_final.csv'")

# Merge the files, reading both columns as strings so dates are compared as
# plain text later. ignore_index=True gives a fresh 0..n-1 index; the original
# called df.reset_index() but discarded the result, so it had no effect.
df = pd.concat(
    (
        pd.read_csv(
            file,
            usecols=['CoinDate', 'CoinText'],
            dtype={'CoinDate': str, 'CoinText': str},
        )
        for file in files
    ),
    ignore_index=True,
)

# Build the calendar to iterate over: every day from 2018-01-01 through
# 2022-12-31, both endpoints included.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2022, 12, 31)
date_generated = pd.date_range(start=start, end=end)


# Load the tokenizer and the sequence-classification model from the same
# "ProsusAI/finbert" checkpoint (tokenizer first, then model).
tokenizer, model = (
    loader.from_pretrained("ProsusAI/finbert")
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)

# Output columns: the scoring loop appends one entry to each list per
# calendar day. Each name gets its own list object.
CoinDate_n, ScorePositive, ScoreNegative, ScoreNeutral = [], [], [], []

# Running per-day sums of the three class probabilities.
positive_score = negative_score = neutral_score = 0

# Number of calendar days processed so far.
i = 0

# Names of the classes being scored (not referenced elsewhere in the visible code).
labels = ['positive', 'negative', 'neutral']



# Walk the calendar one day at a time and record that day's mean class
# probabilities over all of its articles. A day with no articles reuses the
# previous day's scores multiplied by 0.99. Every 100 days the results so far
# are written to a checkpoint CSV.
for context_date in date_generated:
    date_key = context_date.strftime("%Y/%m/%d")

    # Select the day's articles once instead of re-filtering df repeatedly.
    day_texts = df.loc[df["CoinDate"] == date_key, "CoinText"]
    article_count = len(day_texts)

    if article_count == 0:
        # No articles: decay the previous day's scores by 1%. If there is no
        # previous day yet (the calendar starts before the first article),
        # start from 0.0 -- the original indexed [-1] on an empty list and
        # raised IndexError in that case.
        ScorePositive.append(ScorePositive[-1] * 0.99 if ScorePositive else 0.0)
        ScoreNegative.append(ScoreNegative[-1] * 0.99 if ScoreNegative else 0.0)
        ScoreNeutral.append(ScoreNeutral[-1] * 0.99 if ScoreNeutral else 0.0)
    else:
        for Context in day_texts:
            # Tokenize the article (special tokens are added by the
            # tokenizer; over-long text is truncated).
            inputs = tokenizer(str(Context), return_tensors='pt', padding=True, truncation=True)

            # Inference only: disable gradient tracking so no autograd state
            # is kept for each forward pass.
            with torch.no_grad():
                outputs = model(**inputs)
            predicted_scores = torch.softmax(outputs.logits, dim=1)

            # NOTE(review): assumes the checkpoint's output order is
            # 0=positive, 1=negative, 2=neutral -- confirm against
            # model.config.id2label.
            positive_score += float(predicted_scores[0][0])
            negative_score += float(predicted_scores[0][1])
            neutral_score += float(predicted_scores[0][2])

        # Daily score = mean probability across the day's articles.
        ScorePositive.append(positive_score / article_count)
        ScoreNegative.append(negative_score / article_count)
        ScoreNeutral.append(neutral_score / article_count)

        # Reset the running sums for the next day that has articles.
        positive_score = 0
        negative_score = 0
        neutral_score = 0

    CoinDate_n.append(date_key)
    print(str(context_date))

    i += 1
    # Checkpoint every 100 processed days so a crash does not lose everything.
    # (The 'CionScore' spelling in the file name is kept as-is, since other
    # code may read these files by that name.)
    if i % 100 == 0:
        lo = {'CoinDate': CoinDate_n, 'ScorePositive': ScorePositive, 'ScoreNegative': ScoreNegative, 'ScoreNeutral': ScoreNeutral}
        dfz = pd.DataFrame(lo)
        dfz.to_csv('Data_score/CionScore_' + str(i) + '.csv')


# Write the complete per-day score table once the whole calendar is processed.
lo = dict(
    CoinDate=CoinDate_n,
    ScorePositive=ScorePositive,
    ScoreNegative=ScoreNegative,
    ScoreNeutral=ScoreNeutral,
)
dfz = pd.DataFrame(lo)
dfz.to_csv('Data_score/CionScore_final.csv')

Downloading (…)okenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)lve/main/config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

2018-01-01 00:00:00
2018-01-02 00:00:00
2018-01-03 00:00:00
2018-01-04 00:00:00
2018-01-05 00:00:00
2018-01-06 00:00:00
2018-01-07 00:00:00
2018-01-08 00:00:00
2018-01-09 00:00:00
2018-01-10 00:00:00
2018-01-11 00:00:00
2018-01-12 00:00:00
2018-01-13 00:00:00
2018-01-14 00:00:00
2018-01-15 00:00:00
2018-01-16 00:00:00
2018-01-17 00:00:00
2018-01-18 00:00:00
2018-01-19 00:00:00
2018-01-20 00:00:00
2018-01-21 00:00:00
2018-01-22 00:00:00
2018-01-23 00:00:00
2018-01-24 00:00:00
2018-01-25 00:00:00
2018-01-26 00:00:00
2018-01-27 00:00:00
2018-01-28 00:00:00
2018-01-29 00:00:00
2018-01-30 00:00:00
2018-01-31 00:00:00
2018-02-01 00:00:00
2018-02-02 00:00:00
2018-02-03 00:00:00
2018-02-04 00:00:00
2018-02-05 00:00:00
2018-02-06 00:00:00
2018-02-07 00:00:00
2018-02-08 00:00:00
2018-02-09 00:00:00
2018-02-10 00:00:00
2018-02-11 00:00:00
2018-02-12 00:00:00
2018-02-13 00:00:00
2018-02-14 00:00:00
2018-02-15 00:00:00
2018-02-16 00:00:00
2018-02-17 00:00:00
2018-02-18 00:00:00
2018-02-19 00:00:00
