In [5]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
from tqdm import tqdm
import json
# Loaded (tokenizer, config, model) triples keyed by checkpoint name, so the
# checkpoint is initialised once per kernel session instead of once per text.
_SENTIMENT_MODEL_CACHE = {}


def _load_sentiment_model(model_name):
    """Return the ``(tokenizer, config, model)`` triple for ``model_name``.

    The triple is created on first use and served from
    ``_SENTIMENT_MODEL_CACHE`` on every later call.
    """
    if model_name not in _SENTIMENT_MODEL_CACHE:
        _SENTIMENT_MODEL_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoConfig.from_pretrained(model_name),
            AutoModelForSequenceClassification.from_pretrained(model_name),
        )
    return _SENTIMENT_MODEL_CACHE[model_name]


def _mask_mentions_and_links(text):
    """Replace ``@handle`` tokens with ``@user`` and ``http...`` tokens with ``http``.

    Tokens are obtained by splitting on single spaces; a lone ``@`` is kept.
    """
    masked = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        masked.append(token)
    return " ".join(masked)


def get_sentiment_scores(text, max_length=512):
    """Score ``text`` with the Cardiff NLP Twitter RoBERTa sentiment model.

    Args:
        text: The string to classify. Mentions and links are masked before
            tokenisation.
        max_length: Maximum number of tokens passed to the model; longer
            inputs are truncated. The default of 512 is assumed to match the
            checkpoint's position limit -- confirm against its config.

    Returns:
        dict mapping each label from ``config.id2label`` to its softmax
        probability rounded to 4 decimals, ordered from highest to lowest
        score.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    # Reuse the already-loaded checkpoint; re-initialising it for every text
    # dominated the runtime and repeated the "weights not used" warning.
    tokenizer, config, model = _load_sentiment_model(MODEL)

    text = _mask_mentions_and_links(text)
    # Explicit truncation keeps over-long texts within max_length tokens.
    encoded_input = tokenizer(
        text, return_tensors='pt', truncation=True, max_length=max_length
    )
    output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    # Highest-scoring label first, matching the original ranking order.
    ranking = np.argsort(scores)[::-1]
    return {
        config.id2label[int(idx)]: np.round(float(scores[idx]), 4)
        for idx in ranking
    }

def process_json_file(input_file, output_file):
    """Annotate every record of a JSON list with sentiment scores and save it.

    Each record must provide ``'Post'`` and ``'Transferred_Post'``. Both texts
    are scored with ``get_sentiment_scores`` and the positive / negative /
    neutral values are stored back on the record under prefixed keys. The
    annotated list is written to ``output_file`` with 4-space indentation.

    Args:
        input_file: Path of the JSON file to read (a list of dicts).
        output_file: Path the annotated JSON is written to.
    """
    with open(input_file, 'r') as source:
        records = json.load(source)

    for record in tqdm(records):
        # Fetch both texts up front so a missing key fails before any scoring.
        original_text = record['Post']
        transferred_text = record['Transferred_Post']

        original_scores = get_sentiment_scores(original_text)
        transferred_scores = get_sentiment_scores(transferred_text)

        # NOTE(review): 'Postive' is misspelled, but the keys are part of the
        # written file's schema, so they are kept exactly as they were.
        record.update({
            'Post_Postive': original_scores['positive'],
            'Post_Negative': original_scores['negative'],
            'Post_Neutral': original_scores['neutral'],
            'Transferred_Post_Postive': transferred_scores['positive'],
            'Transferred_Post_Negative': transferred_scores['negative'],
            'Transferred_Post_Neutral': transferred_scores['neutral'],
        })

    with open(output_file, 'w') as sink:
        json.dump(records, sink, indent=4)

# Score the SAS2UAS dataset and write the annotated copy into the `origin/`
# sub-folder next to the input.
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable base directory.
input_file = "/home/qiang/projects/Digital_mental_health/Dataset/Refined_dataset/2_llama/Sentiment/SAS2UAS_merge.json"
output_file = "/home/qiang/projects/Digital_mental_health/Dataset/Refined_dataset/2_llama/Sentiment/origin/SAS2UAS_merge.json"
process_json_file(input_file, output_file)


  0%|          | 0/156 [00:00<?, ?it/s]Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are 

In [7]:
import json

def calculate_average_negative_scores(input_file):
    """Compute the mean negative-sentiment score before and after transfer.

    Args:
        input_file: Path of a JSON file holding a list of dicts, each with
            ``"Post_Negative"`` and ``"Transferred_Post_Negative"`` values.

    Returns:
        Tuple ``(post_average, transferred_post_average)``.

    Raises:
        KeyError: If an entry lacks one of the two score keys.
        ZeroDivisionError: If the file contains an empty list.
    """
    with open(input_file, 'r') as handle:
        entries = json.load(handle)

    # Collect both scores per entry in a single pass over the data.
    score_pairs = [
        (entry["Post_Negative"], entry["Transferred_Post_Negative"])
        for entry in entries
    ]
    original_scores = [original for original, _ in score_pairs]
    transferred_scores = [transferred for _, transferred in score_pairs]

    # Compute the average scores.
    original_mean = sum(original_scores) / len(original_scores)
    transferred_mean = sum(transferred_scores) / len(transferred_scores)

    return original_mean, transferred_mean

# Usage example: average the negative scores stored in the annotated
# SAS2UAS file and print both means.
input_file = "/home/qiang/projects/Digital_mental_health/Dataset/Refined_dataset/2_llama/Sentiment/origin/SAS2UAS_merge.json"
post_negative_avg, transferred_post_negative_avg = calculate_average_negative_scores(input_file)
print("Average Post Negative Score:", post_negative_avg)
print("Average Transferred Post Negative Score:", transferred_post_negative_avg)


Average Post Negative Score: 0.8744852564102564
Average Transferred Post Negative Score: 0.7085243589743588


In [13]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
from tqdm import tqdm
import json
# Loaded (tokenizer, config, model) triples keyed by checkpoint name, so the
# checkpoint is initialised once per kernel session instead of once per text.
_SENTIMENT_MODEL_CACHE = {}


def _load_sentiment_model(model_name):
    """Return the ``(tokenizer, config, model)`` triple for ``model_name``.

    The triple is created on first use and served from
    ``_SENTIMENT_MODEL_CACHE`` on every later call.
    """
    if model_name not in _SENTIMENT_MODEL_CACHE:
        _SENTIMENT_MODEL_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoConfig.from_pretrained(model_name),
            AutoModelForSequenceClassification.from_pretrained(model_name),
        )
    return _SENTIMENT_MODEL_CACHE[model_name]


def _mask_mentions_and_links(text):
    """Replace ``@handle`` tokens with ``@user`` and ``http...`` tokens with ``http``.

    Tokens are obtained by splitting on single spaces; a lone ``@`` is kept.
    """
    masked = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        masked.append(token)
    return " ".join(masked)


def get_sentiment_scores(text, max_length=512):
    """Score ``text`` with the Cardiff NLP Twitter RoBERTa sentiment model.

    Args:
        text: The string to classify. Mentions and links are masked before
            tokenisation.
        max_length: Maximum number of tokens passed to the model; longer
            inputs are truncated. The default of 512 is assumed to match the
            checkpoint's position limit -- confirm against its config.

    Returns:
        dict mapping each label from ``config.id2label`` to its softmax
        probability rounded to 4 decimals, ordered from highest to lowest
        score.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    # Reuse the already-loaded checkpoint; re-initialising it for every text
    # dominated the runtime and repeated the "weights not used" warning.
    tokenizer, config, model = _load_sentiment_model(MODEL)

    text = _mask_mentions_and_links(text)
    # Explicit truncation keeps over-long texts within max_length tokens.
    encoded_input = tokenizer(
        text, return_tensors='pt', truncation=True, max_length=max_length
    )
    output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    # Highest-scoring label first, matching the original ranking order.
    ranking = np.argsort(scores)[::-1]
    return {
        config.id2label[int(idx)]: np.round(float(scores[idx]), 4)
        for idx in ranking
    }

def process_json_file(input_file, output_file):
    """Attach a negative-sentiment score for each model column and save.

    Every record in the input JSON list must contain the six source keys
    listed in ``key_pairs`` (presumably each model's generated text -- verify
    against the dataset). Each text is scored with ``get_sentiment_scores``
    and its ``'negative'`` value is stored under the paired ``*_scores`` key.
    The annotated list is written to ``output_file`` with 4-space indentation.

    Args:
        input_file: Path of the JSON file to read (a list of dicts).
        output_file: Path the annotated JSON is written to.
    """
    # (key holding the text to score, key receiving its negative score)
    key_pairs = (
        ('llama2:7B', 'llama7B_scores'),
        ('llama2:13B', 'llama13B_scores'),
        ('llama2:70B', 'llama70B_scores'),
        ('gemma:7B', 'gemma7B_scores'),
        ('mistral', 'mistral_scores'),
        ('mixtral:8x7b', 'mixtral_scores'),
    )

    with open(input_file, 'r') as source:
        records = json.load(source)

    for record in tqdm(records):
        # Three phases, as before: read every text, score every text, then
        # write the results -- so a missing key fails before any scoring.
        texts = [record[text_key] for text_key, _ in key_pairs]
        sentiments = [get_sentiment_scores(body) for body in texts]
        for (_, score_key), sentiment in zip(key_pairs, sentiments):
            record[score_key] = sentiment['negative']

    with open(output_file, 'w') as sink:
        json.dump(records, sink, indent=4)

# Score the IAS2EAS dataset for all six model columns and write the annotated
# copy into the `origin/` sub-folder next to the input.
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable base directory.
input_file = "/home/qiang/projects/Digital_mental_health/Dataset/Refined_dataset/2_llama/Sentiment/IAS2EAS_merge.json"
output_file = "/home/qiang/projects/Digital_mental_health/Dataset/Refined_dataset/2_llama/Sentiment/origin/IAS2EAS_merge_all.json"
process_json_file(input_file, output_file)


  0%|          | 0/103 [00:00<?, ?it/s]Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are 