In [None]:
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

## 读取数据并导入离线模型

In [None]:
# Read the input table; the path is relative to the notebook's working directory.
dataframe = pd.read_csv('processed_2.csv')
# Directory holding the locally stored (offline) model files.
model_name_or_path = "./local_model"
# Load the tokenizer and the sequence-classification model from that directory
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path)

## 函数定义

In [None]:
def get_sentiment(text):
    """Return the probability the model assigns to class index 0 for ``text``.

    Args:
        text: The string to classify.

    Returns:
        float: Softmax probability of the first label (index 0).
        NOTE(review): index 0 is assumed to be the "negative" class; confirm
        against the model's ``id2label`` mapping.
    """
    # truncation=True clips over-long inputs to the tokenizer's maximum length
    # so the forward pass does not fail on texts longer than the model accepts.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    # Single input, so row 0 is the only row; column 0 is the first label.
    return probs[0][0].item()
# Sentiment scoring for a sequence of texts
def sentiment(list):
    """Score every element of ``list`` with ``get_sentiment``.

    Args:
        list: Iterable of values; only ``str`` elements are scored.

    Returns:
        A list of the same length where each string is replaced by the value
        returned from ``get_sentiment`` and every non-string by ``None``.
    """
    return [
        get_sentiment(entry) if isinstance(entry, str) else None
        for entry in list
    ]

# Title text processing
def process_title(data):
    """Strip the fixed wrapper from titles that start with "慈善募捐".

    Args:
        data: Iterable of title values (strings or anything else).

    Returns:
        A list where each string beginning with "慈善募捐" is reduced to the
        slice ``[7:-7]`` (first and last seven characters dropped); every
        other value is passed through unchanged.
    """
    def _unwrap(value):
        # Only prefixed strings are trimmed; everything else is kept as is.
        if isinstance(value, str) and value.startswith("慈善募捐"):
            return value[7:-7]
        return value

    return [_unwrap(value) for value in data]

# Project-description text processing
def process_detail(data):
    """Remove bracketed picture/photo captions and count them per entry.

    A caption is a full-width parenthesised （…） or bracketed 【…】 span
    containing "图" or "照片". The patterns are applied one after another,
    each on the text left over by the previous one.

    Args:
        data: Iterable of description values; non-strings count as empty.

    Returns:
        tuple: ``(photo, details)`` where ``photo[i]`` is the number of
        caption spans removed from entry ``i`` and ``details[i]`` is the
        remaining text with every ``', '`` separator deleted. Non-string
        entries yield ``0`` and ``""``.
    """
    caption_patterns = (
        r"（[^（）]*?图[^（）]*?）",
        r"【[^【】]*?图[^【】]*?】",
        r"（[^（）]*?照片[^（）]*?）",
        r"【[^【】]*?照片[^【】]*?】",
    )
    photo = []    # number of caption spans found per entry
    details = []  # cleaned text per entry
    for raw in data:
        if not isinstance(raw, str):
            photo.append(0)
            details.append("")
            continue
        cleaned = raw
        removed = 0
        for pat in caption_patterns:
            # subn deletes the matches and reports how many there were.
            cleaned, hits = re.subn(pat, "", cleaned)
            removed += hits
        photo.append(removed)
        # Drop the literal ', ' separators left in the text.
        details.append(cleaned.replace(r"', '", ""))
    return photo, details

## 数据处理

### 一起跑（后面的就不用跑了）

In [None]:
# Title: clean the project names, then score them
titles = process_title(dataframe['项目名称'].tolist())
TEmo = sentiment(titles)
dataframe['TEmo'] = TEmo

# Brief: score the project summaries as they are
briefs = dataframe['项目简介'].tolist()
BEmo = sentiment(briefs)
dataframe['BEmo'] = BEmo

# Detail: strip picture/photo captions, then score the first 500 characters
_, details = process_detail(dataframe['项目介绍'].tolist())
# process_detail turns missing descriptions into "", which sentiment() would
# score as if it were real text. Map empty entries to None so they get a
# missing DEmo value, consistent with how missing titles/briefs are handled.
# The 500-character cap is presumably there to stay within the model's input
# limit -- TODO confirm.
detail_short = [detail[:500] if detail else None for detail in details]
DEmo = sentiment(detail_short)

dataframe['DEmo'] = DEmo

# Save the result to a new CSV file
# NOTE(review): "precessed_3.csv" looks like a typo for "processed_3.csv"
# (the input is processed_2.csv); left unchanged in case other code reads it.
dataframe.to_csv(r"precessed_3.csv", index=False)