In [1]:
import numpy as np
from torchmetrics.text.rouge import ROUGEScore
import pandas as pd
from transformers import BertTokenizer, BertModel
from torch.utils.data import Dataset, DataLoader, random_split
import torch
import json
from torch import nn
from torch.optim import Adam
from tqdm import tqdm, trange
from torch.utils.data import WeightedRandomSampler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



sum0 = 0
sum1 = 0

DATA_PATH = './NN.json'
PATH_GLOVE_MODEL = '../data/glove.6B.100d.txt'
print("Loading Glove Model")
f = open(PATH_GLOVE_MODEL, 'r', errors='ignore')
GLOVE_MODEL = {}
for line in f:
    split_lines = line.split()
    word = split_lines[0]
    word_embedding = np.array([float(value) for value in split_lines[1:]])
    GLOVE_MODEL[word] = word_embedding
vocab = GLOVE_MODEL.keys()

Loading Glove Model


In [2]:
def generate_allocation_data(sum0, sum1):
    Data = {}
    with open(DATA_PATH, 'rt', encoding='utf-8') as f:
        for idx, line in enumerate(f):
            sample = json.loads(line.strip())
            Data[idx] = sample

    rougeScore = ROUGEScore()
    total_llm_score, total_nn_score = 0, 0
    total_llm_time_cost, total_nn_time_cost = 0, 0

    texts = []
    features = []
    labels = []
    llm_time_costs = []
    nn_time_costs = []
    llm_scores = []
    nn_scores = []
    for i in trange(len(Data)):
        text = Data[i]['input']
        ground_truth = Data[i]['ground_truth']
        llm_generation = Data[i]['generation']
        llm_time_cost = Data[i]['time']
        nn_generation = Data[i]['nn_generation']
        nn_time_cost = Data[i]['nn_time']
        feature = []
        for i in (llm_generation + nn_generation).split():
            if i in vocab:
                feature.append(GLOVE_MODEL[i])
        if len(feature) == 0:
            continue
        feature = np.sum(feature, axis=0)

        llm_score = rougeScore(llm_generation, ground_truth)['rougeL_fmeasure']
        nn_score = rougeScore(nn_generation, ground_truth)['rougeL_fmeasure']
        total_llm_score += llm_score
        total_nn_score += nn_score
        total_llm_time_cost += llm_time_cost
        total_nn_time_cost += nn_time_cost

        texts.append(llm_generation + nn_generation)
        features.append(feature)
        llm_time_costs.append(llm_time_cost)
        nn_time_costs.append(nn_time_cost)
        llm_scores.append(llm_score)
        nn_scores.append(nn_score)
        if llm_score > nn_score:
            sum0 += 1
            labels.append(0)
        else:
            sum1 += 1
            labels.append(1)

    print(total_llm_score, total_nn_score)
    print(total_llm_time_cost, total_nn_time_cost)
    print(sum0, sum1)

    allocation_df = pd.DataFrame({"Text": texts, "Feature": features, "Label": labels, "LLM_time_cost": llm_time_costs, "NN_time_cost": nn_time_costs, "LLM_score": llm_scores, "NN_score": nn_scores})

    return allocation_df

allocation_df = generate_allocation_data(sum0, sum1)


100%|██████████| 11301/11301 [07:27<00:00, 25.26it/s]


tensor(1866.3857) tensor(1476.1383)
15595.123625516891 4739.562722682953
7373 3928


In [5]:
seed = 67

for i in range(3):
    print(f"----------------------{i}----------------------")
    train_df, test_df = train_test_split(allocation_df, test_size=0.2, random_state=seed + i)
    X_train, X_test, y_train, y_test, llm_scores, nn_scores, llm_time_costs, nn_time_costs = train_df['Feature'].to_list(), test_df['Feature'].to_list(), train_df['Label'].to_list(), test_df['Label'].to_list(), test_df['LLM_score'].to_list(), test_df['NN_score'].to_list(), test_df['LLM_time_cost'].to_list(), test_df['NN_time_cost'].to_list()

    print(X_train)
    pca = PCA(n_components=50)
    X_train_pca = pca.fit_transform(X_train)

    # 将测试集应用于相同的 PCA 转换
    X_test_pca = pca.transform(X_test)

    # 实例化 SVM 分类器，并用 PCA 降维后的训练集进行拟合
    svm = SVC(C=0.1, kernel='rbf')
    svm.fit(X_train_pca, y_train)

    # 预测测试集
    y_pred = svm.predict(X_test_pca)

    # 计算准确度
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    total_score, total_time_cost = 0, 0
    for pred, llm_score, nn_score, llm_time_cost, nn_time_cost in zip(y_pred, llm_scores, nn_scores, llm_time_costs, nn_time_costs):
        if pred == 0:
            total_score += llm_score.item()
            total_time_cost += llm_time_cost
        else:
            total_score += nn_score.item()
            total_time_cost += nn_time_cost
    print("system:", total_score, total_time_cost)

    total_score, total_time_cost = 0, 0
    for truth, llm_score, nn_score, llm_time_cost, nn_time_cost in zip(y_test, llm_scores, nn_scores, llm_time_costs, nn_time_costs):
        if truth == 0:
            total_score += llm_score.item()
            total_time_cost += llm_time_cost
        else:
            total_score += nn_score.item()
            total_time_cost += nn_time_cost
    print("best:", total_score, total_time_cost)

    total_score, total_time_cost = 0, 0
    for llm_score, llm_time_cost in zip(llm_scores, llm_time_costs):
        total_score += llm_score.item()
        total_time_cost += llm_time_cost
    print("llm:", total_score, total_time_cost)

    total_score, total_time_cost = 0, 0
    for nn_score, nn_time_cost in zip(nn_scores, nn_time_costs):
        total_score += nn_score.item()
        total_time_cost += nn_time_cost
    print("nn:", total_score, total_time_cost)


----------------------67----------------------


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Accuracy: 0.6824413976116762
system: 380.18348698402406 2933.126845598221
best: 408.664289386943 2134.284155368805
llm: 378.2113728513068 3307.418973684311
nn: 292.896293444559 950.8068039417267
----------------------68----------------------


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Accuracy: 0.6629809818664307
system: 379.04171503707767 2870.627744436264
best: 409.2718108519912 2135.3852078914642
llm: 377.17198260873556 3085.3264033794403
nn: 296.98885338008404 940.2483470439911
----------------------69----------------------


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Accuracy: 0.6638655462184874
system: 373.5245298621303 2976.4405405521393
best: 406.07065110094845 2082.459053993225
llm: 371.18498994680704 3385.6143465042114
nn: 295.1798997335136 936.712233543396
