In [None]:
import torch

# Path to the saved probing-results file
file_path = "/root/cjiang/info_flow/Context-Cite/input_level_decompose/subspace_decompose/language_probing/results/llama-3.1-8b-instruct_counterfact_results.pt"

# Load the saved object; later cells iterate over it as a sequence of per-sample records.
# NOTE(review): torch.load unpickles the file -- only load result files you trust.
data_all = torch.load(file_path)

In [None]:
import torch
import numpy as np

# Per-language accumulator for the 'lang_embedding' vectors found in data_all.
# Each list is replaced by a single stacked numpy array further down.
lang_embeddings = {
    'French': [],
    'German': [],
    'Italian': [],
    'Portuguese': [],
    'Spanish': [],
    'original': []
}

# Walk every sample and collect the embedding of each language that is present.
for data in data_all:
    for lang in lang_embeddings.keys():
        if lang in data and 'lang_embedding' in data[lang]:
            embedding = data[lang]['lang_embedding']
            if isinstance(embedding, torch.Tensor):
                # .numpy() only works on CPU tensors that do not track gradients,
                # so detach and move to host memory first.
                embedding = embedding.detach().cpu()
                # numpy has no bfloat16 dtype; upcast so the conversion cannot fail.
                if embedding.dtype == torch.bfloat16:
                    embedding = embedding.float()
                embedding = embedding.numpy()
            lang_embeddings[lang].append(embedding)

# np.stack raises an opaque "need at least one array to stack" on an empty list;
# fail early (before any entry is converted) with a message naming the languages.
missing_langs = [lang for lang, collected in lang_embeddings.items() if len(collected) == 0]
if missing_langs:
    raise ValueError(f"No 'lang_embedding' entries found for: {missing_langs}")

# Convert each list of vectors into one numpy array per language.
for lang in lang_embeddings:
    lang_embeddings[lang] = np.stack(lang_embeddings[lang])

# Report the resulting array shape for every language.
for lang, embeddings in lang_embeddings.items():
    print(f"{lang} embeddings shape: {embeddings.shape}")

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

def plot_tsne_with_metrics(lang_embeddings, selected_languages=None, perplexity=30):
    """
    Project the selected languages' embeddings to 2-D with t-SNE, draw a scatter
    plot, and print/return clustering evaluation metrics.

    The metrics are computed on the 2-D t-SNE coordinates (not on the original
    embeddings), using each point's language as its cluster label.

    Args:
        lang_embeddings: dict mapping a language name to an array of embeddings;
            the arrays are row-stacked with np.vstack.
        selected_languages: languages to visualize; None means every key of
            lang_embeddings.
        perplexity: requested t-SNE perplexity. It is lowered automatically to
            n_samples - 1 when there are too few points, because t-SNE requires
            perplexity < n_samples.

    Returns:
        dict with keys 'silhouette', 'davies_bouldin' and 'calinski_harabasz'.
        The values are NaN when the scores are undefined (fewer than two
        languages, or as many languages as points).

    Raises:
        ValueError: if no language is selected, a selected language is missing
            from lang_embeddings, or fewer than two embeddings are available.
    """
    from sklearn.preprocessing import LabelEncoder

    if selected_languages is None:
        selected_languages = list(lang_embeddings.keys())
    else:
        selected_languages = list(selected_languages)

    if not selected_languages:
        raise ValueError("No languages selected for visualization")

    for lang in selected_languages:
        if lang not in lang_embeddings:
            raise ValueError(f"Language {lang} not found in embeddings")

    colors = ['red', 'blue', 'green', 'purple', 'orange', 'brown']

    # Mapping from dictionary key to the name shown in the legend.
    display_names = {
        'original': 'English',  # 'original' holds the English prompts
        'French': 'French',
        'German': 'German',
        'Italian': 'Italian',
        'Portuguese': 'Portuguese',
        'Spanish': 'Spanish'
    }

    # Cycle through the palette so more than len(colors) languages still get a color
    # (zip() against a truncated palette would silently drop the extra languages).
    lang_to_color = {
        lang: colors[position % len(colors)]
        for position, lang in enumerate(selected_languages)
    }

    # Stack the embeddings and build one label per row.
    all_embeddings = np.vstack([lang_embeddings[lang] for lang in selected_languages])
    labels = []
    for lang in selected_languages:
        labels.extend([lang] * len(lang_embeddings[lang]))
    labels = np.array(labels)

    n_samples = all_embeddings.shape[0]
    if n_samples < 2:
        raise ValueError("At least two embeddings are required for t-SNE")

    # t-SNE requires perplexity < n_samples; clamp so small selections still work.
    effective_perplexity = min(perplexity, n_samples - 1)
    tsne = TSNE(n_components=2, random_state=42, perplexity=effective_perplexity)
    embeddings_2d = tsne.fit_transform(all_embeddings)

    # The scores are only defined for 2 <= n_labels <= n_samples - 1.
    numeric_labels = LabelEncoder().fit_transform(labels)
    n_labels = len(set(numeric_labels.tolist()))
    if 2 <= n_labels <= n_samples - 1:
        metrics = {
            'silhouette': silhouette_score(embeddings_2d, numeric_labels),
            'davies_bouldin': davies_bouldin_score(embeddings_2d, numeric_labels),
            'calinski_harabasz': calinski_harabasz_score(embeddings_2d, numeric_labels)
        }
    else:
        metrics = {
            'silhouette': float('nan'),
            'davies_bouldin': float('nan'),
            'calinski_harabasz': float('nan')
        }

    # Figure size and base font size.
    plt.figure(figsize=(6, 4))
    plt.rcParams.update({'font.size': 12})

    # One scatter series per language.
    for lang in selected_languages:
        mask = labels == lang
        plt.scatter(
            embeddings_2d[mask, 0],
            embeddings_2d[mask, 1],
            c=lang_to_color[lang],
            label=display_names.get(lang, lang),  # fall back to the raw key
            alpha=0.6,
            s=30
        )

    plt.legend(fontsize=14)
    plt.title('TSNE visualization of language embeddings', fontsize=20)
    plt.xlabel('TSNE dimension 1', fontsize=18)
    plt.ylabel('TSNE dimension 2', fontsize=18)

    # Adjust the layout so the text elements do not overlap.
    plt.tight_layout()
    plt.show()

    # Print the evaluation metrics.
    print("Clustering Evaluation Metrics:")
    print(f"Silhouette Score: {metrics['silhouette']:.3f}")
    print(f"Davies-Bouldin Score: {metrics['davies_bouldin']:.3f}")
    print(f"Calinski-Harabasz Score: {metrics['calinski_harabasz']:.3f}")

    return metrics
# Example usage: plot French and German next to the 'original' entries and keep the scores.
selected_langs = [
    'French',
    'German',
    'original',
]
metrics = plot_tsne_with_metrics(lang_embeddings, selected_languages=selected_langs)

In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

def plot_tsne(lang_embeddings, selected_languages=None, perplexity=30):
    """
    Visualize the embeddings of the selected languages with t-SNE.

    Args:
        lang_embeddings: dict mapping a language name to an array of embeddings;
            the arrays are row-stacked with np.vstack.
        selected_languages: list of languages to visualize; if None, every
            language in lang_embeddings is used.
        perplexity: requested t-SNE perplexity. It is lowered automatically to
            n_samples - 1 when there are too few points, because t-SNE requires
            perplexity < n_samples.

    Raises:
        ValueError: if no language is selected, a selected language is missing
            from lang_embeddings, or fewer than two embeddings are available.
    """
    # Default to every available language.
    if selected_languages is None:
        selected_languages = list(lang_embeddings.keys())
    else:
        selected_languages = list(selected_languages)

    if not selected_languages:
        raise ValueError("No languages selected for visualization")

    # Validate the selection.
    for lang in selected_languages:
        if lang not in lang_embeddings:
            raise ValueError(f"Language {lang} not found in embeddings")

    colors = ['red', 'blue', 'green', 'purple', 'orange', 'brown']
    # Cycle through the palette so more than len(colors) languages still get a color
    # (zip() against a truncated palette would silently drop the extra languages).
    lang_to_color = {
        lang: colors[position % len(colors)]
        for position, lang in enumerate(selected_languages)
    }

    # Stack only the selected languages into one matrix, with one label per row.
    all_embeddings = np.vstack([lang_embeddings[lang] for lang in selected_languages])
    labels = []
    for lang in selected_languages:
        labels.extend([lang] * len(lang_embeddings[lang]))
    labels = np.array(labels)  # built once instead of on every loop iteration

    n_samples = all_embeddings.shape[0]
    if n_samples < 2:
        raise ValueError("At least two embeddings are required for t-SNE")

    # t-SNE requires perplexity < n_samples; clamp so small selections still work.
    effective_perplexity = min(perplexity, n_samples - 1)
    tsne = TSNE(n_components=2, random_state=42, perplexity=effective_perplexity)
    embeddings_2d = tsne.fit_transform(all_embeddings)

    # One scatter series per language.
    plt.figure(figsize=(10, 8))
    for lang in selected_languages:
        mask = labels == lang
        plt.scatter(
            embeddings_2d[mask, 0],
            embeddings_2d[mask, 1],
            c=lang_to_color[lang],
            label=lang,
            alpha=0.6
        )

    plt.legend()
    plt.title('TSNE visualization of selected language embeddings')
    plt.xlabel('TSNE dimension 1')
    plt.ylabel('TSNE dimension 2')
    plt.show()

# Example usage: visualize only a chosen subset of languages.
selected_langs = [
    'French',
    'German',
    'Spanish',
]  # edit this list to pick other languages
plot_tsne(lang_embeddings, selected_languages=selected_langs)

# Alternatively, visualize every language:
# plot_tsne(lang_embeddings)

In [None]:
from cite_functions import attr_state_manager
import cite_functions as cf
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import plotly.io as pio
from plotly.subplots import make_subplots
from tuned_lens.plotting import PredictionTrajectory
from tuned_lens.nn.lenses import LogitLens
# Local checkpoint directory shared by the model and the tokenizer.
model_path = r"/root/models/llama_3_1_8b_instruct/"

# Load the causal LM, passing device_map="auto" for weight placement.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

# Load the tokenizer from the same directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
def get_state_predictions(model, tokenizer, state, k=5):
    """
    Decode a single hidden-state vector through the model's LM head and return
    its k most likely next tokens.

    Args:
        model: object exposing an `lm_head` callable that maps a hidden state
            to vocabulary logits.
        tokenizer: object exposing `decode(token_id)`.
        state: one hidden-state vector as a torch tensor or array-like.
            Leading singleton dimensions (e.g. shape (1, hidden)) are accepted.
        k: number of top tokens to return.

    Returns:
        List of (token_id, token_text, probability) tuples ordered from most to
        least likely.

    Raises:
        ValueError: if `state` holds more than one vector, in which case a
            single top-k list is not well defined.
    """
    hidden = torch.as_tensor(state)

    # Align the input with the LM head's parameters so a state loaded from disk
    # (possibly on another device or in another dtype) can be projected directly.
    weight = getattr(model.lm_head, "weight", None)
    if weight is not None:
        if weight.device.type != "meta":  # offloaded weights have no real device
            hidden = hidden.to(device=weight.device)
        if hidden.dtype != weight.dtype:
            hidden = hidden.to(dtype=weight.dtype)

    # Inference only: avoid building an autograd graph over the vocabulary matrix.
    with torch.no_grad():
        logits = model.lm_head(hidden)
        # Compute in float32; this also avoids the numpy round trip, which
        # cannot represent bfloat16.
        log_probs = logits.float().log_softmax(dim=-1)

    log_probs = log_probs.reshape(-1, log_probs.shape[-1])
    if log_probs.shape[0] != 1:
        raise ValueError(
            f"Expected a single hidden-state vector, got {log_probs.shape[0]} vectors"
        )
    log_probs = log_probs[0].cpu()

    top = torch.topk(log_probs, k=k)
    probs = top.values.exp()

    token_probs = []
    for token_id, prob in zip(top.indices.tolist(), probs.tolist()):
        token_probs.append((token_id, tokenizer.decode(token_id), prob))

    return token_probs

In [None]:
# Top-5 token predictions for the German 'lang_embedding' of the record at index 4.
get_state_predictions(model, tokenizer, data_all[4]['German']['lang_embedding'], k=5)

In [None]:
# Top-5 token predictions for the German 'semantic_embedding' of the record at index 4.
get_state_predictions(model, tokenizer, data_all[4]['German']['semantic_embedding'], k=5)

In [None]:
# Decode the top-5 tokens of every available embedding, for every sample and language.
results = []
languages = ['original', 'French', 'German', 'Italian', 'Portuguese', 'Spanish']

# Inference only: no autograd graph is needed while projecting the embeddings.
with torch.no_grad():
    for data in data_all:
        sample_result = {}
        for lang in languages:
            if lang not in data:
                continue

            lang_result = {
                'prompt': data[lang]['prompt'],
                'answer': data[lang]['answer']
            }

            # Predictions decoded from the language embedding (optional key).
            if 'lang_embedding' in data[lang]:
                lang_preds = get_state_predictions(model, tokenizer, data[lang]['lang_embedding'], k=5)
                lang_result['lang_predictions'] = lang_preds

            # Predictions decoded from the semantic embedding (optional key).
            if 'semantic_embedding' in data[lang]:
                semantic_preds = get_state_predictions(model, tokenizer, data[lang]['semantic_embedding'], k=5)
                lang_result['semantic_predictions'] = semantic_preds

            sample_result[lang] = lang_result

        results.append(sample_result)

# Print the first sample as an example.
print("Sample results for first data point:")
# Guard against an empty dataset instead of failing on results[0].
first_result = results[0] if results else {}
for lang in languages:
    if lang in first_result:
        print(f"\n{lang}:")
        print(f"Prompt: {first_result[lang]['prompt']}")
        print(f"Answer: {first_result[lang]['answer']}")
        # The prediction keys are only set when the embedding existed, so fall
        # back to an empty list rather than raising KeyError.
        print("Language embedding predictions:")
        for token_id, token, prob in first_result[lang].get('lang_predictions', []):
            print(f"  {token}: {prob:.4f}")
        print("Semantic embedding predictions:")
        for token_id, token, prob in first_result[lang].get('semantic_predictions', []):
            print(f"  {token}: {prob:.4f}")

In [None]:
# Display the collected predictions for the sample at index 4.
results[4]

In [None]:
import json

# Write the collected predictions to disk as indented JSON, keeping non-ASCII text readable.
with open('predictions.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(results, indent=4, ensure_ascii=False))

In [None]:
# Display the index-4 entry of results (the same expression is shown in an earlier cell).
results[4]

In [None]:
# Display the raw loaded record at index 4, e.g. to compare it against results[4].
data_all[4]