##  ivector

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch.nn as nn
import json
import optuna
import torch.optim as optim



# 解析 ivector.txt 文件
def parse_ivector_file(filepath):
    data = []
    with open(filepath, 'r') as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            name_age = parts[0].split('/')
            subject = name_age[0]
            features = list(map(float, parts[1].strip('[]').split()))
            data.append((subject, features))
    return pd.DataFrame(data, columns=['Subject', 'Features'])

# 读取 ivector 文件
ivector_df = parse_ivector_file('../generated_ivector.txt')
# ivector_df = parse_ivector_file('./ivector_feature/ivector.txt')
# 读取 new_kin_relationship.csv 文件并清洗数据
new_kin_relationships_file = pd.read_csv('./new_kin_relationships.csv')
new_kin_relationships_file.columns = ['Subject1', 'Subject2', 'Relationship1', 'Relationship2']
new_kin_relationships_file['Subject1'] = new_kin_relationships_file['Subject1'].str.strip()
new_kin_relationships_file['Subject2'] = new_kin_relationships_file['Subject2'].str.strip()

# 读取 genders.csv 文件并合并性别信息
genders_df = pd.read_csv('./new_genders.csv')
genders_df.columns = ['Subject', 'Gender']
genders_df['Subject'] = genders_df['Subject'].str.strip()

# 合并性别信息到ivector_df
ivector_df = ivector_df.merge(genders_df, on='Subject', how='inner')

# 合并数据
merged_df1 = new_kin_relationships_file.merge(ivector_df, left_on='Subject1', right_on='Subject', how='inner')
merged_df = merged_df1.merge(ivector_df, left_on='Subject2', right_on='Subject', how='inner', suffixes=('_1', '_2'))

# 打印合并后的数据
print(merged_df.head())
feature_lengths = merged_df['Features_1'].apply(len)
print("不同长度的特征数组数目：")
print(feature_lengths.value_counts())

# 按subject分割数据集
subjects = merged_df['Subject_1'].unique()
train_val_subjects, test_subjects = train_test_split(subjects, test_size=0.2, random_state=42)
train_subjects, val_subjects = train_test_split(train_val_subjects, test_size=0.125, random_state=42)
print(len(train_subjects),len(val_subjects),len(test_subjects))
train = merged_df[merged_df['Subject_1'].isin(train_subjects)]
val = merged_df[merged_df['Subject_1'].isin(val_subjects)]
test = merged_df[merged_df['Subject_1'].isin(test_subjects)]


# 标准化特征
scaler = StandardScaler()
train_features_1 = scaler.fit_transform(np.vstack(train['Features_1']))
train_features_2 = scaler.fit_transform(np.vstack(train['Features_2']))
val_features_1 = scaler.transform(np.vstack(val['Features_1']))
val_features_2 = scaler.transform(np.vstack(val['Features_2']))
test_features_1 = scaler.transform(np.vstack(test['Features_1']))
test_features_2 = scaler.transform(np.vstack(test['Features_2']))

class KinshipDataset(Dataset):
    def __init__(self, anchor_features, positive_features, relationships, ivector_df, scaler):
        self.anchor_features = anchor_features
        self.positive_features = positive_features
        self.relationships = relationships
        self.ivector_df = ivector_df
        self.scaler = scaler
        self.subjects = list(set(self.relationships['Subject1']).union(set(self.relationships['Subject2'])))
        self.subject_gender = dict(zip(self.ivector_df['Subject'], self.ivector_df['Gender']))

    def __len__(self):
        return len(self.anchor_features)

    def __getitem__(self, idx):
        anchor = self.anchor_features[idx]
        positive = self.positive_features[idx]
        anchor_subject = self.relationships.iloc[idx]['Subject1']
        positive_subject = self.relationships.iloc[idx]['Subject2']
        positive_gender = self.subject_gender[positive_subject]

        # 随机选择一个负样本，确保性别相同
        while True:
            negative_subject = np.random.choice(self.subjects)
            if (negative_subject != anchor_subject and 
                negative_subject != positive_subject and 
                self.subject_gender[negative_subject] == positive_gender):
                break
        
        negative_features = self.ivector_df[self.ivector_df['Subject'] == negative_subject]['Features'].values[0]
        negative = self.scaler.transform([negative_features])[0]

        return (
            torch.tensor(anchor, dtype=torch.float32), 
            torch.tensor(positive, dtype=torch.float32), 
            torch.tensor(negative, dtype=torch.float32),
            anchor_subject,
            positive_subject,
            negative_subject
        )

# 准备数据加载器
train_dataset = KinshipDataset(train_features_1, train_features_2, train, ivector_df, scaler)
val_dataset = KinshipDataset(val_features_1, val_features_2, val, ivector_df, scaler)
test_dataset = KinshipDataset(test_features_1, test_features_2, test, ivector_df, scaler)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

print(len(train_dataset),len(val_dataset),len(test_dataset))


## xvector

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 读取 xvector.txt 文件并解析数据
def parse_xvector_file(filepath):
    data = []
    with open(filepath, 'r') as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            name = parts[0].rsplit('-', 1)[0]  # 确保格式一致
            features = parts[1].strip('[]').split()
            features = np.array([float(x) for x in features])
            data.append((name, features))
    return pd.DataFrame(data, columns=['Subject', 'Features'])

xvector_df = parse_xvector_file('../generated_xvector.txt')
# xvector_df = parse_xvector_file('./xvector_feature/xvector.txt')
# 读取 new_kin_relationship.csv 文件并清洗数据
new_kin_relationships_file = pd.read_csv('./new_kin_relationships.csv')
new_kin_relationships_file.columns = ['Subject1', 'Subject2', 'Relationship1', 'Relationship2']
new_kin_relationships_file['Subject1'] = new_kin_relationships_file['Subject1'].str.strip()
new_kin_relationships_file['Subject2'] = new_kin_relationships_file['Subject2'].str.strip()

# 读取 genders.csv 文件并合并性别信息
genders_df = pd.read_csv('./new_genders.csv')
genders_df.columns = ['Subject', 'Gender']
genders_df['Subject'] = genders_df['Subject'].str.strip()

# 合并性别信息到xvector_df
xvector_df = xvector_df.merge(genders_df, on='Subject', how='inner')

# 合并数据
merged_df1 = new_kin_relationships_file.merge(xvector_df, left_on='Subject1', right_on='Subject', how='inner')
merged_df = merged_df1.merge(xvector_df, left_on='Subject2', right_on='Subject', how='inner', suffixes=('_1', '_2'))

# 打印合并后的数据
print(merged_df.head())
feature_lengths = merged_df['Features_1'].apply(len)
print("不同长度的特征数组数目：")
print(feature_lengths.value_counts())

# 按subject分割数据集
subjects = merged_df['Subject_1'].unique()
train_val_subjects, test_subjects = train_test_split(subjects, test_size=0.2, random_state=42)
train_subjects, val_subjects = train_test_split(train_val_subjects, test_size=0.125, random_state=42)
print(len(train_subjects),len(val_subjects),len(test_subjects))
train = merged_df[merged_df['Subject_1'].isin(train_subjects)]
val = merged_df[merged_df['Subject_1'].isin(val_subjects)]
test = merged_df[merged_df['Subject_1'].isin(test_subjects)]


# 标准化特征
scaler = StandardScaler()
train_features_1 = scaler.fit_transform(np.vstack(train['Features_1']))
train_features_2 = scaler.fit_transform(np.vstack(train['Features_2']))
val_features_1 = scaler.transform(np.vstack(val['Features_1']))
val_features_2 = scaler.transform(np.vstack(val['Features_2']))
test_features_1 = scaler.transform(np.vstack(test['Features_1']))
test_features_2 = scaler.transform(np.vstack(test['Features_2']))

class KinshipDataset(Dataset):
    def __init__(self, anchor_features, positive_features, relationships, xvector_df, scaler):
        self.anchor_features = anchor_features
        self.positive_features = positive_features
        self.relationships = relationships
        self.xvector_df = xvector_df
        self.scaler = scaler
        self.subjects = list(set(self.relationships['Subject1']).union(set(self.relationships['Subject2'])))
        self.subject_gender = dict(zip(self.xvector_df['Subject'], self.xvector_df['Gender']))

    def __len__(self):
        return len(self.anchor_features)

    def __getitem__(self, idx):
        anchor = self.anchor_features[idx]
        positive = self.positive_features[idx]
        anchor_subject = self.relationships.iloc[idx]['Subject1']
        positive_subject = self.relationships.iloc[idx]['Subject2']
        positive_gender = self.subject_gender[positive_subject]

        # 随机选择一个负样本，确保性别相同
        while True:
            negative_subject = np.random.choice(self.subjects)
            if (negative_subject != anchor_subject and 
                negative_subject != positive_subject and 
                self.subject_gender[negative_subject] == positive_gender):
                break
        
        negative_features = self.xvector_df[self.xvector_df['Subject'] == negative_subject]['Features'].values[0]
        negative = self.scaler.transform([negative_features])[0]

        return (
            torch.tensor(anchor, dtype=torch.float32), 
            torch.tensor(positive, dtype=torch.float32), 
            torch.tensor(negative, dtype=torch.float32),
            anchor_subject,
            positive_subject,
            negative_subject
        )

# 准备数据加载器
train_dataset = KinshipDataset(train_features_1, train_features_2, train, xvector_df, scaler)
val_dataset = KinshipDataset(val_features_1, val_features_2, val, xvector_df, scaler)
test_dataset = KinshipDataset(test_features_1, test_features_2, test, xvector_df, scaler)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

print(len(train_dataset),len(val_dataset),len(test_dataset))



## wav2vec features

In [5]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# 解析 resvector.txt 文件
def parse_resvector_file0(filepath):
    data = []
    with open(filepath, 'r') as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            name_age = parts[0].split('/')
#             print(name_age)
            subject = name_age[0]
            age = int(name_age[1])
            features_str = parts[1].strip('[]').rstrip(',')
            features = list(map(float, features_str.split(', ')))
            data.append((subject, age, features))
    return pd.DataFrame(data, columns=['Subject', 'Age', 'Features'])
# 解析 resvector.txt 文件
def parse_resvector_file1(filepath):
    data = []
    with open(filepath, 'r') as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            name_age = parts[0].split('/')
#             print(name_age)
            subject = name_age[0]
            age = int(name_age[1])
            features_str = parts[1].strip('[]').rstrip(',')
            features = list(map(float, features_str.split(', ')))
            data.append((subject, age, features))
    return pd.DataFrame(data, columns=['Subject', 'Age', 'Features'])
# 读取 ivector 文件
xvector_df = parse_resvector_file0('./generated_wav2vec_pretrained.txt')
# xvector_df = parse_resvector_file1('./wav2vec_pretrained.txt')

# 读取 new_kin_relationship.csv 文件并清洗数据
new_kin_relationships_file = pd.read_csv('./new_kin_relationships.csv')
new_kin_relationships_file.columns = ['Subject1', 'Subject2', 'Relationship1', 'Relationship2']
new_kin_relationships_file['Subject1'] = new_kin_relationships_file['Subject1'].str.strip()
new_kin_relationships_file['Subject2'] = new_kin_relationships_file['Subject2'].str.strip()

# 读取 genders.csv 文件并合并性别信息
genders_df = pd.read_csv('./new_genders.csv')
genders_df.columns = ['Subject', 'Gender']
genders_df['Subject'] = genders_df['Subject'].str.strip()

# 合并性别信息到xvector_df
xvector_df = xvector_df.merge(genders_df, on='Subject', how='inner')

# 合并数据
merged_df1 = new_kin_relationships_file.merge(xvector_df, left_on='Subject1', right_on='Subject', how='inner')
merged_df = merged_df1.merge(xvector_df, left_on='Subject2', right_on='Subject', how='inner', suffixes=('_1', '_2'))

# 打印合并后的数据
print(merged_df.head())
feature_lengths = merged_df['Features_1'].apply(len)
print("不同长度的特征数组数目：")
print(feature_lengths.value_counts())

# 按subject分割数据集
subjects = merged_df['Subject_1'].unique()
train_val_subjects, test_subjects = train_test_split(subjects, test_size=0.2, random_state=42)
train_subjects, val_subjects = train_test_split(train_val_subjects, test_size=0.125, random_state=42)
print(len(train_subjects),len(val_subjects),len(test_subjects))
train = merged_df[merged_df['Subject_1'].isin(train_subjects)]
val = merged_df[merged_df['Subject_1'].isin(val_subjects)]
test = merged_df[merged_df['Subject_1'].isin(test_subjects)]


# 标准化特征
scaler = StandardScaler()
train_features_1 = scaler.fit_transform(np.vstack(train['Features_1']))
train_features_2 = scaler.fit_transform(np.vstack(train['Features_2']))
val_features_1 = scaler.transform(np.vstack(val['Features_1']))
val_features_2 = scaler.transform(np.vstack(val['Features_2']))
test_features_1 = scaler.transform(np.vstack(test['Features_1']))
test_features_2 = scaler.transform(np.vstack(test['Features_2']))

class KinshipDataset(Dataset):
    def __init__(self, anchor_features, positive_features, relationships, xvector_df, scaler):
        self.anchor_features = anchor_features
        self.positive_features = positive_features
        self.relationships = relationships
        self.xvector_df = xvector_df
        self.scaler = scaler
        self.subjects = list(set(self.relationships['Subject1']).union(set(self.relationships['Subject2'])))
        self.subject_gender = dict(zip(self.xvector_df['Subject'], self.xvector_df['Gender']))

    def __len__(self):
        return len(self.anchor_features)

    def __getitem__(self, idx):
        anchor = self.anchor_features[idx]
        positive = self.positive_features[idx]
        anchor_subject = self.relationships.iloc[idx]['Subject1']
        positive_subject = self.relationships.iloc[idx]['Subject2']
        positive_gender = self.subject_gender[positive_subject]

        # 随机选择一个负样本，确保性别相同
        while True:
            negative_subject = np.random.choice(self.subjects)
            if (negative_subject != anchor_subject and 
                negative_subject != positive_subject and 
                self.subject_gender[negative_subject] == positive_gender):
                break
        
        negative_features = self.xvector_df[self.xvector_df['Subject'] == negative_subject]['Features'].values[0]
        negative = self.scaler.transform([negative_features])[0]

        return (
            torch.tensor(anchor, dtype=torch.float32), 
            torch.tensor(positive, dtype=torch.float32), 
            torch.tensor(negative, dtype=torch.float32),
            anchor_subject,
            positive_subject,
            negative_subject
        )

# 准备数据加载器
train_dataset = KinshipDataset(train_features_1, train_features_2, train, xvector_df, scaler)
val_dataset = KinshipDataset(val_features_1, val_features_2, val, xvector_df, scaler)
test_dataset = KinshipDataset(test_features_1, test_features_2, test, xvector_df, scaler)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

print(len(train_dataset),len(val_dataset),len(test_dataset))



    Subject1     Subject2 Relationship1 Relationship2  Subject_1  Age_1  \
0  Tom_Hanks  Colin_Hanks        Father           Son  Tom_Hanks     28   
1  Tom_Hanks  Colin_Hanks        Father           Son  Tom_Hanks     28   
2  Tom_Hanks  Colin_Hanks        Father           Son  Tom_Hanks     28   
3  Tom_Hanks  Colin_Hanks        Father           Son  Tom_Hanks     28   
4  Tom_Hanks  Colin_Hanks        Father           Son  Tom_Hanks     28   

                                          Features_1 Gender_1    Subject_2  \
0  [0.008313, 0.003505, -0.011092, -0.005062, -0....     Male  Colin_Hanks   
1  [0.008313, 0.003505, -0.011092, -0.005062, -0....     Male  Colin_Hanks   
2  [0.008313, 0.003505, -0.011092, -0.005062, -0....     Male  Colin_Hanks   
3  [0.008313, 0.003505, -0.011092, -0.005062, -0....     Male  Colin_Hanks   
4  [0.008313, 0.003505, -0.011092, -0.005062, -0....     Male  Colin_Hanks   

   Age_2                                         Features_2 Gender_2  
0     25 

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler



# old model
class TripletNet(nn.Module):
    def __init__(self, input_dim):
        super(TripletNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256,  bias=False)
        self.bn1 = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 128,  bias=False)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

    
#  new model   
# class TripletNet(nn.Module):
#     def __init__(self, input_dim):
#         super(TripletNet, self).__init__()
#         self.fc1 = nn.Linear(input_dim, 256)
#         self.bn1 = nn.BatchNorm1d(256)
#         self.dropout = nn.Dropout(0.5)  # 添加Dropout层
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(256, 128)

#     def forward(self, x):
#         x = self.fc1(x)
#         x = self.bn1(x)
#         x = self.dropout(x)  # 应用Dropout
#         x = self.relu(x)
#         x = self.fc2(x)
#         return x
    
# 定义三元组损失函数，调整正则化项的系数 lambda=1e-3
class TripletLoss(nn.Module):
    def __init__(self, margin=1, lambda_reg=1e-3):  # 增加正则化系数
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.lambda_reg = lambda_reg

    def forward(self, anchor, positive, negative):
        pos_dist = torch.nn.functional.pairwise_distance(anchor, positive)
        neg_dist = torch.nn.functional.pairwise_distance(anchor, negative)
        triplet_loss = torch.relu(pos_dist - neg_dist + self.margin).mean()

        # 正则化项
        reg_term = (anchor.norm(2) + positive.norm(2) + negative.norm(2)).mean()
        loss = triplet_loss + self.lambda_reg * reg_term
        return loss

# 定义训练函数
def train_epoch(model, criterion, optimizer, data_loader):
    model.train()
    total_loss = 0.0
    for anchor, positive, negative, _, _, _ in data_loader:
        optimizer.zero_grad()
        anchor_out = model(anchor)
        positive_out = model(positive)
        negative_out = model(negative)
        loss = criterion(anchor_out, positive_out, negative_out)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(data_loader)
    return avg_loss

# 定义验证和测试时使用的函数来计算最佳阈值
def evaluate_best_threshold_by_accuracy(model, data_loader):
    model.eval()
    all_distances = []
    all_labels = []

    # 计算所有anchor-positive和anchor-negative样本之间的距离，并收集真实标签
    with torch.no_grad():
        for data in data_loader:
            anchor, positive, negative = data[:3]
            anchor_out = model(anchor)
            positive_out = model(positive)
            negative_out = model(negative)
            
            pos_dist = torch.nn.functional.pairwise_distance(anchor_out, positive_out)
            neg_dist = torch.nn.functional.pairwise_distance(anchor_out, negative_out)
            
            all_distances.extend(pos_dist.cpu().numpy())  # 正样本对距离
            all_labels.extend([1] * len(pos_dist))  # 正样本对标签
            
            all_distances.extend(neg_dist.cpu().numpy())  # 负样本对距离
            all_labels.extend([0] * len(neg_dist))  # 负样本对标签

    # 将正负样本对距离结合在一起，并获取最小和最大距离
    all_distances = np.array(all_distances)
    all_labels = np.array(all_labels)
    min_distance = np.min(all_distances)
    max_distance = np.max(all_distances)

    # 设置阈值范围，通常设置为从最小距离到最大距离的范围
    thresholds = np.linspace(min_distance, max_distance, 100)

    best_accuracy = 0
    best_threshold =0

    # 遍历所有阈值，计算在该阈值下的准确率
    for threshold in thresholds:
        # 对于正样本，距离小于阈值的应该分类为正样本 (1)
        # 对于负样本，距离大于阈值的应该分类为负样本 (0)
        predictions = (all_distances < threshold).astype(int)
        accuracy = np.mean(predictions == all_labels)

        # 找到准确率最高的阈值
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = threshold

    print(f"Best Threshold: {best_threshold}, Best Accuracy: {best_accuracy * 100:.2f}%")
    return best_threshold

# 定义测试阶段使用的函数
def evaluate_with_threshold(model, data_loader, threshold):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in data_loader:
            anchor, positive, negative = data[:3]
            anchor_out = model(anchor)
            positive_out = model(positive)
            negative_out = model(negative)

            pos_dist = torch.nn.functional.pairwise_distance(anchor_out, positive_out)
            neg_dist = torch.nn.functional.pairwise_distance(anchor_out, negative_out)

            # 正样本 (anchor-positive) 距离小于阈值则为正确
            correct += (pos_dist < threshold).sum().item()
            # 负样本 (anchor-negative) 距离大于阈值则为正确
            correct += (neg_dist >= threshold).sum().item()
            total += 2 * anchor.size(0)  # 每个batch里有2倍的样本（正负样本对）

    accuracy = correct / total
    print(f"Test Accuracy with threshold {threshold}: {accuracy * 100:.2f}%")
    return accuracy

# 初始化模型、损失函数和优化器
model = TripletNet(input_dim=train_features_1.shape[1])
criterion = TripletLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) 

# 训练
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train_epoch(model, criterion, optimizer, train_loader)
    val_loss = train_epoch(model, criterion, optimizer, test_loader)
    
    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')


best_threshold = evaluate_best_threshold_by_accuracy(model, test_loader)

# 在测试集上使用最佳阈值进行评估
test_accuracy = evaluate_with_threshold(model, val_loader, best_threshold)


Epoch 1, Train Loss: 0.0819, Validation Loss: 0.0372
Best Threshold: 1.080179699168487, Best Accuracy: 71.49%
Test Accuracy with threshold 1.080179699168487: 68.39%


In [None]:
def evaluate_with_threshold_by_relationship(model, data_loader, threshold):
    model.eval()

    # 初始化每种亲属关系的计数和准确率
    relationship_accuracies = {
        'BB': ['Brother', 'Brother'],
        'SS': ['Sister', 'Sister'],
        'BS': ['Brother', 'Sister'],
        'FD': ['Father', 'Daughter'],
        'FS': ['Father', 'Son'],
        'MD': ['Mother', 'Daughter'],
        'MS': ['Mother', 'Son']
    }

    # 用于存储每种亲属关系的统计信息
    relationship_stats = {key: {'correct_pos': 0, 'correct_neg': 0, 'total': 0} for key in relationship_accuracies.keys()}

    correct_pos_total = 0
    correct_neg_total = 0
    total_pos_neg = 0

    with torch.no_grad():
        for data in data_loader:
            anchor, positive, negative, anchor_subject, positive_subject, negative_subject = data

            anchor_out = model(anchor)
            positive_out = model(positive)
            negative_out = model(negative)

            # 计算正对和负对的距离
            pos_dist = torch.nn.functional.pairwise_distance(anchor_out, positive_out)
            neg_dist = torch.nn.functional.pairwise_distance(anchor_out, negative_out)

            # 遍历每个三元组，找到对应的亲属关系
            for idx in range(len(anchor_subject)):
                # 查找每对 anchor 和 positive 的亲属关系
                relationship_df = merged_df[
                    (merged_df['Subject1'] == anchor_subject[idx]) &
                    (merged_df['Subject2'] == positive_subject[idx])
                ]
                
                if not relationship_df.empty:
                    relationship1 = relationship_df.iloc[0]['Relationship1']
                    relationship2 = relationship_df.iloc[0]['Relationship2']
                    
                    # 通过亲属关系映射找到对应的关系类别
                    for key, value in relationship_accuracies.items():
                        if relationship1 == value[0] and relationship2 == value[1]:
                            relationship_stats[key]['total'] += 1  # 正对和负对的总样本数量相同

                            # 统计正对的预测情况
                            if pos_dist[idx] < threshold:
                                relationship_stats[key]['correct_pos'] += 1
                                correct_pos_total += 1  # 全局正对统计
                            
                            # 统计负对的预测情况
                            if neg_dist[idx] >= threshold:
                                relationship_stats[key]['correct_neg'] += 1
                                correct_neg_total += 1  # 全局负对统计

                            total_pos_neg += 1  # 记录正对和负对样本的总数

    # 计算整体准确率
    overall_accuracy = (correct_pos_total + correct_neg_total) / (2 * total_pos_neg)
    print(f"Overall Test Accuracy: {overall_accuracy * 100:.2f}%")

    # 打印每种亲属关系的统计信息
    for relationship, stats in relationship_stats.items():
        if stats['total'] > 0:
            pos_accuracy = stats['correct_pos'] / stats['total']
            neg_accuracy = stats['correct_neg'] / stats['total']
            overall_relationship_accuracy = (stats['correct_pos'] + stats['correct_neg']) / (2 * stats['total'])
            print(f"共有 {stats['total']} 条 {relationship} 样本正对, {stats['total']} 条 {relationship} 样本负对")
            print(f"{relationship} 样本正对预测正确 {stats['correct_pos']} 条，负对预测正确 {stats['correct_neg']} 条")
            print(f"{relationship} 的总准确率为: {overall_relationship_accuracy * 100:.2f}%")
        else:
            print(f"{relationship}: 0 条样本")

    return overall_accuracy


# 使用最佳阈值并统计每种亲属关系和整体的预测情况
test_accuracy = evaluate_with_threshold_by_relationship(model, val_loader, best_threshold)
