In [1]:
import torch
import torchvision.transforms as tvt
from torch.utils.data import DataLoader
from tqdm import tqdm
import shutil
import pandas as pd
import numpy as np
import os
torch.manual_seed(1234)
from evaluate_metrics import compute_eer
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, f1_score, recall_score, precision_score
font = {'family' : 'normal',
        'weight' : 'normal',
        'size'   : 14}

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [18]:
# load in the wild protocol
supcon_feb07_proto = pd.read_csv("../traindata/supcon_cnsl_feb07/protocol.txt", sep=" ", header=None)
supcon_feb07_proto.columns = ['utt', 'subset', '-', 'label']
supcon_feb07_proto['label'].value_counts()
supcon_feb07_proto['utt'] = supcon_feb07_proto['utt'].apply(lambda x: x.split("/")[-1].split(".")[0])
supcon_feb07_proto


Unnamed: 0,utt,subset,-,label
0,LA_T_3424442,train,-,bonafide
1,hifigan_LA_T_3424442,train,-,spoof
2,waveglow_LA_T_3424442,train,-,spoof
3,hn-sinc-nsf-hifi_LA_T_3424442,train,-,spoof
4,LA_T_3976536,train,-,bonafide
...,...,...,...,...
73355,03MAPK0028_000058,dev,-,spoof
73356,LA_T_9861717,dev,-,spoof
73357,000036_tortoise-tts_train_grace,dev,-,spoof
73358,LA_T_4019514,dev,-,spoof


In [19]:
supcon_feb07_df = pd.read_csv("../traindata/supcon_cnsl_feb07/df_meta_feb07.csv")
supcon_feb07_df.fillna("-", inplace=True)
supcon_feb07_df = supcon_feb07_df.astype(str)
supcon_feb07_df['Source link'] = supcon_feb07_df['Source link'].apply(lambda x: x.replace("youtu.be", "youtube.com").replace("www.youtube.com", "youtube.com"))
supcon_feb07_df


Unnamed: 0,Utterence name (file name),Speaker name,TTS or VC,Is multi-speaker?,Language,Gender,Age,Noise type 1,Source link,utt,path,label,group,Model
0,ziroud_6,TTS_Hooligun,TTS,No,Korean,Male,Child,Music,https://youtube.com/672_mR42M2I?si=ZhYgv0GWEc3...,ziroud_6,fake/ziroud_6.wav,spoof,internOct24,-
1,elle_hanni_3.wav,HANNI,-,No,Korean,Female,Adult,Music,https://youtube.com/watch?v=KoTbLLK9-UM,elle_hanni_3,real/elle_hanni_3.wav,bonafide,internOct24,-
2,230914_Yonhap_Messi_8,Go HyunSil,-,No,Korean,Female,Adult,Music,https://youtube.com/YOgGBg9yD8M?si=W3xTPwXN7zI...,230914_Yonhap_Messi_8,real/230914_Yonhap_Messi_8.wav,bonafide,internOct24,-
3,Frontyard_6,TTS_issue,TTS,No,Korean,Female,Teenager,Music,https://youtube.com/watch?v=eXcekCx7Bxo,Frontyard_6,fake/Frontyard_6.wav,spoof,internOct24,-
4,gridanews_10,TTS_gridanews,TTS,No,Korean,Male,Adult,Music,https://youtube.com/watch?v=J0RlmWPSlgY,gridanews_10,fake/gridanews_10.wav,spoof,internOct24,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112349,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_1,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112350,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_2,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112351,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_3,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112352,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_4,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion


In [14]:
supcon_feb07_df[supcon_feb07_df['group']=='intern_jan']

Unnamed: 0,Utterence name (file name),Speaker name,TTS or VC,Is multi-speaker?,Language,Gender,Age,Noise type 1,Source link,utt,path,label,group,Model
111542,Olivia_Vella_1.wav,Olivia Vella,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=Eu_Gl0woeOw,Olivia_Vella_1,./2024_RealSample/Olivia_Vella_1.wav,bonafide,intern_jan,-
111543,Lucy_Androski_3.wav,Lucy Androski,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=0vdPxLfAsqo,Lucy_Androski_3,./2024_RealSample/Lucy_Androski_3.wav,bonafide,intern_jan,-
111544,Gye-Josu_4.wav,Gye-Josu,-,No,Chinese,Male,Elder,-,https://youtube.com/watch?v=voEZnAtIpCI,Gye-Josu_4,./2024_RealSample/Gye-Josu_4.wav,bonafide,intern_jan,-
111545,yoon_2.wav,President Yoon,-,No,Korean,Male,Adult,-,https://youtube.com/watch?v=7pss74wlg8E,yoon_2,./2024_RealSample/yoon_2.wav,bonafide,intern_jan,-
111546,Kang_JiYoung_3.wav,Kang JiYoung,-,No,Korean,Female,Adult,-,https://youtube.com/watch?v=nrH4sJ4iCuY,Kang_JiYoung_3,./2024_RealSample/Kang_JiYoung_3.wav,bonafide,intern_jan,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112349,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_1,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112350,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_2,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112351,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_3,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion
112352,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_4,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion


In [20]:
proto_merge = pd.merge(supcon_feb07_df, supcon_feb07_proto, on='utt', how='inner')
proto_merge

Unnamed: 0,Utterence name (file name),Speaker name,TTS or VC,Is multi-speaker?,Language,Gender,Age,Noise type 1,Source link,utt,path,label_x,group,Model,subset,-,label_y
0,ziroud_6,TTS_Hooligun,TTS,No,Korean,Male,Child,Music,https://youtube.com/672_mR42M2I?si=ZhYgv0GWEc3...,ziroud_6,fake/ziroud_6.wav,spoof,internOct24,-,dev,-,spoof
1,elle_hanni_3.wav,HANNI,-,No,Korean,Female,Adult,Music,https://youtube.com/watch?v=KoTbLLK9-UM,elle_hanni_3,real/elle_hanni_3.wav,bonafide,internOct24,-,eval,Music,bonafide
2,230914_Yonhap_Messi_8,Go HyunSil,-,No,Korean,Female,Adult,Music,https://youtube.com/YOgGBg9yD8M?si=W3xTPwXN7zI...,230914_Yonhap_Messi_8,real/230914_Yonhap_Messi_8.wav,bonafide,internOct24,-,dev,Music,bonafide
3,Frontyard_6,TTS_issue,TTS,No,Korean,Female,Teenager,Music,https://youtube.com/watch?v=eXcekCx7Bxo,Frontyard_6,fake/Frontyard_6.wav,spoof,internOct24,-,eval,Music,spoof
4,NDH_Blueclub_6,NDH_Blueclub,TTS,Yes,Korean,Male,Adult,Music,https://youtube.com/watch?v=l1mUUp0puys,NDH_Blueclub_6,fake/NDH_Blueclub_6.wav,spoof,internOct24,-,dev,-,spoof
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45241,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_1,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof
45242,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_2,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof
45243,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_3,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,train,-,spoof
45244,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_4,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof


In [21]:
proto_merge[proto_merge['group']=='intern_jan']

Unnamed: 0,Utterence name (file name),Speaker name,TTS or VC,Is multi-speaker?,Language,Gender,Age,Noise type 1,Source link,utt,path,label_x,group,Model,subset,-,label_y
44434,Olivia_Vella_1.wav,Olivia Vella,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=Eu_Gl0woeOw,Olivia_Vella_1,./2024_RealSample/Olivia_Vella_1.wav,bonafide,intern_jan,-,train,-,bonafide
44435,Lucy_Androski_3.wav,Lucy Androski,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=0vdPxLfAsqo,Lucy_Androski_3,./2024_RealSample/Lucy_Androski_3.wav,bonafide,intern_jan,-,eval,-,bonafide
44436,Gye-Josu_4.wav,Gye-Josu,-,No,Chinese,Male,Elder,-,https://youtube.com/watch?v=voEZnAtIpCI,Gye-Josu_4,./2024_RealSample/Gye-Josu_4.wav,bonafide,intern_jan,-,train,-,bonafide
44437,yoon_2.wav,President Yoon,-,No,Korean,Male,Adult,-,https://youtube.com/watch?v=7pss74wlg8E,yoon_2,./2024_RealSample/yoon_2.wav,bonafide,intern_jan,-,eval,-,bonafide
44438,Kang_JiYoung_3.wav,Kang JiYoung,-,No,Korean,Female,Adult,-,https://youtube.com/watch?v=nrH4sJ4iCuY,Kang_JiYoung_3,./2024_RealSample/Kang_JiYoung_3.wav,bonafide,intern_jan,-,eval,-,bonafide
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45241,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_1,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof
45242,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_2,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof
45243,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_3,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,train,-,spoof
45244,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_4,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,spoof,intern_jan,Diffusion,eval,-,spoof


In [13]:
intern24_df = pd.read_csv("../traindata/intern_2024/metadata_jan30.csv")
intern24_df.fillna("-", inplace=True)
intern24_df = intern24_df.astype(str)
# print(intern24_df.head)
intern24_df['source_group'] = intern24_df.apply(lambda x: x['Source link'].replace("//", "/").split("/")[1], axis=1)
intern24_df['source_group'] = intern24_df['source_group'].apply(lambda x: 'www.youtube.com' if (x == 'youtu.be' or x == 'youtube.com') else x)
intern24_df

Unnamed: 0,utt,Utterence name (file name),Speaker name,Is multi-speaker?,Language,Gender,Noise type 1,Age,Source link,label,path,Model,Noise Type,source_group
0,Olivia_Vella_1,Olivia_Vella_1.wav,Olivia Vella,No,English,Female,-,Teenager,https://www.youtube.com/watch?v=Eu_Gl0woeOw,bonafide,./2024_RealSample/Olivia_Vella_1.wav,-,-,www.youtube.com
1,Lucy_Androski_3,Lucy_Androski_3.wav,Lucy Androski,No,English,Female,-,Teenager,https://www.youtube.com/watch?v=0vdPxLfAsqo,bonafide,./2024_RealSample/Lucy_Androski_3.wav,-,-,www.youtube.com
2,Gye-Josu_4,Gye-Josu_4.wav,Gye-Josu,No,Chinese,Male,-,Elder,https://www.youtube.com/watch?v=voEZnAtIpCI,bonafide,./2024_RealSample/Gye-Josu_4.wav,-,-,www.youtube.com
3,yoon_2,yoon_2.wav,President Yoon,No,Korean,Male,-,Adult,https://www.youtube.com/watch?v=7pss74wlg8E,bonafide,./2024_RealSample/yoon_2.wav,-,-,www.youtube.com
4,Kang_JiYoung_3,Kang_JiYoung_3.wav,Kang JiYoung,No,Korean,Female,-,Adult,https://www.youtube.com/watch?v=nrH4sJ4iCuY,bonafide,./2024_RealSample/Kang_JiYoung_3.wav,-,-,www.youtube.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
807,VC_FanBingBing_to_Tsai_Chin_1,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
808,VC_FanBingBing_to_Tsai_Chin_2,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
809,VC_FanBingBing_to_Tsai_Chin_3,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
810,VC_FanBingBing_to_Tsai_Chin_4,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co


In [4]:
mergedf = pd.merge(supcon_feb07_df, intern24_df, on='utt', how='inner')
mergedf

Unnamed: 0,Utterence name (file name)_x,Speaker name_x,TTS or VC,Is multi-speaker?_x,Language_x,Gender_x,Age_x,Noise type 1_x,Source link_x,utt,...,Language_y,Gender_y,Noise type 1_y,Age_y,Source link_y,label_y,path_y,Model_y,Noise Type,source_group
0,Olivia_Vella_1.wav,Olivia Vella,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=Eu_Gl0woeOw,Olivia_Vella_1,...,English,Female,-,Teenager,https://www.youtube.com/watch?v=Eu_Gl0woeOw,bonafide,./2024_RealSample/Olivia_Vella_1.wav,-,-,www.youtube.com
1,Lucy_Androski_3.wav,Lucy Androski,-,No,English,Female,Teenager,-,https://youtube.com/watch?v=0vdPxLfAsqo,Lucy_Androski_3,...,English,Female,-,Teenager,https://www.youtube.com/watch?v=0vdPxLfAsqo,bonafide,./2024_RealSample/Lucy_Androski_3.wav,-,-,www.youtube.com
2,Gye-Josu_4.wav,Gye-Josu,-,No,Chinese,Male,Elder,-,https://youtube.com/watch?v=voEZnAtIpCI,Gye-Josu_4,...,Chinese,Male,-,Elder,https://www.youtube.com/watch?v=voEZnAtIpCI,bonafide,./2024_RealSample/Gye-Josu_4.wav,-,-,www.youtube.com
3,yoon_2.wav,President Yoon,-,No,Korean,Male,Adult,-,https://youtube.com/watch?v=7pss74wlg8E,yoon_2,...,Korean,Male,-,Adult,https://www.youtube.com/watch?v=7pss74wlg8E,bonafide,./2024_RealSample/yoon_2.wav,-,-,www.youtube.com
4,Kang_JiYoung_3.wav,Kang JiYoung,-,No,Korean,Female,Adult,-,https://youtube.com/watch?v=nrH4sJ4iCuY,Kang_JiYoung_3,...,Korean,Female,-,Adult,https://www.youtube.com/watch?v=nrH4sJ4iCuY,bonafide,./2024_RealSample/Kang_JiYoung_3.wav,-,-,www.youtube.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
807,VC_FanBingBing_to_Tsai_Chin_1.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_1,...,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
808,VC_FanBingBing_to_Tsai_Chin_2.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_2,...,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
809,VC_FanBingBing_to_Tsai_Chin_3.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_3,...,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co
810,VC_FanBingBing_to_Tsai_Chin_4.wav,Tsai Chin,-,-,Chinese,Female,Elder,-,https://huggingface.co/spaces/amphion/singing_...,VC_FanBingBing_to_Tsai_Chin_4,...,Chinese,Female,-,Elder,https://huggingface.co/spaces/amphion/singing_...,spoof,./2024_FakeSample/VC_Sample/VC_FanBingBing_to_...,Diffusion,-,huggingface.co


In [9]:
print(mergedf['group'].value_counts())
print(mergedf['subset_x'].value_counts())


intern_jan    812
Name: group, dtype: int64


KeyError: 'subset_x'