In [64]:
import pandas as pd
import os

In [65]:
def from_txt_to_csv(dataset='bp4d', img_path_file=None, label_file=None):
    """Combine an image-path list and an AU-label list into one DataFrame.

    Both inputs are header-less text files. The path file is read into a
    single ``img_path`` column; the label file is read as space-separated
    values into the twelve AU columns listed in ``au_columns`` below. The
    two frames are concatenated side by side (row i of one next to row i of
    the other).

    Args:
        dataset (str): ``'bp4d'`` or ``'disfa'``.
        img_path_file (str | None): Path of the image-path list. Required
            for ``'bp4d'``. For ``'disfa'`` the legacy hard-coded list is
            used when this is omitted.
        label_file (str | None): Path of the label list; same rules as
            ``img_path_file``.

    Returns:
        pandas.DataFrame: ``img_path`` followed by the AU columns.

    Raises:
        ValueError: If ``dataset`` is not recognised, or if a required file
            path is missing for ``'bp4d'``.
    """
    au_columns = ['AU1', 'AU2', 'AU4', 'AU6', 'AU7', 'AU10',
                  'AU12', 'AU14', 'AU15', 'AU17', 'AU23', 'AU24']

    if dataset == 'bp4d':
        if img_path_file is None or label_file is None:
            raise ValueError(
                "img_path_file and label_file are required for dataset 'bp4d'")
    elif dataset == 'disfa':
        # Previously this branch ignored the arguments and always read the
        # paths below. They are kept only as a fallback so existing
        # argument-less calls keep working; explicit arguments now win.
        # NOTE(review): the fallback files and the AU column names are the
        # BP4D ones -- this looks like a copy-paste placeholder; confirm the
        # intended DISFA lists and label schema.
        if img_path_file is None:
            img_path_file = '/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_img_path_fold1.txt'
        if label_file is None:
            label_file = '/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_label_fold1.txt'
    else:
        # Fail with a clear message instead of an UnboundLocalError at the
        # return statement.
        raise ValueError(
            "Unknown dataset {!r}; expected 'bp4d' or 'disfa'".format(dataset))

    img_path = pd.read_csv(img_path_file, names=['img_path'])
    label = pd.read_csv(label_file, names=au_columns, sep=' ')
    return pd.concat([img_path, label], axis=1)

In [66]:
def add_person_column_to_second(df):
    """Return a copy of ``df`` with a ``person`` column in second position.

    The person value is derived from each ``img_path`` entry: the name of
    the file's parent directory, cut at the first underscore (for example
    ``'/data/F001_T1/0001.jpg'`` gives ``'F001'``).

    Args:
        df (pandas.DataFrame): Frame with an ``img_path`` column of path
            strings. It is left unmodified.

    Returns:
        pandas.DataFrame: A new frame with ``person`` at column position 1
        and every other column in its original relative order.

    Raises:
        KeyError: If ``df`` has no ``img_path`` column.
    """
    # Extract the person identifier from the parent directory name.
    person = df['img_path'].apply(
        lambda path: os.path.basename(os.path.dirname(path)).split('_')[0]
    )

    # Work on a new frame so the caller's DataFrame is not mutated; dropping
    # any existing 'person' column keeps repeated calls idempotent.
    result = df.drop(columns='person', errors='ignore')

    # Place the person column directly after the first column. Raw values
    # are passed so the insertion is positional rather than index-aligned.
    result.insert(1, 'person', person.to_numpy())

    return result

In [67]:
# Fold 1: read the BP4D train and test lists (image paths + AU labels).
train_1f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_img_path_fold1.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_label_fold1.txt',
)
test_1f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_img_path_fold1.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_label_fold1.txt',
)

In [68]:
# Row counts of the fold-1 train and test splits, then their combined total.
(
    len(train_1f),
    len(test_1f),
    len(train_1f) + len(test_1f),
)

(99611, 42554, 142165)

In [69]:
# Add the person column to both fold-1 splits, then write each split to CSV.
train_1f = add_person_column_to_second(train_1f)
test_1f = add_person_column_to_second(test_1f)

# `index` is documented as a bool; pass False explicitly (instead of relying
# on None being falsy) to leave the row index out of the files.
train_1f.to_csv('train_1f.csv', index=False)
test_1f.to_csv('test_1f.csv', index=False)

In [71]:
# Stack the fold-1 train and test rows into one frame and export it.
# The original row labels of each split are kept, so they repeat in
# `total_file`; they are not written to the CSV.
total_file = pd.concat([train_1f, test_1f], axis=0)
print(len(total_file))

# `index` is documented as a bool; pass False explicitly (instead of relying
# on None being falsy) to leave the row index out of the file.
total_file.to_csv('total.csv', index=False)

142165


## Fold 2

In [72]:
# Fold 2: read the BP4D train and test lists (image paths + AU labels).
train_2f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_img_path_fold2.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_label_fold2.txt',
)
test_2f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_img_path_fold2.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_label_fold2.txt',
)

In [73]:
# Row counts of the fold-2 train and test splits, then their combined total.
(
    len(train_2f),
    len(test_2f),
    len(train_2f) + len(test_2f),
)

(93106, 49059, 142165)

In [74]:
# Add the person column to both fold-2 splits, then write each split to CSV.
train_2f = add_person_column_to_second(train_2f)
test_2f = add_person_column_to_second(test_2f)

# `index` is documented as a bool; pass False explicitly (instead of relying
# on None being falsy) to leave the row index out of the files.
train_2f.to_csv('train_2f.csv', index=False)
test_2f.to_csv('test_2f.csv', index=False)

## Fold 3

In [75]:
# Fold 3: read the BP4D train and test lists (image paths + AU labels).
train_3f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_img_path_fold3.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_train_label_fold3.txt',
)
test_3f = from_txt_to_csv(
    dataset='bp4d',
    img_path_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_img_path_fold3.txt',
    label_file='/mnt/sda/yiren/code/uda/cross_au/data/bp4d/list/BP4D_test_label_fold3.txt',
)

In [76]:
# Row counts of the fold-3 train and test splits, then their combined total.
(
    len(train_3f),
    len(test_3f),
    len(train_3f) + len(test_3f),
)

(91613, 50552, 142165)

In [77]:
# Add the person column to both fold-3 splits, then write each split to CSV.
train_3f = add_person_column_to_second(train_3f)
test_3f = add_person_column_to_second(test_3f)

# `index` is documented as a bool; pass False explicitly (instead of relying
# on None being falsy) to leave the row index out of the files.
train_3f.to_csv('train_3f.csv', index=False)
test_3f.to_csv('test_3f.csv', index=False)