In [1]:
import pandas as pd
import os

In [2]:
def from_txt_to_csv(dataset='disfa', img_path_file=None, label_file=None):
    """Combine an image-path list and a label list into one DataFrame.

    Args:
        dataset (str): Dataset identifier. Only 'disfa' is supported.
        img_path_file (str): Text file read into a single 'img_path' column.
        label_file (str): Text file of space-separated values, read into the
            columns AU1, AU2, AU4, AU6, AU9, AU12, AU25 and AU26.

    Returns:
        pandas.DataFrame: The 'img_path' column followed by the eight AU
        columns, with rows paired by position.

    Raises:
        ValueError: If `dataset` is not supported, or if the two files do not
            contain the same number of rows.
    """
    # Fail loudly on an unknown dataset instead of reaching the return
    # statement with `img_path` / `label` unbound.
    if dataset != 'disfa':
        raise ValueError(f"Unsupported dataset: {dataset!r} (expected 'disfa')")

    img_path = pd.read_csv(img_path_file, names=['img_path'])
    label = pd.read_csv(label_file,
                        names=['AU1', 'AU2', 'AU4', 'AU6', 'AU9', 'AU12', 'AU25', 'AU26'],
                        sep=' ')

    # A column-wise concat aligns on the index and would silently pad the
    # shorter frame with NaN, so reject mismatched inputs explicitly.
    if len(img_path) != len(label):
        raise ValueError(
            f"Row count mismatch: {len(img_path)} image paths vs {len(label)} label rows"
        )

    return pd.concat([img_path, label], axis=1)

In [9]:
def add_person_column_to_second(df):
    """Return a copy of `df` with a 'person' column placed second.

    The 'person' value is the first '/'-separated component of the directory
    part of each 'img_path' entry (e.g. "SN002/0.jpg" -> "SN002").

    Args:
        df (pandas.DataFrame): Frame containing an 'img_path' column of
            string paths. It is not modified.

    Returns:
        pandas.DataFrame: A new frame with the same rows and columns as `df`
        plus 'person' at column position 1. An existing 'person' column is
        replaced.
    """
    # Derive the person ID; works for paths shaped like "SN002/0.jpg".
    person = df['img_path'].apply(lambda x: os.path.dirname(x).split('/')[0])

    # Build the result on a new frame so the caller's DataFrame is left
    # untouched; drop any stale 'person' column so re-running is harmless.
    result = df.drop(columns=['person'], errors='ignore')

    # Place 'person' directly at the second column position.
    result.insert(1, 'person', person)

    return result

In [10]:
# Directory holding the DISFA image-path / label list files. Kept in one
# place so relocating the data only requires editing this line.
DISFA_LIST_DIR = '/mnt/sda/yiren/code/uda/cross_au/data/disfa/list'

# Fold 1: pair each image-path list with its label list.
train_1f = from_txt_to_csv('disfa',
                           img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_img_path_fold1.txt'),
                           label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_label_fold1.txt'))
test_1f = from_txt_to_csv('disfa',
                          img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_img_path_fold1.txt'),
                          label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_label_fold1.txt'))

In [11]:
# Row counts for the fold-1 split: (train, test, train + test).
n_train_1f = len(train_1f)
n_test_1f = len(test_1f)
n_train_1f, n_test_1f, n_train_1f + n_test_1f

(86731, 43583, 130314)

In [12]:
# Preview the assembled fold-1 training frame (image path plus the AU label columns).
train_1f

Unnamed: 0,img_path,AU1,AU2,AU4,AU6,AU9,AU12,AU25,AU26
0,SN002/0.jpg,0,0,0,0,0,0,0,0
1,SN002/1.jpg,0,0,0,0,0,0,0,0
2,SN002/2.jpg,0,0,0,0,0,0,0,0
3,SN002/3.jpg,0,0,0,0,0,0,0,0
4,SN002/4.jpg,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
86726,SN029/4840.jpg,1,1,0,0,0,0,0,0
86727,SN029/4841.jpg,1,1,0,0,0,0,0,0
86728,SN029/4842.jpg,1,1,0,0,0,0,0,0
86729,SN029/4843.jpg,1,1,0,0,0,0,0,0


In [13]:
# Tag every row with its subject ID, then persist both fold-1 splits.
train_1f = add_person_column_to_second(train_1f)
test_1f = add_person_column_to_second(test_1f)
# `index` is documented as a bool; pass False explicitly to omit the row index.
train_1f.to_csv('train_1f.csv', index=False)
test_1f.to_csv('test_1f.csv', index=False)

In [14]:
# Stack the fold-1 train and test splits into a single frame.
# ignore_index=True renumbers the rows so the combined frame does not carry
# duplicated index labels from the two source frames.
total_file = pd.concat([train_1f, test_1f], axis=0, ignore_index=True)
print(len(total_file))
# `index` is documented as a bool; pass False explicitly to omit the row index.
total_file.to_csv('total.csv', index=False)

130314


## Fold 2

In [16]:
# Directory holding the DISFA image-path / label list files. Kept in one
# place so relocating the data only requires editing this line.
DISFA_LIST_DIR = '/mnt/sda/yiren/code/uda/cross_au/data/disfa/list'

# Fold 2: pair each image-path list with its label list.
train_2f = from_txt_to_csv('disfa',
                           img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_img_path_fold2.txt'),
                           label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_label_fold2.txt'))
test_2f = from_txt_to_csv('disfa',
                          img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_img_path_fold2.txt'),
                          label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_label_fold2.txt'))

In [17]:
# Row counts for the fold-2 split: (train, test, train + test).
n_train_2f = len(train_2f)
n_test_2f = len(test_2f)
n_train_2f, n_test_2f, n_train_2f + n_test_2f

(87132, 43182, 130314)

In [18]:
# Tag every row with its subject ID, then persist both fold-2 splits.
train_2f = add_person_column_to_second(train_2f)
test_2f = add_person_column_to_second(test_2f)
# `index` is documented as a bool; pass False explicitly to omit the row index.
train_2f.to_csv('train_2f.csv', index=False)
test_2f.to_csv('test_2f.csv', index=False)

## Fold 3

In [19]:
# Directory holding the DISFA image-path / label list files. Kept in one
# place so relocating the data only requires editing this line.
DISFA_LIST_DIR = '/mnt/sda/yiren/code/uda/cross_au/data/disfa/list'

# Fold 3: pair each image-path list with its label list.
train_3f = from_txt_to_csv('disfa',
                           img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_img_path_fold3.txt'),
                           label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_train_label_fold3.txt'))
test_3f = from_txt_to_csv('disfa',
                          img_path_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_img_path_fold3.txt'),
                          label_file=os.path.join(DISFA_LIST_DIR, 'DISFA_test_label_fold3.txt'))

In [20]:
# Row counts for the fold-3 split: (train, test, train + test).
n_train_3f = len(train_3f)
n_test_3f = len(test_3f)
n_train_3f, n_test_3f, n_train_3f + n_test_3f

(86765, 43549, 130314)

In [21]:
# Tag every row with its subject ID, then persist both fold-3 splits.
train_3f = add_person_column_to_second(train_3f)
test_3f = add_person_column_to_second(test_3f)
# `index` is documented as a bool; pass False explicitly to omit the row index.
train_3f.to_csv('train_3f.csv', index=False)
test_3f.to_csv('test_3f.csv', index=False)