In [23]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np

In [24]:
# Location of the dataset CSV file
dataset_path = 'datasetv1/dsv1_1.csv'

# Parse the CSV into a DataFrame, then preview its first five rows
df = pd.read_csv(filepath_or_buffer=dataset_path)
df.head(n=5)

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,...,AU12_r,AU14_r,AU15_r,AU17_r,AU20_r,AU23_r,AU25_r,AU26_r,AU45_r,label
0,0,0,0,0.0,0.0,0.82,0.0,0.0,0.0,0.0,...,0.37,0.04,0.14,1.69,0.57,1.84,0.0,0.0,0.0,1
1,1,1,1,0.0,0.0,0.75,0.0,0.0,0.0,0.0,...,0.15,0.18,0.37,1.31,0.52,1.06,0.0,0.1,0.0,1
2,2,2,2,0.0,0.0,0.68,0.0,0.0,0.0,0.0,...,0.02,0.17,0.68,1.0,0.48,0.45,0.0,0.1,0.0,1
3,3,3,3,0.0,0.0,0.77,0.0,0.0,0.07,0.0,...,0.0,0.08,1.11,0.86,0.3,0.15,0.01,0.25,0.0,1
4,4,4,4,0.19,0.0,0.82,0.03,0.0,0.22,0.0,...,0.0,0.01,1.3,0.91,0.23,0.0,0.07,0.37,0.0,1


In [25]:
# Drop every column whose name begins with 'Unnamed'
# (columns created by pandas during cleaning)
is_unnamed = df.columns.str.startswith('Unnamed')
df = df.loc[:, ~is_unnamed]
print(df.head())


   AU01_r  AU02_r  AU04_r  AU05_r  AU06_r  AU07_r  AU09_r  AU10_r  AU12_r  \
0    0.00     0.0    0.82    0.00     0.0    0.00     0.0    0.22    0.37   
1    0.00     0.0    0.75    0.00     0.0    0.00     0.0    0.49    0.15   
2    0.00     0.0    0.68    0.00     0.0    0.00     0.0    0.57    0.02   
3    0.00     0.0    0.77    0.00     0.0    0.07     0.0    0.60    0.00   
4    0.19     0.0    0.82    0.03     0.0    0.22     0.0    0.62    0.00   

   AU14_r  AU15_r  AU17_r  AU20_r  AU23_r  AU25_r  AU26_r  AU45_r  label  
0    0.04    0.14    1.69    0.57    1.84    0.00    0.00     0.0      1  
1    0.18    0.37    1.31    0.52    1.06    0.00    0.10     0.0      1  
2    0.17    0.68    1.00    0.48    0.45    0.00    0.10     0.0      1  
3    0.08    1.11    0.86    0.30    0.15    0.01    0.25     0.0      1  
4    0.01    1.30    0.91    0.23    0.00    0.07    0.37     0.0      1  


In [26]:
# Shuffling the dataset to randomize the row order.
# A fixed seed makes the permutation reproducible after a kernel restart;
# without random_state every run produced a different ordering.
SHUFFLE_SEED = 42
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [27]:
# Preview the first rows of the frame after shuffling
df.head()

Unnamed: 0,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,AU14_r,AU15_r,AU17_r,AU20_r,AU23_r,AU25_r,AU26_r,AU45_r,label
0,0.24,0.1,0.0,0.0,0.0,0.0,0.15,0.0,0.0,0.0,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0.87,0.07,0.21,1.25,1.63,1.53,0.0,0.0,0.0,0.0,0.71,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18,0.14,0.52,0.0,0.68,0.21,0.0,0
3,0.0,0.0,0.0,0.0,0.21,1.29,0.07,0.55,0.24,0.76,0.0,0.28,0.41,0.0,0.84,0.55,0.31,1
4,0.0,0.0,0.0,0.0,0.49,2.21,0.08,0.68,0.46,0.08,0.32,0.0,0.08,0.0,0.98,0.31,0.0,1


In [31]:
# Directories used while cleaning the individual feature map files.
# Folder whose files get the 'real' label encoded
real_path = 'Feature_Maps/Real/'
# Folder whose files get the 'fake' label encoded
fakes_path = 'Feature_Maps/Fake/'
# Folder of per-file feature CSVs that are loaded into arrays later on.
# NOTE: this rebinds dataset_path, which earlier held the dataset CSV file path.
dataset_path = 'datasetv1/features/'

In [9]:
# Iterating files and encoding labels for fake features.
# Each CSV is rewritten in place with the string label 'fake' replaced by 1.
for filename in tqdm(os.listdir(fakes_path)):
    # Only CSV files are feature maps; skip any other directory entry
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(fakes_path, filename)
    # Use a dedicated name so the dataset frame `df` is not overwritten
    fake_df = pd.read_csv(file_path)
    fake_df.loc[fake_df['label'] == 'fake', 'label'] = 1
    # index=False keeps the file's column layout unchanged. Writing the
    # default RangeIndex would add a new 'Unnamed: N' column on every run,
    # making this cell non-idempotent.
    # NOTE(review): any reader that passes index_col=0 assumes the file
    # already has a leading index column -- confirm for these files.
    fake_df.to_csv(file_path, index=False)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1116/1116 [00:22<00:00, 48.74it/s]


In [10]:
# Iterating files and encoding labels for real features.
# Each CSV is rewritten in place with the string label 'real' replaced by 0.
for filename in tqdm(os.listdir(real_path)):
    # Only CSV files are feature maps; skip any other directory entry
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(real_path, filename)
    # Use a dedicated name so the dataset frame `df` is not overwritten
    real_df = pd.read_csv(file_path)
    real_df.loc[real_df['label'] == 'real', 'label'] = 0
    # index=False keeps the file's column layout unchanged. Writing the
    # default RangeIndex would add a new 'Unnamed: N' column on every run,
    # making this cell non-idempotent.
    # NOTE(review): any reader that passes index_col=0 assumes the file
    # already has a leading index column -- confirm for these files.
    real_df.to_csv(file_path, index=False)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 590/590 [00:11<00:00, 50.12it/s]


In [64]:
def load_csv_file(file_path):
    """Load one CSV file and split it into a feature array and a label.

    The first CSV column is used as the row index. The label is taken from
    the first row of the 'label' column. Columns whose names start with
    'label' or 'Unnamed' are left out of the features.

    Parameters
    ----------
    file_path : path of the CSV file, passed to ``pd.read_csv``.

    Returns
    -------
    tuple
        ``(frame_features, label)``: a NumPy array built from the remaining
        columns (one row per CSV row), and the label value.
    """
    table = pd.read_csv(file_path, index_col=0)
    first_label = table['label'].iloc[0]

    # Build one mask covering both groups of columns to exclude
    column_names = table.columns
    excluded = (
        column_names.str.startswith('label')
        | column_names.str.startswith('Unnamed')
    )
    feature_matrix = table.loc[:, ~excluded].to_numpy()

    return feature_matrix, first_label

In [65]:
video_data = []
labels = []

# Iterate over csv files and load into list as numpy array
# Labels loaded separately; labels[i] belongs to video_data[i]
# os.listdir returns names in arbitrary order, so sort them to get the same
# sequence on every run. Filtering first makes the progress bar count only
# the CSV files that are actually loaded.
csv_filenames = sorted(
    name for name in os.listdir(dataset_path) if name.endswith('.csv')
)
for filename in tqdm(csv_filenames):
    file_path = os.path.join(dataset_path, filename)

    frame_features, label = load_csv_file(file_path)
    labels.append(label)
    video_data.append(frame_features)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 1706/1706 [00:04<00:00, 341.32it/s]


In [61]:
# Number of feature arrays loaded (one per CSV file)
print(len(video_data))

1706


In [62]:
# Number of labels collected; one is appended for every feature array
print(len(labels))

1706


In [68]:
# Length of the first row of the first loaded array (feature values per row)
print(len(video_data[0][0]))

18
