## Create Training Set:

In [2]:
from fastai.vision import *
from tqdm import tqdm
import cv2

In [3]:
source_path = Path('../data/')
save_path = Path('../val_data/')

## Create Validation Set:

### Videos from folder 0->3

In [4]:
def get_annots(folder):
    f = get_files(folder, extensions=['.json']) # Extract the metadata
    a = pd.read_json(f[0]).T
    a.reset_index(inplace=True)
    a.rename({'index':'fname'}, axis=1, inplace=True)
    a.fname = folder.name + '/' + a.fname.astype(str)
    return a

In [23]:
def get_metadata(SOURCE, include=None, exclude=None):
    """
    extract the metadata from all the folders contained in SOURCE.
    """
    meta = []
    
    for i in SOURCE.iterdir(): # iterate over the files in SOURCE
        if i.is_dir() and (i/'metadata.json').is_file(): # Get only the directories
            if include is not None and i.name in include: # Check if in include
                print(f'Extracting data from the {i.name} folder')
                a = get_annots(i)
                meta.append(a)
            if exclude is not None and i.name not in exclude: # Check if not in exlcude
                print(f'Extracting data from the {i.name} folder')
                a = get_annots(i)
                meta.append(a)
    
    metadata = pd.concat(meta)
    metadata.reset_index(drop=True, inplace=True)
    
    return metadata

In [6]:
source_path = Path('../data/')

In [7]:
#train_meta = get_metadata(source_path, exclude=['dfdc_train_part_0', 'dfdc_train_part_1', 'dfdc_train_part_2', 'dfdc_train_part_3'])
val_meta = get_metadata(source_path, include=['dfdc_train_part_0', 'dfdc_train_part_1', 'dfdc_train_part_2', 'dfdc_train_part_3'])

Extracting data from the dfdc_train_part_2 folder
Extracting data from the dfdc_train_part_3 folder
Extracting data from the dfdc_train_part_0 folder
Extracting data from the dfdc_train_part_1 folder


In [8]:
val_meta.head()

Unnamed: 0,fname,label,split,original
0,dfdc_train_part_2/qyyebirxwe.mp4,FAKE,train,ejhhokmvpe.mp4
1,dfdc_train_part_2/ntjlknlcvn.mp4,FAKE,train,nthpnwylxo.mp4
2,dfdc_train_part_2/qivpypiwlp.mp4,FAKE,train,hszwwswewp.mp4
3,dfdc_train_part_2/lpkgabskbw.mp4,FAKE,train,rnxzqumvvl.mp4
4,dfdc_train_part_2/vctemjbusz.mp4,FAKE,train,sznkemeqro.mp4


In [9]:
#export
def runnit(f):
    def _func(*args, **kwargs):
        command = f(*args, **kwargs)
        p = subprocess.run(command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return p
    return _func

In [10]:
#export
def _ffmpeg_web_defaults():
    '''
    These are some reasonable values for uploading. i.e. YouTube, etc.
    '''
    return dict(video_encoder='libx264', video_bitrate='1.5M', fps=30, scale=.5, crf=23, #17-28
                audio_encoder='aac', audio_bitrate='128k')

def _ffmpeg_defaults():
    return dict(video_encoder=None, video_bitrate=None, fps=None, scale=None, crf=None,
                audio_encoder=None, audio_bitrate=None)

def _ffmpeg_fmts():
    "ffmpeg options syntax"
    return dict(video_encoder='-c:v {video_encoder:s}', 
                video_bitrate='-b:v {video_bitrate:s}', 
                fps='-r {fps:d}', 
                #scale='-vf scale=iw*{scale:.2f}:ih*{scale:.2f}', 
                scale='-vf scale=iw*{scale:.2f}:-1',
                crf='-crf {crf:d}',
                audio_bitrate='-b:a {audio_bitrate}', 
                audio_encoder='-c:a {audio_encoder}')

#@show_vid_info
@runnit
def run_ffmpeg(fpath_from=None, fpath_to=None, **kwargs):
    '''
    Run ffmpeg
    '''
    ps = _ffmpeg_defaults()
    ps.update(kwargs)
    pstr = []
    for n, s in _ffmpeg_fmts().items():
        if ps[n] is None: pstr.append('')
        else: pstr.append(s.format(**ps))
    pstr = ' '.join(pstr)
    return f'ffmpeg -i {fpath_from} {pstr} {fpath_to}'

In [11]:
def create_degraded_videos(source_path, save_path, fnames):
    
        for fname in tqdm(fnames):
            deg = random.randint(1,3)
            
            if deg==1:
                f = Path(f'{save_path}/{Path(fname).parents[0].name}').mkdir(parents=True, exist_ok=True)
                fpath_to = Path(fname).parts[-2]+'/copy_'+ Path(fname).parts[-1]
                
                !cp {source_path/fname} {save_path/fpath_to}
            
            else:
                f = Path(f'{save_path}/{Path(fname).parents[0].name}').mkdir(parents=True, exist_ok=True)
                fpath_to = Path(fname).parts[-2]+'/degraded_'+ Path(fname).parts[-1]
                run_ffmpeg(fpath_from=source_path/fname, fpath_to=save_path/fpath_to, crf=28, scale=0.5, fps=15)

In [12]:
create_degraded_videos(source_path, save_path, val_meta.fname)

100%|██████████| 6236/6236 [1:30:13<00:00,  1.15it/s]
