<a href="https://colab.research.google.com/github/romitbarua/MultiModalDeepFake/blob/main/TestingNotebooks/PCAvsMFCC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#### Articles Used to Generate Code
#https://towardsdatascience.com/eigenfaces-recovering-humans-from-ghosts-17606c328184
#https://machinelearningmastery.com/face-recognition-using-principal-component-analysis/

In [2]:
#mount the google drive
# Colab-only step: the paths used later in this notebook all live under
# /content/drive/MyDrive, so this cell has to run before any data is read.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
#import appropriate packages
import os
import cv2
import dlib
from google.colab.patches import cv2_imshow
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import librosa
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
import itertools

In [4]:
# Project root on the mounted Drive.
SYM_PATH = '/content/drive/MyDrive/DeepFakeDetection'
# Make the project root the working directory so the install below targets it.
%cd $SYM_PATH
# Editable install of the project in the current directory; this is what makes
# `packages.DlibManager` importable in the next cell.
%pip install -e .

/content/drive/MyDrive/DeepFakeDetection
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Obtaining file:///content/drive/MyDrive/DeepFakeDetection
Installing collected packages: DeepFake
  Running setup.py develop for DeepFake
Successfully installed DeepFake-0.1.0


In [5]:
#import custom packages
from packages.DlibManager import DlibManager

In [6]:
def normalize(data, debug_mode = False):
  """Standardize every column of ``data`` to zero mean and unit variance.

  Args:
    data: numpy array; statistics are computed along axis 0 (per column).
    debug_mode: when True, print the shapes/dtype involved.

  Returns:
    Array of the same shape as ``data``. Columns whose standard deviation is
    exactly 0 (constant columns) come back as all zeros instead of NaN/inf.
  """
  col_mean = np.mean(data, axis=0)
  col_std = np.std(data, axis=0)

  if debug_mode:
    print('NORMALIZE DATA')
    print('Data Shape: ', data.shape)
    print('Data Type: ', data.dtype)
    print('Mean Shape:', col_mean.shape)
    print('Std Shape:', col_std.shape)
    print('------------------------------')

  # A constant column has std == 0; dividing by 1 instead keeps the centered
  # value (0) rather than poisoning downstream model fitting with NaN.
  safe_std = np.where(col_std == 0, 1.0, col_std)
  return (data - col_mean)/safe_std

In [7]:
#source: https://stackoverflow.com/questions/2566412/find-nearest-value-in-numpy-array
def find_nearest_idx(array, value):
  """Return the index of the element of ``array`` closest to ``value``.

  ``array`` may be any array-like; when several elements are equally close the
  first one wins (argmin semantics).
  """
  distances = np.abs(np.asarray(array) - value)
  return distances.argmin()
  

In [8]:
def match_mfcc_dlib(mfccs, dlib_frames, duration, win_time, hop_time, debug_mode=False):
  """Pick, for every MFCC frame, the video frame closest to it in time.

  Args:
    mfccs: array whose first axis indexes MFCC frames.
    dlib_frames: sequence of video frames (one entry per video frame).
    duration: clip length in seconds.
    win_time: analysis window length in milliseconds.
    hop_time: hop between MFCC frames in milliseconds.
    debug_mode: accepted for signature parity; not used here.

  Returns:
    A list with ``mfccs.shape[0]`` entries taken from ``dlib_frames``
    (frames are repeated as needed).
  """
  # milliseconds -> seconds
  win_seconds = win_time/1000
  hop_seconds = hop_time/1000

  # each MFCC frame is timestamped at the middle of its analysis window
  audio_frame_times = np.arange(0, mfccs.shape[0], 1)*(hop_seconds)+0.5*win_seconds

  # video frames are assumed evenly spread over the clip, stamped at frame start
  num_video_frames = len(dlib_frames)
  video_frame_times = np.arange(0, num_video_frames, 1)*(duration/num_video_frames)

  matched_frames = []
  for audio_time in audio_frame_times:
    nearest = find_nearest_idx(video_frame_times, audio_time)
    matched_frames.append(dlib_frames[nearest])

  return matched_frames


In [9]:
def return_dlib_items(video_path, return_items, detector, predictor, debug_mode=False):
  """Run the project's DlibManager over a video and return the requested items.

  Args:
    video_path: path handed to ``cv2.VideoCapture``.
    return_items: which item to return; only ``'lips'`` is supported.
    detector: dlib face detector passed through to ``DlibManager``.
    predictor: dlib shape predictor passed through to ``DlibManager``.
    debug_mode: when True, print progress information.

  Returns:
    ``dlib_video.lip_frames`` for ``return_items == 'lips'``.

  Raises:
    AssertionError: if ``return_items`` is not a supported value.
  """
  assert return_items in ['lips']

  video = cv2.VideoCapture(video_path)
  try:
    # NOTE(review): DlibManager presumably reads every frame in its constructor,
    # since the capture is released right after — confirm against the package.
    dlib_video = DlibManager(predictor, detector, video)
  finally:
    # release the capture even when DlibManager raises, so a batch run over
    # many videos does not leak open handles on failures
    video.release()

  if debug_mode:
    print('Successfully loaded the Dlib Video')

  if return_items == 'lips':
    if debug_mode:
      print('Returning Lip Frames')
      print('Number of Lip Frames: ', len(dlib_video.lip_frames))
    return dlib_video.lip_frames
  else:
    # only reachable when assertions are disabled (python -O)
    return -1



In [10]:
def return_mfccs(audio_path, sample_rate, window_time, hop_time, mfcc_frames, n_mfcc, delta_order, normalize_mfcc=True, debug_mode=False):
  """Load an audio file and compute its (optionally delta / normalized) MFCCs.

  Args:
    audio_path: path of the audio file to load.
    sample_rate: target sample rate passed to ``librosa.load``.
    window_time: analysis window length in milliseconds.
    hop_time: hop length in milliseconds.
    mfcc_frames: unused; kept so existing callers keep working.
    n_mfcc: number of MFCC coefficients.
    delta_order: 0 returns the raw MFCCs, >0 returns the delta of that order.
    normalize_mfcc: when True, standardize each coefficient with ``normalize``.
    debug_mode: forwarded to ``normalize``.

  Returns:
    ``(mfcc, audio_duration)`` where ``mfcc`` has shape (frames, n_mfcc) and
    ``audio_duration`` is in seconds.
  """
  # use the rate librosa actually loaded at, so every later call agrees with it
  audio, loaded_rate = librosa.load(audio_path, sr=sample_rate)
  # pass y/sr by keyword: without sr the duration silently assumes 22050 Hz
  audio_duration = librosa.get_duration(y=audio, sr=loaded_rate)

  # milliseconds -> samples
  win_length = int(loaded_rate/1000 * window_time)
  hop_length = int(loaded_rate/1000 * hop_time)

  # transpose so rows are frames and columns are coefficients
  mfcc = librosa.feature.mfcc(y=audio, sr=loaded_rate, n_mfcc=n_mfcc, win_length=win_length, hop_length=hop_length).T

  if delta_order > 0:
    # after the transpose time runs along axis 0; librosa's default axis=-1
    # would differentiate across coefficients instead of across time
    mfcc = librosa.feature.delta(mfcc, order=delta_order, axis=0)

  if normalize_mfcc:
    mfcc = normalize(mfcc, debug_mode)

  return mfcc, audio_duration




In [11]:
#load the training data from the disk
def load_FakeAvCeleb_mfcc_lip_data(metadata, ids, video_method, sample=False, audio_sample_rate=22050, window_time=25, hop_time=10, n_mfcc=12, delta_order = 0, debug_mode=False, predictor_path='/content/drive/MyDrive/DeepFakeDetection/model/shape_predictor_68_face_landmarks.dat'):
  """Load lip frames and time-aligned MFCCs for a list of source ids.

  Args:
    metadata: DataFrame with at least 'source', 'method' and 'full_path' columns.
    ids: iterable of source ids to load.
    video_method: value of the 'method' column to select (e.g. 'real').
    sample: when True pick one matching row at random, otherwise the first one.
    audio_sample_rate, window_time, hop_time, n_mfcc, delta_order: forwarded to
      ``return_mfccs`` (times are in milliseconds).
    debug_mode: print intermediate shapes and re-raise loading errors instead
      of skipping the video.
    predictor_path: location of the dlib 68-landmark model file.

  Returns:
    dict mapping id -> [video_path, lip_frames, mfcc], where ``lip_frames`` has
    one entry per MFCC frame. Ids that could not be loaded are left out and
    listed on stdout.
  """
  detector = dlib.get_frontal_face_detector()
  predictor = dlib.shape_predictor(predictor_path)

  video_details = {}
  failed_to_load = []

  for idx, video_id in enumerate(ids):
    print()
    print(f'\rVideo #{idx+1} out of {len(ids)}', end="")

    candidates = metadata[(metadata['source'] == video_id) & (metadata['method'] == video_method)]
    if len(candidates) == 0:
      # no such video for this id/method: skip instead of failing on .values[0]
      failed_to_load.append((video_id, 'no metadata row for this id/method'))
      continue
    if sample:
      candidates = candidates.sample(1)
    video_path = candidates['full_path'].values[0]

    # the audio track is expected next to the video with a .wav extension
    audio_path = video_path.replace('.mp4', '.wav')

    try:
      lip_frames = return_dlib_items(video_path, 'lips', detector, predictor, debug_mode)

      mfcc, duration = return_mfccs(audio_path, audio_sample_rate, window_time, hop_time, len(lip_frames), n_mfcc=n_mfcc, delta_order=delta_order)

      if debug_mode:
        print('Shape of MFCC: ', mfcc.shape)
        print('Shape of Dlib Frames: ', len(lip_frames))

      lip_frames = match_mfcc_dlib(mfcc, lip_frames, duration, window_time, hop_time)

      if debug_mode:
        print('Expanded DLib Frames to Match MFCC Frames')
        print('Expanded Dlib Frame Shape: ', len(lip_frames))

    except Exception as err:
      if debug_mode:
        # debugging wants the full traceback, not a silently skipped video
        raise
      failed_to_load.append((video_path, repr(err)))
      continue

    video_details[video_id] = [video_path, lip_frames, mfcc]

  if failed_to_load:
    print()
    print(f'Failed to load {len(failed_to_load)} of {len(ids)} videos:')
    for failed_item, reason in failed_to_load:
      print(f'  {failed_item}: {reason}')

  return video_details

In [12]:
def prep_images_for_pca(frames, grey_image=True, height=90, width=70):
  """Resize frames to a common size and flatten each one into a row vector.

  Args:
    frames: iterable of images as accepted by ``cv2.cvtColor`` / ``cv2.resize``.
    grey_image: convert each frame from BGR to greyscale first.
    height, width: passed to ``cv2.resize`` as ``dsize=(height, width)``.
      NOTE: OpenCV reads ``dsize`` as (columns, rows), so with the defaults the
      resized array is 70 rows x 90 columns. The rest of the notebook reshapes
      flattened rows back with ``(70, 90)``, which matches this.

  Returns:
    2-D array with one row per frame: ``height*width`` columns for greyscale
    frames, ``height*width*channels`` for colour frames. An empty input gives
    an array of shape ``(0, height*width)``.
  """
  processed_frames = []

  for frame in frames:
    if grey_image:
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (height, width))
    processed_frames.append(frame)

  num_frames = len(processed_frames)
  if num_frames == 0:
    # reshape(0, -1) is ambiguous for numpy, so spell the width out
    return np.array(processed_frames).reshape(0, height*width)

  # -1 keeps colour frames working (height*width*channels columns) and is
  # exactly height*width for greyscale frames
  return np.array(processed_frames).reshape(num_frames, -1)


In [13]:
def run_pca(data, num_components, fit, data_mean=None, pca=None, debug_mode=False):
  """Center ``data`` and project it onto the leading principal components.

  Args:
    data: 2-D array, one sample per row.
    num_components: number of leading components to project onto.
    fit: True fits a new PCA (all components) on ``data``; False reuses the
      given ``pca`` and ``data_mean``.
    data_mean: per-feature mean to subtract when ``fit`` is False.
    pca: fitted object exposing ``components_``; required when ``fit`` is False.
    debug_mode: when True, print the input shape.

  Returns:
    ``(pca, weights, data_mean, data_centered)`` where ``weights`` has shape
    (samples, num_components).

  Raises:
    AssertionError: if ``fit`` is False and ``data_mean`` or ``pca`` is missing.
  """
  assert fit or (data_mean is not None and pca is not None), "Must provide both data_mean and pca when fit is False"

  if debug_mode:
    print('Running PCA:')
    print('Data Shape: ', data.shape)
    print()

  if fit:
    data_mean = np.mean(data, axis=0)
    data_centered = data - data_mean

    # fit only: the full projection that fit_transform returns was never used
    pca = PCA()
    pca.fit(data_centered)

  else:
    data_centered = data - data_mean

  # project by hand so only the first num_components columns are computed
  weights = np.dot(data_centered, pca.components_[:num_components].T)

  return pca, weights, data_mean, data_centered

In [14]:

def generate_mfcc_pca_lips_X_y_data(video_details, fit_pca, num_components=10, pca=None, data_mean=None, debug_mode = False):
  """Build the regression inputs (lip PCA weights) and targets (MFCCs).

  Args:
    video_details: dict mapping id -> [video_path, lip_frames, mfcc].
    fit_pca: True fits a new PCA on these lips; False reuses ``pca``/``data_mean``.
    num_components: number of PCA weights per frame.
    pca, data_mean: fitted PCA and training mean, required when ``fit_pca`` is False.
    debug_mode: when True, print progress and shapes.

  Returns:
    ``(X, y, lips_data_mean, pca, lips_data)``: PCA weights, stacked MFCCs, the
    mean used for centering, the PCA object and the stacked flattened lips.

  Raises:
    ValueError: if ``video_details`` is empty.
  """
  if debug_mode:
    print('Generating X & y for the training data:')
    print('Size of Video Details: ', len(video_details.keys()))

  # collect per-video blocks and stack once at the end; stacking inside the
  # loop re-copies everything accumulated so far on every iteration
  lips_blocks = []
  mfcc_blocks = []

  for idx, (key, details) in enumerate(video_details.items()):

    if debug_mode:
      print(f'Running #{idx+1} of {len(video_details)}')
      print('Items to Unpack: ', len(details))

    video_path, lip_frames, mfcc = details

    if debug_mode:
      print('Number of Lip Frames: ', len(lip_frames))
      print('Shape of Lip 1: ', lip_frames[0].shape)

    pca_ready_lips = prep_images_for_pca(lip_frames)

    if debug_mode:
      print('PCA Processed Lips Shape: ', pca_ready_lips.shape)

    lips_blocks.append(pca_ready_lips)
    mfcc_blocks.append(mfcc)

  if not lips_blocks:
    raise ValueError('video_details is empty: there are no lip frames to run PCA on')

  lips_data = np.vstack(lips_blocks)
  y = np.vstack(mfcc_blocks)

  if debug_mode:
    print('Shape of Lips Data: ', lips_data.shape)

  pca, weights, lips_data_mean, data_centered = run_pca(lips_data, num_components, fit_pca, data_mean, pca, debug_mode=debug_mode)

  X = weights

  return X, y, lips_data_mean, pca, lips_data


In [15]:
def display_eigenfaces(pca, num_components, image_shape=(70, 90)):
  """Plot the leading principal components ("eigenfaces") as greyscale images.

  Args:
    pca: fitted object exposing ``components_``.
    num_components: how many leading components to show.
    image_shape: (rows, cols) used to un-flatten each component; the default
      matches the frames produced by ``prep_images_for_pca``.

  The grid has two rows and is filled column by column, so ten components
  give the same 2x5 layout as before. Unused cells are hidden.
  """
  eigenfaces = pca.components_[:num_components]
  num_shown = len(eigenfaces)
  if num_shown == 0:
    return

  num_rows = 2
  num_cols = -(-num_shown // num_rows)  # ceiling division

  # squeeze=False keeps axes 2-D even when there is a single column
  fig, axes = plt.subplots(num_rows, num_cols, figsize=(16,8), squeeze=False)
  for idx in range(num_rows*num_cols):
    col, row = divmod(idx, num_rows)
    if idx < num_shown:
      axes[row][col].imshow(eigenfaces[idx].reshape(image_shape), cmap="gray")
    else:
      axes[row][col].axis('off')
  plt.show()

In [16]:
def pca_image_reconstruction(pca, centered_image, image_mean, num_components=10, width=70, height=90, image_idx=0):
  """Rebuild one image from its projection onto the leading PCA components.

  Args:
    pca: fitted object exposing ``components_``.
    centered_image: 2-D array of mean-centered, flattened images (one per row).
    image_mean: mean vector added back after reconstruction.
    num_components: number of leading components used.
    width, height: the result is reshaped to ``(width, height)``.
    image_idx: row of ``centered_image`` to reconstruct.

  Returns:
    The reconstructed image as a ``(width, height)`` array.
  """
  basis = pca.components_[:num_components]
  # unpacking doubles as a check that the input is 2-D
  _num_images, _num_features = centered_image.shape

  projections = np.dot(centered_image, basis.T)
  selected_weights = projections[image_idx,:]

  flat_image = np.dot(selected_weights, basis) + image_mean
  return flat_image.reshape(width, height)



In [17]:
def train_models(X, y, model_type='LinearRegression'):
  """Fit one independent regressor per column of ``y``.

  Args:
    X: 2-D feature array (samples x features).
    y: 2-D target array (samples x targets).
    model_type: ``'LinearRegression'`` or ``'RandomForest'``.

  Returns:
    List of fitted models; ``models[i]`` predicts ``y[:, i]``.

  Raises:
    ValueError: if ``model_type`` is not one of the supported names.
    AssertionError: if ``X`` and ``y`` have different numbers of rows.
  """
  supported_types = ('LinearRegression', 'RandomForest')
  if model_type not in supported_types:
    # previously this fell through both branches and failed on an unbound name
    raise ValueError(f'Unknown model_type {model_type!r}; expected one of {supported_types}')

  assert X.shape[0] == y.shape[0], 'X and y must have the same number of rows'

  models = []
  for i in range(y.shape[1]):
    if model_type == 'LinearRegression':
      model = LinearRegression()
    else:
      model = RandomForestRegressor()
    model.fit(X, y[:, i])
    models.append(model)

  return models

In [18]:
def eval_test_data(real_testing_details, fake_testing_details, models, data_mean, pca, components=None, debug_mode=False, num_components=10):
  """Score every test video that exists in both the real and the fake set.

  Args:
    real_testing_details, fake_testing_details: dicts mapping id ->
      [video_path, lip_frames, mfcc]. Ids missing from the fake dict are skipped.
    models: one fitted regressor per MFCC coefficient (see ``train_models``).
    data_mean, pca: training mean and fitted PCA used to project the lips.
    components: optional sequence of PCA weight indices the models were
      trained on; None uses all ``num_components`` weights.
    debug_mode: forwarded to the feature-generation helper.
    num_components: number of PCA weights to compute per frame. It is raised
      automatically so that every index in ``components`` exists.

  Returns:
    ``(real_video_error, fake_video_error)``: two lists with one entry per
    evaluated video, each entry being the list of per-model mean squared errors.
  """
  if components is not None:
    components = list(components)
    if components:
      # indices >= num_components would otherwise fail when selecting columns
      num_components = max(num_components, int(max(components)) + 1)

  real_video_error = []
  fake_video_error = []

  for key in real_testing_details.keys():

    if key not in fake_testing_details.keys():
      continue

    X_test_real, y_test_real, _, pca, _ = generate_mfcc_pca_lips_X_y_data({key:real_testing_details[key]}, fit_pca=False, num_components=num_components, data_mean=data_mean, pca=pca, debug_mode=debug_mode)
    X_test_fake, y_test_fake, _, pca, _ = generate_mfcc_pca_lips_X_y_data({key:fake_testing_details[key]}, fit_pca=False, num_components=num_components, data_mean=data_mean, pca=pca, debug_mode=debug_mode)

    if components is not None:
      X_test_real = X_test_real[:, components]
      X_test_fake = X_test_fake[:, components]

    # one error per MFCC coefficient (models[i] predicts column i)
    real_mfcc_errors = []
    fake_mfcc_errors = []

    for coef_idx, model in enumerate(models):
      y_pred_real = model.predict(X_test_real)
      y_pred_fake = model.predict(X_test_fake)

      real_err = mean_squared_error(y_true=y_test_real[:, coef_idx], y_pred=y_pred_real)
      fake_err = mean_squared_error(y_true=y_test_fake[:, coef_idx], y_pred=y_pred_fake)

      real_mfcc_errors.append(real_err)
      fake_mfcc_errors.append(fake_err)

    real_video_error.append(real_mfcc_errors)
    fake_video_error.append(fake_mfcc_errors)

  return real_video_error, fake_video_error



## Version #1: Run PCA Across All Training Videos

In [None]:
#load the metadata
metadata = pd.read_csv('/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/meta_data.csv')
# keep only the real videos and the wav2lip fakes
metadata = metadata[(metadata['method']=='real') | (metadata['method']=='wav2lip')]
# the CSV's unnamed tenth column is treated as the directory part of the path
metadata = metadata.rename(columns={'Unnamed: 9':'full_path'})
# rewrite the dataset-relative prefix to the Drive location, then append the file name
metadata['full_path'] = metadata['full_path'].str.replace('FakeAVCeleb/', '/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/')
metadata['full_path'] = metadata['full_path'] + '/' + metadata['path']
# restrict the experiment to a single demographic subset
metadata = metadata[(metadata['gender']=='men') & (metadata['race']=='African')]

#generate the training & testing ids
# 80% of the unique source ids go to training, the remainder to testing.
# NOTE(review): no random seed is set in the cells shown, so the split (and
# every number below) changes from run to run.
training_ids = np.random.choice(metadata.source.unique(), int(metadata.source.unique().shape[0]*0.8), replace=False)
testing_ids = np.array(metadata[~metadata['source'].isin(training_ids)]['source'].unique())

#get the training details
print('\nLoading Training Details')
training_details = load_FakeAvCeleb_mfcc_lip_data(metadata, training_ids, 'real', debug_mode=False)

#run PCA & generate X & Y
# fit_pca=True: the PCA and the lip mean are learned from the training videos only
print('\nRunning PCA & Generating Training X & y')
X_train, y_train, lips_data_mean, pca, train_lips_data = generate_mfcc_pca_lips_X_y_data(training_details, True, debug_mode=False)

#fit training models
# one regressor per MFCC coefficient, all trained on real videos
print('\nFitting Models')
models = train_models(X_train, y_train, 'LinearRegression')

#get testing details (1 real video, 1 fake video)
print('\nLoading Real Testing Details')
real_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'real', debug_mode=False)
print('\nLoading Fake Testing Details')
# sample=True picks one wav2lip video at random for each test id
fake_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'wav2lip', sample=True, debug_mode=False)

# evaluate testing
print('\nEvaluating Testing Details')
real_video_error, fake_video_error = eval_test_data(real_testing_details, fake_testing_details, models, lips_data_mean, pca, debug_mode=False)

# Each *_video_error is a list with one entry per video, and each entry is a
# list of per-coefficient MSEs.
# NOTE(review): histplot is given these nested lists directly — confirm that
# seaborn groups them the way the plot is meant to be read.
sns.histplot(fake_video_error, label='Fake', color='blue')
sns.histplot(real_video_error, label='Real', color='red')
plt.legend()
plt.show()


Loading Training Details

Video #1 out of 40
Video #2 out of 40
Video #3 out of 40
Video #4 out of 40
Video #5 out of 40
Video #6 out of 40
Video #7 out of 40
Video #8 out of 40
Video #9 out of 40
Video #10 out of 40
Video #11 out of 40
Video #12 out of 40
Video #13 out of 40
Video #14 out of 40
Video #15 out of 40
Video #16 out of 40
Video #17 out of 40
Video #18 out of 40
Video #19 out of 40
Video #20 out of 40
Video #21 out of 40
Video #22 out of 40
Video #23 out of 40
Video #24 out of 40
Video #25 out of 40
Video #26 out of 40
Video #27 out of 40
Video #28 out of 40

In [None]:
# Show the 10 leading principal components of the `pca` fitted in the cell above.
display_eigenfaces(pca, 10)

In [None]:
#randomly select 5 training videos and take one random lip frame from each

# ids are drawn without replacement, so the 5 frames come from 5 different videos
random_training_ids = np.random.choice(np.array(list(training_details.keys())),5, replace=False)
random_lips = []
for training_id in random_training_ids:
  # index 1 of a training_details entry is that video's list of lip frames
  rand_idx = np.random.choice(np.arange(0, len(training_details[training_id][1])))
  image = training_details[training_id][1][rand_idx]
  random_lips.append(image)


In [None]:
# Flatten the sampled lip frames and center them with the training mean;
# fit=False reuses the already-fitted `pca` instead of fitting a new one.
pca_ready_images = prep_images_for_pca(random_lips)
reconstruction_pca, reconstruction_weights, reconstruction_data_mean, reconstruction_data_centered = run_pca(pca_ready_images, 10, False, lips_data_mean, pca)

# Rebuild each of the 5 frames from its 5 leading components (the 10 passed to
# run_pca above only sizes reconstruction_weights, which is not used here).
reconstructed_images = [pca_image_reconstruction(reconstruction_pca, reconstruction_data_centered, reconstruction_data_mean, num_components=5, width=70, height=90, image_idx=i) for i in range(5)]

# top row: original lip crops, bottom row: their reconstructions
fig, axes = plt.subplots(2,5,figsize=(16,8))
for i in range(5):
    axes[0][i].imshow(random_lips[i])
    axes[1][i].imshow(reconstructed_images[i].reshape((70, 90)), cmap="gray")
plt.show()

#random_lips = np.array(random_lips)
#random_lips
#reconstruced_image = pca_image_reconstruction(pca, centered_image, image_mean, num_components=10, width=70, height=90)


In [None]:
# Same comparison as the previous cell, but reconstructing from the 15 leading
# components (the 5 passed to run_pca only sizes reconstruction_weights, which
# is not used here).
pca_ready_images = prep_images_for_pca(random_lips)
reconstruction_pca, reconstruction_weights, reconstruction_data_mean, reconstruction_data_centered = run_pca(pca_ready_images, 5, False, lips_data_mean, pca)

reconstructed_images = [pca_image_reconstruction(reconstruction_pca, reconstruction_data_centered, reconstruction_data_mean, num_components=15, width=70, height=90, image_idx=i) for i in range(5)]

# top row: original lip crops, bottom row: their reconstructions
fig, axes = plt.subplots(2,5,figsize=(16,8))
for i in range(5):
    axes[0][i].imshow(random_lips[i])
    axes[1][i].imshow(reconstructed_images[i].reshape((70, 90)), cmap="gray")
plt.show()


## Version #2: Predict Delta Order 1 MFCCs

In [None]:
#load the metadata
# Same pipeline as Version #1; the only difference is delta_order=1 on every
# load call, so the regression targets are first-order MFCC deltas.
metadata = pd.read_csv('/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/meta_data.csv')
# keep only the real videos and the wav2lip fakes
metadata = metadata[(metadata['method']=='real') | (metadata['method']=='wav2lip')]
metadata = metadata.rename(columns={'Unnamed: 9':'full_path'})
# rewrite the dataset-relative prefix to the Drive location, then append the file name
metadata['full_path'] = metadata['full_path'].str.replace('FakeAVCeleb/', '/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/')
metadata['full_path'] = metadata['full_path'] + '/' + metadata['path']
metadata = metadata[(metadata['gender']=='men') & (metadata['race']=='African')]

#generate the training & testing ids
# NOTE(review): a fresh unseeded 80/20 split is drawn here, so results are not
# computed on the same videos as Version #1.
training_ids = np.random.choice(metadata.source.unique(), int(metadata.source.unique().shape[0]*0.8), replace=False)
testing_ids = np.array(metadata[~metadata['source'].isin(training_ids)]['source'].unique())

#get the training details
print('\nLoading Training Details')
training_details = load_FakeAvCeleb_mfcc_lip_data(metadata, training_ids, 'real', delta_order=1, debug_mode=False)

#run PCA & generate X & Y
print('\nRunning PCA & Generating Training X & y')
X_train, y_train, lips_data_mean, pca, train_lips_data = generate_mfcc_pca_lips_X_y_data(training_details, True, debug_mode=False)

#fit training models
print('\nFitting Models')
models = train_models(X_train, y_train, 'LinearRegression')

#get testing details (1 real video, 1 fake video)
print('\nLoading Real Testing Details')
real_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'real', delta_order=1, debug_mode=False)
print('\nLoading Fake Testing Details')
# sample=True picks one wav2lip video at random for each test id
fake_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'wav2lip', delta_order=1, sample=True, debug_mode=False)

# evaluate testing
print('\nEvaluating Testing Details')
real_video_error, fake_video_error = eval_test_data(real_testing_details, fake_testing_details, models, lips_data_mean, pca, debug_mode=False)

# each *_video_error entry is one video's list of per-coefficient MSEs
sns.histplot(fake_video_error, label='Fake', color='blue')
sns.histplot(real_video_error, label='Real', color='red')
plt.legend()
plt.show()

## Version #3: Overfit With Single Video

In [None]:
#load the metadata
metadata = pd.read_csv('/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/meta_data.csv')
# keep only the real videos and the wav2lip fakes
metadata = metadata[(metadata['method']=='real') | (metadata['method']=='wav2lip')]
metadata = metadata.rename(columns={'Unnamed: 9':'full_path'})
metadata['full_path'] = metadata['full_path'].str.replace('FakeAVCeleb/', '/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/')
metadata['full_path'] = metadata['full_path'] + '/' + metadata['path']
# unlike the earlier sections, only the gender filter is applied here
metadata = metadata[(metadata['gender']=='men')]

# hand-picked source ids; each one gets its own PCA and its own models
testing_ids = ['id00166', 'id00173', 'id00475']
# errors accumulated over all three single-video experiments
test_real_error = []
test_fake_error = []

for test_id in testing_ids:
#generate the training & testing ids
  # Train and test on the same id: the real video is seen during training, so
  # only the wav2lip fake is unseen.
  # NOTE: `testing_ids` is rebound inside the loop; the `for` keeps iterating
  # over the original list, but the name no longer refers to it afterwards.
  training_ids = np.array([test_id])
  testing_ids = np.array([test_id])

  #get the training details
  print('Loading Training Details')
  training_details = load_FakeAvCeleb_mfcc_lip_data(metadata, training_ids, 'real', debug_mode=False)

  #run PCA & generate X & Y
  print('Running PCA & Generating Training X & y')
  X_train, y_train, lips_data_mean, pca, lips_data = generate_mfcc_pca_lips_X_y_data(training_details, True, debug_mode=False)

  #fit training models
  print('Fitting Models')
  models = train_models(X_train, y_train, 'LinearRegression')

  #get testing details (1 real video, 1 fake video)
  print('Loading Real Testing Details')
  real_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'real', debug_mode=False)
  print('Loading Fake Testing Details')
  # sample=True picks one wav2lip video at random for this id
  fake_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'wav2lip', sample=True, debug_mode=False)

  # evaluate testing
  print('Evaluating Testing Details')
  real_video_error, fake_video_error = eval_test_data(real_testing_details, fake_testing_details, models, lips_data_mean, pca, debug_mode=False)
  test_real_error.extend(real_video_error)
  test_fake_error.extend (fake_video_error)

# each accumulated entry is one video's list of per-coefficient MSEs
sns.histplot(test_fake_error, label='Fake', color='blue')
sns.histplot(test_real_error, label='Real', color='red')
plt.legend()
plt.show()

## PCA & MFCC Search

In [19]:
#source: https://stackoverflow.com/questions/464864/how-to-get-all-possible-combinations-of-a-list-s-elements
#enumerate every subset of the 15 pca component indices, ordered by subset
#size; the first entry is the empty subset
weight_idx = list(np.arange(0, 15, 1))
weight_combs = [
    subset
    for subset_size in range(len(weight_idx) + 1)
    for subset in itertools.combinations(weight_idx, subset_size)
]

In [None]:
#load the metadata
metadata = pd.read_csv('/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/meta_data.csv')
# keep only the real videos and the wav2lip fakes
metadata = metadata[(metadata['method']=='real') | (metadata['method']=='wav2lip')]
metadata = metadata.rename(columns={'Unnamed: 9':'full_path'})
metadata['full_path'] = metadata['full_path'].str.replace('FakeAVCeleb/', '/content/drive/MyDrive/DeepFakeDetection/data/FakeAVCeleb_v1.2/')
metadata['full_path'] = metadata['full_path'] + '/' + metadata['path']
metadata = metadata[(metadata['gender']=='men') & (metadata['race']=='African')]

#generate the training & testing ids
# NOTE(review): unseeded 80/20 split over source ids — differs on every run.
training_ids = np.random.choice(metadata.source.unique(), int(metadata.source.unique().shape[0]*0.8), replace=False)
testing_ids = np.array(metadata[~metadata['source'].isin(training_ids)]['source'].unique())

#get the training details
print('\nLoading Training Details')
training_details = load_FakeAvCeleb_mfcc_lip_data(metadata, training_ids, 'real', debug_mode=False)

#run PCA & generate X & Y
# 15 weights per frame so that every index in weight_combs (0..14) exists in X_train
print('\nRunning PCA & Generating Training X & y')
X_train, y_train, lips_data_mean, pca, train_lips_data = generate_mfcc_pca_lips_X_y_data(training_details, True, num_components=15, debug_mode=False)

#fit training models
# One set of models per non-empty subset of component indices; weight_combs[0]
# is the empty subset and is skipped, so model_versions[i] belongs to
# weight_combs[i+1]. The recorded output counts 32767 subsets.
print('\nFitting Models')
model_versions = []
for idx, weight_comb in enumerate(weight_combs[1:]):
  print(f'Model {idx+1} of {len(weight_combs[1:])}')
  models = train_models(X_train[:, weight_comb], y_train, 'LinearRegression')
  model_versions.append(models)

#get testing details (1 real video, 1 fake video)
print('\nLoading Real Testing Details')
real_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'real', debug_mode=False)
print('\nLoading Fake Testing Details')
# sample=True picks one wav2lip video at random for each test id
fake_testing_details = load_FakeAvCeleb_mfcc_lip_data(metadata, testing_ids, 'wav2lip', sample=True, debug_mode=False)

#sns.histplot(fake_video_error, label='Fake', color='blue')
#sns.histplot(real_video_error, label='Real', color='red')
#plt.legend()
#plt.show()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Model 11115 of 32767
Model 11116 of 32767
Model 11117 of 32767
Model 11118 of 32767
Model 11119 of 32767
Model 11120 of 32767
Model 11121 of 32767
Model 11122 of 32767
Model 11123 of 32767
Model 11124 of 32767
Model 11125 of 32767
Model 11126 of 32767
Model 11127 of 32767
Model 11128 of 32767
Model 11129 of 32767
Model 11130 of 32767
Model 11131 of 32767
Model 11132 of 32767
Model 11133 of 32767
Model 11134 of 32767
Model 11135 of 32767
Model 11136 of 32767
Model 11137 of 32767
Model 11138 of 32767
Model 11139 of 32767
Model 11140 of 32767
Model 11141 of 32767
Model 11142 of 32767
Model 11143 of 32767
Model 11144 of 32767
Model 11145 of 32767
Model 11146 of 32767
Model 11147 of 32767
Model 11148 of 32767
Model 11149 of 32767
Model 11150 of 32767
Model 11151 of 32767
Model 11152 of 32767
Model 11153 of 32767
Model 11154 of 32767
Model 11155 of 32767
Model 11156 of 32767
Model 11157 of 32767
Model 11158 of 32767
Model 11159

In [None]:
# evaluate testing
print('\nEvaluating Testing Details')
error_log_real = []
error_log_fake = []
# model_versions was built from weight_combs[1:] (the empty subset is skipped),
# so each model set must be paired with the subset at the same position of that
# slice; indexing weight_combs[idx] would hand every model the previous subset.
for idx, (weight_comb, models) in enumerate(zip(weight_combs[1:], model_versions)):
  print(f'Evaluating Model {idx+1} of {len(model_versions)}')
  real_video_error, fake_video_error = eval_test_data(real_testing_details, fake_testing_details, models, lips_data_mean, pca, components=weight_comb, debug_mode=False)
  error_log_real.append(real_video_error)
  error_log_fake.append(fake_video_error)

In [None]:
# Disabled draft kept as a bare string literal: nothing below is executed.
# NOTE(review): as written the draft would not run anyway — its body uses `X`
# and `y`, which are not among its parameters.
'''
def evaluate(X_test, y_):
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=12)

  model = LinearRegression()
  model.fit(X_train, y_train)

  pred = model.predict(X_test)
  err = np.sqrt(mean_squared_error(y_true=y_test, y_pred=pred))/len(pred)

  return pred, err
'''


In [None]:
import itertools
# every subset (including the empty one) of the labels 1..15, smallest subsets first
stuff = list(range(1, 16))
comb = [
    subset
    for subset_size in range(len(stuff) + 1)
    for subset in itertools.combinations(stuff, subset_size)
]

In [None]:
# Number of subsets times 20.
# NOTE(review): the meaning of the factor 20 is not stated anywhere in the
# notebook — presumably a per-subset cost estimate; confirm or document it.
len(comb)*20