In [1]:
import sys
sys.path.append("/home/ubuntu/MultiModalDeepFake")
import nemo.collections.asr as nemo_asr 
import pandas as pd

2023-05-18 04:39:57.876903: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-18 04:39:57.988770: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-18 04:39:58.546449: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-05-18 04:39:58.546515: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] 

In [2]:
from packages.LJDataLoader import LJDataLoader
from packages.AudioEmbeddingsManager import AudioEmbeddingsManager
from packages.ModelManager import ModelManager
from packages.CadenceModelManager import CadenceModelManager
import packages.AnalysisManager as am
from packages.SmileFeatureManager import SmileFeatureManager

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [3]:
def generate_split(fake_cols, file_path):
    """Build the labelled, split DataFrame used by the rest of the pipeline.

    The loader is created from ``file_path``, ``sample(0.1)`` and
    ``splitData()`` are called on it, two randomly-selected columns are
    derived, and the final frame is generated with ``'Real'`` as the real
    column. The number of rows per ``'type'`` value (train/dev/test) is
    printed before returning.

    Args:
        fake_cols: Forwarded to ``generateFinalDataFrame`` as ``fake_cols``.
        file_path: Forwarded to ``LJDataLoader`` as ``data_path``.

    Returns:
        The DataFrame returned by ``loader.generateFinalDataFrame``.
    """
    loader = LJDataLoader(data_path=file_path)
    loader.sample(0.1)
    loader.splitData()

    # (target column, source columns) pairs, applied in this order: the second
    # selection lists 'RandWaveFake', which is the target of the first one.
    random_selections = (
        (
            'RandWaveFake',
            ['Full_Band_MelGan', 'HifiGan', 'MelGan', 'MelGanLarge',
             'Multi_Band_MelGan', 'Parallel_WaveGan', 'Waveglow'],
        ),
        ('Fake', ['RandWaveFake', 'ElevenLabs', 'UberDuck']),
    )
    for target, sources in random_selections:
        loader.selectRandomArchitecture(target_col=target, source_cols=sources)

    data_df = loader.generateFinalDataFrame(real_col='Real', fake_cols=fake_cols)

    # Count every split first, then report, so nothing is printed if a lookup fails.
    split_of_row = data_df['type']
    headings = (('Train', 'train'), ('Dev', 'dev'), ('Test', 'test'))
    counts = {
        split: data_df[split_of_row == split].shape[0]
        for _, split in headings
    }
    for heading, split in headings:
        print(f'# of {heading} instances: {counts[split]}')

    return data_df

In [4]:
def generate_features(data_df):
    """Run every feature extractor on ``data_df`` and collect the results.

    Extractors run in this order: TitaNet speaker embeddings, cadence
    features, then openSMILE features for the binary and multiclass label
    types.

    Args:
        data_df: Data handed unchanged to each feature manager.

    Returns:
        dict mapping feature-set name to a ``(feature_df, feature_cols)``
        tuple, with keys inserted in the order ``'titanet'``,
        ``'openSmile_binary'``, ``'openSmile_multiclass'``, ``'cadence'``.
    """
    # Speaker embeddings from the pretrained 'titanet_large' checkpoint.
    titanet_model = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained(model_name='titanet_large')
    titanet_df, titanet_cols = AudioEmbeddingsManager(
        model=titanet_model, data=data_df
    ).generateFeatureDf()

    # The pipeline returns a third value that this function does not use.
    cadence_df, cadence_cols, _unused_scaler = (
        CadenceModelManager(data_df).run_cadence_feature_extraction_pipeline()
    )

    # openSMILE features are generated once per label type, binary first.
    smile_manager = SmileFeatureManager(data_df)
    smile_features = {}
    for label_kind in ('binary', 'multiclass'):
        smile_df, smile_cols = smile_manager.generateFeatureDf('random_forest', label_type=label_kind)
        smile_features[label_kind] = (smile_df, smile_cols)

    return {
        'titanet': (titanet_df, titanet_cols),
        'openSmile_binary': smile_features['binary'],
        'openSmile_multiclass': smile_features['multiclass'],
        'cadence': (cadence_df, cadence_cols),
    }
    
    

In [5]:
def train_eval(feature_store, fake_cols):
    """Train and evaluate a 'logreg' model for every label type and feature set.

    For each label column (``'label'``, then ``'multiclass_label'``) and each
    entry of ``feature_store``, a ``ModelManager`` is built with
    ``merge_train_dev=True``, ``trainPredict`` is called, the per-class
    accuracy is printed, and one result row is recorded.

    Args:
        feature_store: Mapping of feature-set name to a sequence whose first
            two items are passed to ``ModelManager`` as its second and third
            positional arguments (in this notebook: the feature DataFrame and
            the list of feature column names).
        fake_cols: Stored as-is in the ``'fake_cols'`` column of every row.

    Returns:
        pandas.DataFrame with columns ``model``, ``fake_cols``, ``label_type``,
        ``acc``, ``cls_acc`` and ``loss``; one row per (label type, feature
        set) pair in iteration order. The frame is empty, but still has these
        columns, when ``feature_store`` is empty.

    NOTE(review): the recorded run of this notebook ended with
    ``ValueError: Input X contains NaN`` raised by LogisticRegression; the
    feature frames may need NaN handling before training. Confirm which
    feature set produces the NaNs.
    """
    results_cols = ['model', 'fake_cols', 'label_type', 'acc', 'cls_acc', 'loss']

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending inside a loop re-copies the frame
    # on every iteration.
    records = []
    for label_type in ['label', 'multiclass_label']:
        for feature_name, features in feature_store.items():
            model_manager = ModelManager('logreg', features[0], features[1], merge_train_dev=True)
            model_manager.trainPredict(label_col=label_type)
            print(model_manager.class_accuracy)
            records.append({
                'model': feature_name,
                'fake_cols': fake_cols,
                'label_type': label_type,
                'acc': model_manager.accuracy,
                'cls_acc': model_manager.class_accuracy,
                'loss': model_manager.log_loss_value,
            })

    # columns= fixes the column order and keeps the header for an empty result.
    return pd.DataFrame(records, columns=results_cols)
    
    

In [6]:
def run(fake_cols, metadata_path, name, results_dir='/home/ubuntu/data/results'):
    """Run the whole experiment and save its metrics as ``<results_dir>/<name>.csv``.

    Chains ``generate_split`` -> ``generate_features`` -> ``train_eval`` and
    writes the resulting frame to CSV without the index.

    Args:
        fake_cols: Fake columns, forwarded to ``generate_split`` and ``train_eval``.
        metadata_path: Metadata file path, forwarded to ``generate_split``.
        name: Base name (without extension) of the CSV file to write.
        results_dir: Directory that receives the CSV. Defaults to the
            location this notebook has always written to.

    Returns:
        None.
    """
    import os

    # Create the output directory before the expensive stages so that an
    # unusable path fails immediately instead of after feature extraction.
    os.makedirs(results_dir, exist_ok=True)

    data_df = generate_split(fake_cols, metadata_path)
    feature_store = generate_features(data_df)
    results = train_eval(feature_store, fake_cols)
    results.to_csv(os.path.join(results_dir, f'{name}.csv'), index=False)
    
    

In [7]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16000KHz.csv'
#run(['ElevenLabs'], file_path, '16KHz_ElevenLabs')

The results table should have the following columns:
- Which dataset is being used
- Which fake columns are included
- Binary or multi-class labels
- Feature generation method
- Accuracy
- Loss

In [8]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16000KHz.csv'
#run(['UberDuck'], file_path, '16KHz_UberDuck')

In [9]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16000KHz.csv'
#run(['UberDuck', 'ElevenLabs'], file_path, '16KHz_ElevenLabs_and_UberDuck')

In [10]:
# Mixed experiment: 'Real' clips against the ElevenLabs, UberDuck and
# RandWaveFake fake columns, using the 16 kHz LJ metadata file.
# run() saves the metrics as '16KHz_Mix.csv' in its results directory.
file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16000KHz.csv'
run(['ElevenLabs', 'UberDuck', 'RandWaveFake'], file_path, '16KHz_Mix')

# of Train instances: 3132
# of Dev instances: 1044
# of Test instances: 1048
[NeMo I 2023-05-18 04:40:03 cloud:56] Found existing object /home/ubuntu/.cache/torch/NeMo/NeMo_1.15.0/titanet-l/492c0ab8416139171dc18c21879a9e45/titanet-l.nemo.
[NeMo I 2023-05-18 04:40:03 cloud:62] Re-using file from: /home/ubuntu/.cache/torch/NeMo/NeMo_1.15.0/titanet-l/492c0ab8416139171dc18c21879a9e45/titanet-l.nemo
[NeMo I 2023-05-18 04:40:03 common:913] Instantiating model from pre-trained checkpoint


[NeMo W 2023-05-18 04:40:03 modelPT:156] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
    sample_rate: 16000
    labels: null
    batch_size: 64
    shuffle: true
    time_length: 3
    is_tarred: false
    tarred_audio_filepaths: null
    tarred_shard_strategy: scatter
    augmentor:
      noise:
        manifest_path: /manifests/noise/rir_noise_manifest.json
        prob: 0.5
        min_snr_db: 0
        max_snr_db: 15
      speed:
        prob: 0.5
        sr: 16000
        resample_type: kaiser_fast
        min_speed_rate: 0.95
        max_speed_rate: 1.05
    num_workers: 15
    pin_memory: true
    
[NeMo W 2023-05-18 04:40:03 modelPT:163] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_valida

[NeMo I 2023-05-18 04:40:03 label_models:126] Setting angular: true/false in decoder is deprecated and will be removed in 1.13 version, use specific loss with _target_
[NeMo I 2023-05-18 04:40:03 features:267] PADDING: 16
[NeMo I 2023-05-18 04:40:04 save_restore_connector:243] Model EncDecSpeakerLabelModel was successfully restored from /home/ubuntu/.cache/torch/NeMo/NeMo_1.15.0/titanet-l/492c0ab8416139171dc18c21879a9e45/titanet-l.nemo.


100%|██████████| 10/10 [00:02<00:00,  3.90it/s]


Normalizing amplitudes
Window size: 50
Truncating audio 100/5224 (2%)
Truncating audio 200/5224 (4%)
Truncating audio 300/5224 (6%)
Truncating audio 400/5224 (8%)
Truncating audio 500/5224 (10%)
Truncating audio 600/5224 (11%)
Truncating audio 700/5224 (13%)
Truncating audio 800/5224 (15%)
Truncating audio 900/5224 (17%)
Truncating audio 1000/5224 (19%)
Truncating audio 1100/5224 (21%)
Truncating audio 1200/5224 (23%)
Truncating audio 1300/5224 (25%)
Truncating audio 1400/5224 (27%)
Truncating audio 1500/5224 (29%)
Truncating audio 1600/5224 (31%)
Truncating audio 1700/5224 (33%)
Truncating audio 1800/5224 (34%)
Truncating audio 1900/5224 (36%)
Truncating audio 2000/5224 (38%)
Truncating audio 2100/5224 (40%)
Truncating audio 2200/5224 (42%)
Truncating audio 2300/5224 (44%)
Truncating audio 2400/5224 (46%)
Truncating audio 2500/5224 (48%)
Truncating audio 2600/5224 (50%)
Truncating audio 2700/5224 (52%)
Truncating audio 2800/5224 (54%)
Truncating audio 2900/5224 (56%)
Truncating audio 

      ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
    
      arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
    
      ret = ret.dtype.type(ret / rcount)
    
      return _methods._mean(a, axis=axis, dtype=dtype,
    
      ret = ret.dtype.type(ret / rcount)
    


ValueError: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16KHz_Laundered.csv'
#run(['ElevenLabs'], file_path, '16KHz_ElevenLabs_Laundered')

In [None]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16KHz_Laundered.csv'
#run(['UberDuck'], file_path, '16KHz_UberDuck_Laundered')

In [None]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16KHz_Laundered.csv'
#run(['UberDuck', 'ElevenLabs'], file_path, '16KHz_ElevenLabs_and_UberDuck_Laundered')

In [None]:
#file_path = '/home/ubuntu/data/wavefake_data/LJ_metadata_16KHz_Laundered.csv'
#run(['ElevenLabs', 'UberDuck', 'RandWaveFake'], file_path, '16KHz_Mix_Laundered')