In [69]:
! pip install datasets --quiet
! pip install evaluate --quiet
! pip install transformers --quiet
!pip install huggingface_hub --quiet

!pip install accelerate -U --quiet
!pip install transformers[torch] --quiet
!pip install shap --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m532.9/532.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h

### Generate Labels for Data

In [67]:
# create metadata (labels) to create the dataset object

import os
import csv

directory = "/content/drive/MyDrive/deceptive-16khz/"
data = []

for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        label = filename.split("_")[1]
        label = 1 if label == 'lie' else 0
        data.append((filename, label))

csv_file_path = "/content/drive/MyDrive/deceptive-16khz/metadata.csv"


with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["file_name", "label"])
    writer.writerows(data)

print(f"Metadata file created at {csv_file_path}")


Metadata file created at /content/drive/MyDrive/deceptive-16khz/metadata.csv


### Split data into sliding windows

In [72]:
# Windowing function
def window_audio(audio_array, window_size=10, overlap=0.75):
    sr = 16000
    window_size_samples = int(window_size * sr)
    overlap_samples = int(window_size_samples * overlap)

    windows = []
    for i in range(0, len(audio_array) - window_size_samples, overlap_samples):
        window = audio_array[i:i + window_size_samples]
        windows.append(window)

    return windows


In [73]:
def preprocess_function(examples):
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(
        audio_arrays, sampling_rate=feature_extractor.sampling_rate, max_length=16000*10, truncation=True)
    return inputs

def compute_metrics(eval_pred):
    predictions = np.argmax(eval_pred.predictions, axis=1)

    # Calculate precision, recall, and f1 score
    precision = precision_score(y_true=eval_pred.label_ids, y_pred=predictions, average='weighted')
    recall = recall_score(y_true=eval_pred.label_ids, y_pred=predictions, average='weighted')
    f1 = f1_score(y_true=eval_pred.label_ids, y_pred=predictions, average='weighted')

    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1}

### Training


In [None]:
from datasets import load_dataset
from datasets import Audio
from datasets import DatasetDict, Dataset
from transformers import AutoFeatureExtractor
import evaluate
import numpy as np
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score,confusion_matrix,precision_score, recall_score
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
from collections import defaultdict
from transformers import EarlyStoppingCallback


#read data
dataset = load_dataset("audiofolder", data_dir="/content/drive/MyDrive/deceptive-16khz",split='train')
dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
print(dataset)

# apply windowing to the dataset
windowed_dataset = defaultdict(list)
for example in dataset:
    audio_array = example["audio"]["array"]
    windows = window_audio(audio_array)

    for window in windows:
        windowed_dataset["audio"].append({"array": window})
        windowed_dataset["label"].append(example["label"])

dataset = Dataset.from_dict(windowed_dataset)
print(dataset)
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-base-ls960")


num_labels = 2
kf = KFold(n_splits=5, shuffle=True, random_state=42)
all_f1_scores = []
all_confusion_matrices = []
for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    print(f"\n----- Fold {fold + 1} -----")

    # Create datasets for this fold
    train_dataset = dataset.select(train_idx)
    test_dataset = dataset.select(test_idx)

    # Preprocess the datasets
    encoded_train_dataset = train_dataset.map(preprocess_function, remove_columns="audio", batched=True)
    encoded_test_dataset = test_dataset.map(preprocess_function, remove_columns="audio", batched=True)

    # Model initialization
    num_labels = 2
    model = AutoModelForAudioClassification.from_pretrained(
        "facebook/hubert-base-ls960", num_labels=num_labels
    )

    training_args = TrainingArguments(
        output_dir= "hubert_deception-1",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        num_train_epochs=10,
        warmup_ratio=0.1,
        logging_steps=10,
        load_best_model_at_end=True,
        push_to_hub=False,
      )

    feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-large-ls960-ft")

    trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=encoded_train_dataset,
      eval_dataset=encoded_test_dataset,
      tokenizer=feature_extractor,
      compute_metrics=compute_metrics,
      callbacks=[EarlyStoppingCallback(3, 0.0)]
  )

    trainer.train()

    eval_results = trainer.evaluate()

    # Print F1 score for this fold
    print(f"Fold {fold + 1} - F1 Score: {eval_results['eval_f1_score']}")
        # Append F1 score and confusion matrix to lists
    all_f1_scores.append(eval_results['eval_f1_score'])

# Calculate mean F1 score
mean_f1_score = np.mean(all_f1_scores)
print(f"\nMean F1 Score across all folds: {mean_f1_score}")



Resolving data files:   0%|          | 0/117 [00:00<?, ?it/s]

Dataset({
    features: ['audio', 'label'],
    num_rows: 115
})
Dataset({
    features: ['audio', 'label'],
    num_rows: 333
})

----- Fold 1 -----


Map:   0%|          | 0/266 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


preprocessor_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.690371,0.257518,0.507463,0.341658
2,0.679700,0.616628,0.807989,0.791045,0.787632
4,0.544500,0.436827,0.904338,0.895522,0.894819
6,0.457800,0.323328,0.928721,0.925373,0.925173
8,0.323500,0.283696,0.916245,0.910448,0.910047
9,0.322600,0.279669,0.916245,0.910448,0.910047


  _warn_prf(average, modifier, msg_start, len(result))


Fold 1 - F1 Score: 0.9100465414861177

----- Fold 2 -----


Map:   0%|          | 0/266 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.69351,0.228113,0.477612,0.308759
2,0.674400,0.608473,0.728282,0.641791,0.612793
4,0.532900,0.518286,0.831677,0.80597,0.803617
6,0.404200,0.484304,0.831677,0.80597,0.803617
8,0.289700,0.481652,0.841902,0.820896,0.819291


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 2 - F1 Score: 0.8347931702987411

----- Fold 3 -----


Map:   0%|          | 0/266 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.684285,0.609142,0.567164,0.447949
2,0.664900,0.570319,0.720808,0.716418,0.717179
4,0.501400,0.404073,0.848106,0.835821,0.832073
6,0.420200,0.350914,0.901824,0.880597,0.877237
8,0.317500,0.350782,0.901824,0.880597,0.877237
9,0.306500,0.329529,0.901824,0.880597,0.877237


Fold 3 - F1 Score: 0.8772367057769314

----- Fold 4 -----


Map:   0%|          | 0/267 [00:00<?, ? examples/s]

Map:   0%|          | 0/66 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.686357,0.297521,0.545455,0.385027
2,0.674700,0.594201,0.722078,0.712121,0.702713
4,0.487900,0.420622,0.852098,0.848485,0.847061
6,0.377300,0.365125,0.900826,0.878788,0.875524
8,0.355000,0.339492,0.911205,0.893939,0.891634


  _warn_prf(average, modifier, msg_start, len(result))


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.686357,0.297521,0.545455,0.385027
2,0.674700,0.594201,0.722078,0.712121,0.702713
4,0.487900,0.420622,0.852098,0.848485,0.847061
6,0.377300,0.365125,0.900826,0.878788,0.875524
8,0.328000,0.327293,0.911205,0.893939,0.891634
9,0.286200,0.326978,0.911205,0.893939,0.891634


Fold 4 - F1 Score: 0.8916342792626528

----- Fold 5 -----


Map:   0%|          | 0/267 [00:00<?, ? examples/s]

Map:   0%|          | 0/66 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.686231,0.683777,0.651515,0.591056
2,0.678200,0.573075,0.725541,0.727273,0.719968
4,0.507000,0.455773,0.75586,0.757576,0.753792
6,0.456700,0.305077,0.921212,0.909091,0.906656
8,0.348400,0.275331,0.932851,0.924242,0.922659
9,0.379100,0.27108,0.932851,0.924242,0.922659


Fold 5 - F1 Score: 0.9226590964973065

Mean F1 Score across all folds: 0.8872739586643499


In [None]:
from datasets import load_dataset, Audio

dataset = load_dataset("audiofolder", data_dir="/content/drive/MyDrive/deceptive-16khz",split = "train")
dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))

from transformers import pipeline

classifier = pipeline("audio-classification", model="/content/hubert_deception-1/checkpoint-80")
for file in dataset :
  audio = file["audio"]["path"]
  print(classifier(audio))

Resolving data files:   0%|          | 0/117 [00:00<?, ?it/s]

In [None]:
from transformers import pipeline

classifier = pipeline("audio-classification", model="/content/hubert_deception-1/checkpoint-80")
for file in dataset :
  audio = file["audio"]["path"]
  print(classifier(audio))

[{'score': 0.6854604482650757, 'label': 'LABEL_1'}, {'score': 0.3145395517349243, 'label': 'LABEL_0'}]
[{'score': 0.9022054672241211, 'label': 'LABEL_1'}, {'score': 0.09779457747936249, 'label': 'LABEL_0'}]
[{'score': 0.7441938519477844, 'label': 'LABEL_0'}, {'score': 0.25580617785453796, 'label': 'LABEL_1'}]
[{'score': 0.9032216668128967, 'label': 'LABEL_1'}, {'score': 0.09677833318710327, 'label': 'LABEL_0'}]
[{'score': 0.8836219310760498, 'label': 'LABEL_1'}, {'score': 0.11637815088033676, 'label': 'LABEL_0'}]
[{'score': 0.8940549492835999, 'label': 'LABEL_1'}, {'score': 0.10594508796930313, 'label': 'LABEL_0'}]
[{'score': 0.8764867782592773, 'label': 'LABEL_0'}, {'score': 0.12351320683956146, 'label': 'LABEL_1'}]
[{'score': 0.8802664279937744, 'label': 'LABEL_0'}, {'score': 0.11973357945680618, 'label': 'LABEL_1'}]
[{'score': 0.8819144368171692, 'label': 'LABEL_0'}, {'score': 0.11808554828166962, 'label': 'LABEL_1'}]
[{'score': 0.8738248944282532, 'label': 'LABEL_0'}, {'score': 0.1

# Train Model on Bag of Lies Dataset

### Generate Metadata

In [66]:
import pandas as pd
import re
import os
import csv

annot = pd.read_csv('/content/drive/MyDrive/bag-of-lies/BagOfLies/Annotations.csv')
annot[['video','truth']]
annot['video'] = annot['video'].str.replace("./Finalised/","")
annot['video'] = annot['video'].str.replace("/","_")
annot['video'] = annot['video'].str.replace("_video.mp4","")

def rearrange_string(input_str):
    parts = input_str.split('_')
    rearranged_str = f"{parts[2]}_{parts[3]}_{parts[0]}_{parts[1]}"
    return rearranged_str

for i in range(len(annot['video'])):
  annot['video'][i] = rearrange_string(annot['video'][i])


directory = "/content/drive/MyDrive/bag-of-lies/BagOfLies/Audio"
data = []
for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        user_run = filename.replace(".wav", "")
        label = annot.copy().loc[annot['video'] == user_run]['truth'].reset_index()['truth'][0]
        if label == 1:
          label = 0
        else:
          label = 1
        data.append((filename, label))

csv_file_path = "/content/drive/MyDrive/bag-of-lies/BagOfLies/metadata.csv"


with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["file_name", "label"])
    writer.writerows(data)

print(f"Metadata file created at {csv_file_path}")



  annot['video'] = annot['video'].str.replace("./Finalised/","")
  annot['video'] = annot['video'].str.replace("_video.mp4","")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annot['video'][i] = rearrange_string(annot['video'][i])


Metadata file created at /content/drive/MyDrive/bag-of-lies/BagOfLies/metadata.csv


### Training

In [70]:
from datasets import load_dataset
from datasets import Audio
from datasets import DatasetDict, Dataset
from transformers import AutoFeatureExtractor
import evaluate
import numpy as np
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score,confusion_matrix,precision_score, recall_score
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
from collections import defaultdict
from transformers import EarlyStoppingCallback

In [None]:
#read data
dataset = load_dataset("audiofolder", data_dir="/content/drive/MyDrive/bag-of-lies/BagOfLies/Audio",split='train')
dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))

# apply windowing to the dataset
windowed_dataset = defaultdict(list)
for example in dataset:
    audio_array = example["audio"]["array"]
    windows = window_audio(audio_array)

    for window in windows:
        windowed_dataset["audio"].append({"array": window})
        windowed_dataset["label"].append(example["label"])

dataset = Dataset.from_dict(windowed_dataset)

########################## training ##############################################

feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-base-ls960")

num_labels = 2
kf = KFold(n_splits=5, shuffle=True, random_state=42)

all_f1_scores = []
all_confusion_matrices = []
for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    print(f"\n----- Fold {fold + 1} -----")

    train_dataset = dataset.select(train_idx)
    test_dataset = dataset.select(test_idx)

    encoded_train_dataset = train_dataset.map(preprocess_function, remove_columns="audio", batched=True)
    encoded_test_dataset = test_dataset.map(preprocess_function, remove_columns="audio", batched=True)

    num_labels = 2
    model = AutoModelForAudioClassification.from_pretrained(
        "facebook/hubert-base-ls960", num_labels=num_labels
    )

    training_args = TrainingArguments(
        output_dir= "hubert_deception-1",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        num_train_epochs=10,
        warmup_ratio=0.1,
        logging_steps=10,
        load_best_model_at_end=True,
        push_to_hub=False,
      )

    feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/hubert-large-ls960-ft")

    trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=encoded_train_dataset,
      eval_dataset=encoded_test_dataset,
      tokenizer=feature_extractor,
      compute_metrics=compute_metrics,
      callbacks=[EarlyStoppingCallback(3, 0.0)]
  )

    trainer.train()

    eval_results = trainer.evaluate()

    # Print F1 score for this fold
    print(f"Fold {fold + 1} - F1 Score: {eval_results['eval_f1_score']}")
        # Append F1 score and confusion matrix to lists
    all_f1_scores.append(eval_results['eval_f1_score'])

# Calculate mean F1 score
mean_f1_score = np.mean(all_f1_scores)
print(f"\nMean F1 Score across all folds: {mean_f1_score}")


Resolving data files:   0%|          | 0/326 [00:00<?, ?it/s]


----- Fold 1 -----


Map:   0%|          | 0/240 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.681259,0.795855,0.672131,0.619181
2,0.674200,0.64286,0.652186,0.639344,0.617907
4,0.644800,0.614188,0.795855,0.672131,0.619181
6,0.608900,0.59669,0.795855,0.672131,0.619181
8,0.646800,0.602131,0.795855,0.672131,0.619181
9,0.612600,0.602095,0.795855,0.672131,0.619181


Fold 1 - F1 Score: 0.6191807514720209

----- Fold 2 -----


Map:   0%|          | 0/241 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.689641,0.580952,0.566667,0.568113
1,0.693400,0.685877,0.501743,0.45,0.379402
2,0.679800,0.654413,0.610716,0.616667,0.598953
4,0.642600,0.633252,0.724613,0.683333,0.647759
5,0.634700,0.633468,0.724613,0.683333,0.647759
6,0.631100,0.637001,0.724613,0.683333,0.647759


Fold 2 - F1 Score: 0.6477586019754694

----- Fold 3 -----


Map:   0%|          | 0/241 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.687182,0.797,0.65,0.606885
1,0.691500,0.672978,0.764866,0.666667,0.637959
2,0.683800,0.657402,0.632097,0.616667,0.609866
4,0.655100,0.623064,0.617094,0.6,0.59095
5,0.648700,0.613893,0.676106,0.633333,0.615509
6,0.629800,0.602663,0.691087,0.666667,0.659125
8,0.629900,0.604752,0.726553,0.65,0.623556
9,0.627200,0.604666,0.726553,0.65,0.623556


Fold 3 - F1 Score: 0.6591251885369532

----- Fold 4 -----


Map:   0%|          | 0/241 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.688994,0.71234,0.583333,0.519626
1,0.696100,0.680625,0.582961,0.583333,0.582985
2,0.680300,0.672949,0.559689,0.55,0.542016
4,0.665600,0.661451,0.567643,0.566667,0.556762
5,0.648200,0.644886,0.575754,0.566667,0.560831
6,0.646800,0.644258,0.502245,0.5,0.498888
8,0.608100,0.643003,0.503429,0.5,0.496652
9,0.630200,0.643143,0.503429,0.5,0.496652


Fold 4 - F1 Score: 0.5662067643958992

----- Fold 5 -----


Map:   0%|          | 0/241 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1 Score
0,No log,0.704043,0.788679,0.466667,0.396475
1,0.697000,0.697123,0.797,0.516667,0.472348
2,0.677400,0.69377,0.797,0.516667,0.472348


In [13]:
"""
import pydub
def convert_video_to_wav(video_file, output_file):
    audio = pydub.AudioSegment.from_file(video_file, format="mp4")
    audio.export(output_file, format="wav")
    if os.path.exists(video_file):
      os.remove(video_file)
"""

"""
import os
def get_video_files(root_folder):
    video_files = []
    for user_folder in os.listdir(root_folder):
        user_path = os.path.join(root_folder, user_folder)
        for run_folder in os.listdir(user_path):
            run_path = os.path.join(user_path, run_folder)
            for filename in os.listdir(run_path):
                if filename.endswith(".mp4") :
                    video_file = os.path.join(run_path, filename)
                    video_files.append(video_file)
    return video_files
"""

In [27]:
"""
for i, video_file in enumerate(video_files):
    user_folder = os.path.basename(os.path.dirname(video_file))
    run_number = os.path.basename(os.path.dirname(os.path.dirname(video_file)))
    filename, extension = os.path.splitext(os.path.basename(video_file))

    output_file = os.path.join("/content/drive/MyDrive/bag-of-lies/BagOfLies/Audio", f"{user_folder}_{run_number}.wav")
    convert_video_to_wav(video_file, output_file)

    print(f"Converted {video_file} to {output_file}")
"""

'\nfor i, video_file in enumerate(video_files):\n    user_folder = os.path.basename(os.path.dirname(video_file))\n    run_number = os.path.basename(os.path.dirname(os.path.dirname(video_file)))\n    filename, extension = os.path.splitext(os.path.basename(video_file))\n\n    output_file = os.path.join("/content/drive/MyDrive/bag-of-lies/BagOfLies/Audio", f"{user_folder}_{run_number}.wav")\n    convert_video_to_wav(video_file, output_file)\n\n    print(f"Converted {video_file} to {output_file}")\n'