In [15]:
# This notebook can be used to run the pragmatic similarity metric (https://www.cs.utep.edu/nigel/papers/similarity-kos.pdf) on the DRAL dataset (https://www.cs.utep.edu/nigel/papers/interspeech2023.pdf & https://www.cs.utep.edu/nigel/papers/dral-techreport2.pdf).
# The objective of this metric is to find sentences with similar pragmatic intent in the dataset, even when the textual content is quite different.

In [2]:
!pip install -r ../requirements.txt -q

In [3]:
import sys
import os
import random

# Add parent directory to Python path
sys.path.append(os.path.abspath('..'))

import cosine_similarity as cs
import feature_extractor as fe
import feature_selection as fs
import similarity_finder as sf

similarity_finder = sf.SimilarityFinder(feature_selection=True, directory_path=os.path.dirname(os.getcwd()), clips_for_comparison_path='data/dral_en.csv')


  from pkg_resources import resource_stream, resource_exists


pygame 2.6.1 (SDL 2.28.4, Python 3.12.11)
Hello from the pygame community. https://www.pygame.org/contribute.html
torch version: 2.8.0+cpu
torch audio version: 2.8.0+cpu
device: cpu
Sample Rate: 16000
model class: <class 'torchaudio.pipelines._wav2vec2.utils._Wav2Vec2Model'>


In [4]:
# Obtain DRAL data and put in the right place

import os, tarfile, shutil, requests
from tqdm.notebook import tqdm

# Paths
data_dir = "../data"
dral_dir = os.path.join(data_dir, "dral")
target_check = os.path.join(dral_dir, "fragments-short")
url = "https://www.cs.utep.edu/nigel/dral/DRAL-16kHz.tgz"
tgz_path = os.path.join(data_dir, "DRAL-16kHz.tgz")

if not os.path.exists(target_check):
    os.makedirs(dral_dir, exist_ok=True)

    print("Downloading DRAL dataset...")
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024 * 1024
        with open(tgz_path, 'wb') as f, tqdm(
            total=total_size, unit='B', unit_scale=True, desc="Downloading"
        ) as pbar:
            for chunk in r.iter_content(chunk_size=block_size):
                if chunk:
                    f.write(chunk)
                    pbar.update(len(chunk))

    print("Extracting selected folders...")
    with tarfile.open(tgz_path, "r:gz") as tar:
        wanted = {"fragments-short", "metadata"}
        members = [
            m for m in tar.getmembers()
            if m.name.startswith("DRAL16kHz/")
            and len(m.name.split("/")) > 1
            and m.name.split("/")[1] in wanted
        ]
        tar.extractall(path=data_dir, members=members)

    # Move wanted folders into data/dral/
    extracted_root = os.path.join(data_dir, "DRAL16kHz")
    for folder in wanted:
        src = os.path.join(extracted_root, folder)
        dst = os.path.join(dral_dir, folder)
        if os.path.exists(src):
            shutil.move(src, dst)

    # Cleanup
    os.remove(tgz_path)
    shutil.rmtree(extracted_root, ignore_errors=True)

    print("DRAL data ready in:", os.path.abspath(dral_dir))
else:
    print("DRAL data already present at:", os.path.abspath(dral_dir))


DRAL data already present at: C:\Users\LENOVO\UTEP\Coding\Pragmatic_Similarity_Computation\data\dral


In [9]:
# code to make a call to find similar clips and record its overlap
def find_similar_clips(id, path):
    global recently_updated_clips
    first_place, second_place, third_place, thousandth_place, fifteen_hundredth_place, last_place = \
        similarity_finder.find_similar(path)
    clip = {'id': id,
            'path': path,
            'duration': None,
            'best_path': first_place[1],
            'best_cos': round(first_place[0].item(), 2),
            'second_path': second_place[1],
            'second_cos': round(second_place[0].item(), 2),
            'third_path': third_place[1],
            'third_cos': round(third_place[0].item(), 2),            
            'thousandth_path': thousandth_place[1],
            'thousandth_cos': round(thousandth_place[0].item(), 2),
            'fifteen_hundredth_path': fifteen_hundredth_place[1],
            'fifteen_hundredth_cos': round(fifteen_hundredth_place[0].item(), 2),
            'worst_path': last_place[1],
            'worst_cos': round(last_place[0].item(), 2)
            }
    print(f"\n{clip}\n")
    recently_updated_clips.append(clip)
    return clip

In [10]:
# code to record a 5 second audio fragment yourself and have it compared with the dataset.
# expect cosine similarity to be not too high because of difference between monologue and dialogue

import sounddevice as sd
from datetime import datetime 
from scipy.io.wavfile import write

def record_audio():
    fs = 16000
    seconds = 5
    
    print("Recording...")
    myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
    sd.wait()  # Wait until recording is finished
    
    recording_name = f"recording_{datetime.now():%Y%m%d_%H%M%S}.wav"
    recording_path = os.path.join(os.path.dirname(os.getcwd()), "clips", recording_name)
    write(recording_path, fs, myrecording)
    print("Saved recording") 
    return recording_path


In [15]:
sim_eval_option = "choose_random_dataset_clip" # Choose from "record_audio", "choose_existing_clip", "choose_specific_dataset_clip", "choose_random_dataset_clip"
dataset_loc = "data/dral/fragments-short"

if sim_eval_option == "record_audio":
    audio_path = record_audio()
elif sim_eval_option == "choose_existing_clip": # assumes audio file stored in clips dir
    audio_filename = "LJ025-0076.wav" # a random LJ Speech sample that is out of distribution (read speech rather than dialogue speech)
    audio_path = os.path.join(os.path.dirname(os.getcwd()), "clips", audio_filename)
elif sim_eval_option == "choose_specific_dataset_clip": 
    audio_filename = "EN_001_19.wav" # provide name of specific file in DRAL dataset
    audio_path = os.path.join(os.path.dirname(os.getcwd()), dataset_loc, audio_filename)
elif sim_eval_option == "choose_random_dataset_clip":
    audio_folder = os.path.join(os.path.dirname(os.getcwd()), dataset_loc)
    prefix = "EN_" if similarity_finder.language.lower() == "english" else "ES_"
    files = [f for f in os.listdir(audio_folder) if f.startswith(prefix) and f.endswith((".wav", ".mp3"))]
    if not files:
        raise FileNotFoundError(f"No {prefix}-files found in {audio_folder}")
    audio_path = os.path.join(audio_folder, random.choice(files))
else:
    raise NotImplementedError("This evaluation option is not implemented")

clip = find_similar_clips(0, audio_path) # ARGS: audio_id and audio_path. Possible to add multiple or create for loop

# some of the short fragments (e.g. EN_101_18) have multiple speakers speaking

C:\Users\LENOVO\UTEP\Coding\Pragmatic_Similarity_Computation\data\dral\fragments-short\EN_032_11.wav C:\Users\LENOVO\UTEP\Coding\Pragmatic_Similarity_Computation\data\dral\fragments-short\EN_032_11.wav
Skipping same clip comparison

{'id': 0, 'path': 'C:\\Users\\LENOVO\\UTEP\\Coding\\Pragmatic_Similarity_Computation\\data/dral/fragments-short\\EN_032_11.wav', 'duration': None, 'best_path': 'C:\\Users\\LENOVO\\UTEP\\Coding\\Pragmatic_Similarity_Computation\\data\\dral\\fragments-short\\EN_033_1.wav', 'best_cos': 0.81, 'second_path': 'C:\\Users\\LENOVO\\UTEP\\Coding\\Pragmatic_Similarity_Computation\\data\\dral\\fragments-short\\EN_020_13.wav', 'second_cos': 0.79, 'third_path': 'C:\\Users\\LENOVO\\UTEP\\Coding\\Pragmatic_Similarity_Computation\\data\\dral\\fragments-short\\EN_026_41.wav', 'third_cos': 0.79, 'thousandth_path': 'C:\\Users\\LENOVO\\UTEP\\Coding\\Pragmatic_Similarity_Computation\\data\\dral\\fragments-short\\EN_078_26.wav', 'thousandth_cos': 0.53, 'fifteen_hundredth_path': '

In [16]:
# play selected clips one by one, with a pause of a second
similarity_finder.play_clip(clip['path'])
similarity_finder.play_clip(clip['best_path'])
similarity_finder.play_clip(clip['second_path'])
similarity_finder.play_clip(clip['third_path'])
similarity_finder.play_clip(clip['thousandth_path'])
similarity_finder.play_clip(clip['fifteen_hundredth_path'])
similarity_finder.play_clip(clip['worst_path'])

"""
for clip in recently_updated_clips:
    similarity_finder.play_clip(clip['path'])
    similarity_finder.play_clip(clip['best_path'])
    similarity_finder.play_clip(clip['second_path'])
    similarity_finder.play_clip(clip['thousandth_path'])
    similarity_finder.play_clip(clip['fifteen_hundredth_path'])
    similarity_finder.play_clip(clip['worst_path'])
"""

"\nfor clip in recently_updated_clips:\n    similarity_finder.play_clip(clip['path'])\n    similarity_finder.play_clip(clip['best_path'])\n    similarity_finder.play_clip(clip['second_path'])\n    similarity_finder.play_clip(clip['thousandth_path'])\n    similarity_finder.play_clip(clip['fifteen_hundredth_path'])\n    similarity_finder.play_clip(clip['worst_path'])\n"