## Install

In [None]:
!pip install -qq pyter3 moverscore bert-score nltk datasets nltk rouge-score numpy pyemd

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m666.6/666.6 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00

## Import, Func

In [None]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from datasets import load_dataset
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from nltk.translate.meteor_score import meteor_score
import numpy as np
import pyter  # for TER
# from moverscore_v2 import word_mover_score  # for MoverScore
# import torch
from collections import defaultdict
import pandas as pd
from huggingface_hub import HfApi
import gc
from tqdm import tqdm
import os
import shutil
from datasets import config
cache_directory = config.HF_DATASETS_CACHE

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Show up to 100 columns and 100 rows before pandas truncates the display
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# Never truncate the contents of a cell, so long strings are shown in full
# (set once; the option was previously assigned twice with the same value)
pd.set_option('display.max_colwidth', None)

# Format floating point numbers with three decimal places
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Increase width of displayed tables
pd.set_option('display.width', 1000)

# Request three decimal places via the precision option as well, matching
# the float_format above
pd.options.display.precision = 3

In [None]:
import pandas as pd
from typing import List, Tuple

def align_dataframes(primary_df: pd.DataFrame,
                    reference_dfs: List[pd.DataFrame],
                    key_column: str) -> Tuple[pd.DataFrame, List[pd.DataFrame]]:
    """
    Align several dataframes so they contain the same keys in the same order.

    Every frame is restricted to the key values present in *all* frames,
    sorted by the key column and re-indexed from 0, so that row ``j`` of each
    returned frame carries the same key.

    Args:
        primary_df: The primary dataframe to align others against
        reference_dfs: List of reference dataframes to align with primary
        key_column: Name of the column to use as the primary key for alignment

    Returns:
        Tuple containing:
        - Filtered primary dataframe
        - List of filtered reference dataframes (same length and order as
          ``reference_dfs``)

    Raises:
        ValueError: If an input is not a DataFrame, the key column is missing
            from a frame, or the frames share no key values
        AssertionError: If a reference frame's ordered keys still differ from
            the primary's after filtering. Because every frame is filtered to
            the same key set, this happens when a key is repeated a different
            number of times in the two frames.
    """
    reference_dfs = list(reference_dfs)
    all_frames = [primary_df, *reference_dfs]

    # Input validation
    if not all(isinstance(df, pd.DataFrame) for df in all_frames):
        raise ValueError("All inputs must be pandas DataFrames")

    if not all(key_column in df.columns for df in all_frames):
        raise ValueError(f"Column '{key_column}' must exist in all dataframes")

    # Keys present in every frame
    common_keys = set(primary_df[key_column]).intersection(
        *(set(df[key_column]) for df in reference_dfs)
    )
    if not common_keys:
        raise ValueError("No common keys found across all dataframes")

    def _restrict_and_sort(df: pd.DataFrame) -> pd.DataFrame:
        # A stable sort keeps rows that share a key in their original relative
        # order, so repeated keys are paired deterministically across frames.
        kept = df[df[key_column].isin(common_keys)]
        return kept.sort_values(key_column, kind="mergesort").reset_index(drop=True)

    filtered_primary = _restrict_and_sort(primary_df)
    filtered_refs = [_restrict_and_sort(df) for df in reference_dfs]

    # Verify alignment. Identical key lists also imply identical lengths, so
    # no separate length check is needed afterwards.
    primary_keys_ordered = filtered_primary[key_column].tolist()
    for i, ref_df in enumerate(filtered_refs):
        if ref_df[key_column].tolist() != primary_keys_ordered:
            raise AssertionError(
                f"Alignment failed for reference dataframe {i}: its '{key_column}' "
                "values differ from the primary dataframe after filtering "
                "(a key is probably duplicated a different number of times)"
            )

    return filtered_primary, filtered_refs

In [None]:
def compute_similarity_metrics(target_text, reference_texts):
    """
    Score one text against several references and summarise each metric.

    For every reference the function computes smoothed sentence-level BLEU,
    the ROUGE-1 / ROUGE-2 / ROUGE-L F-measures and METEOR, using whitespace
    tokenisation for BLEU and METEOR.

    Args:
        target_text (str): The text to compare
        reference_texts (list): Reference texts to compare against

    Returns:
        dict: For each metric ``m`` computed, ``avg_m`` and ``std_m`` hold the
        mean and standard deviation over the references (``None`` when no
        valid score exists). Empty when ``reference_texts`` is empty.
    """
    rouge_variants = ['rouge1', 'rouge2', 'rougeL']
    scorer = rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True)
    smoothing = SmoothingFunction().method1

    # Whitespace tokens are shared by BLEU and METEOR
    hypothesis_tokens = target_text.split()
    tokenized_references = [text.split() for text in reference_texts]

    per_metric = defaultdict(list)
    for reference, reference_tokens in zip(reference_texts, tokenized_references):
        # BLEU
        bleu = sentence_bleu([reference_tokens], hypothesis_tokens,
                             smoothing_function=smoothing)
        per_metric['bleu'].append(bleu)

        # ROUGE (F-measure of each variant)
        rouge = scorer.score(target_text, reference)
        for variant in rouge_variants:
            per_metric[variant].append(rouge[variant].fmeasure)

        # METEOR
        per_metric['meteor'].append(meteor_score([reference_tokens], hypothesis_tokens))

        # TER (pyter.ter) is currently disabled; when enabled it recorded None
        # for references it failed on, which the summary step below tolerates.

    # Mean and standard deviation per metric, ignoring None entries
    summary = {}
    for name, values in per_metric.items():
        usable = [value for value in values if value is not None]
        summary[f'avg_{name}'] = np.mean(usable) if usable else None
        summary[f'std_{name}'] = np.std(usable) if usable else None

    return summary

# def main():
#     # Load a small subset of CNN/DailyMail dataset





#     # Print results
#     print("\nSimilarity scores (with standard deviations):")
#     metrics_pairs = [(k, v) for k, v in metrics.items() if not k.startswith('std_')]

#     for metric, avg in metrics_pairs:
#         std = metrics[f"std_{metric.replace('avg_', '')}"]
#         if avg is not None and std is not None:
#             print(f"{metric}: {avg:.4f} (±{std:.4f})")
#         else:
#             print(f"{metric}: Failed to compute")

In [None]:
class RunningDictAverage:
    """
    Maintains a running average of dictionaries with numeric fields.
    Uses exponential moving average (EMA) to weight recent values more heavily.

    The first dictionary passed to ``update`` seeds the averages; every later
    dictionary must have exactly the same keys.
    """
    def __init__(self, alpha=0.3):
        """
        Initialize running average tracker.

        Args:
            alpha (float): Smoothing factor between 0 and 1.
                         Higher values give more weight to recent observations.
        """
        # None until the first update() call seeds the averages
        self.averages = None
        self.alpha = alpha

    @staticmethod
    def _require_numeric(values):
        """Raise ValueError naming the first key whose value is not an int/float."""
        for key, value in values.items():
            if not isinstance(value, (int, float)):
                raise ValueError(f"Non-numeric value found for key: {key}")

    def update(self, new_dict):
        """
        Update running averages with new dictionary values.

        The update is all-or-nothing: when a ValueError is raised the stored
        averages are left exactly as they were before the call.

        Args:
            new_dict (dict): New dictionary with numeric values

        Returns:
            dict: Current running averages (the tracker's own dictionary)

        Raises:
            ValueError: If the keys differ from the previous dictionaries or
                a value is not an int/float
        """
        # Initialize averages if this is the first dictionary. A non-numeric
        # seed would make every later update fail, so reject it right away.
        if self.averages is None:
            self._require_numeric(new_dict)
            self.averages = new_dict.copy()
            return self.averages

        # Check if new dictionary has same keys
        if new_dict.keys() != self.averages.keys():
            raise ValueError("New dictionary must have same keys as previous dictionaries")

        # Validate everything before touching the averages so a bad value in
        # the middle of the dictionary cannot leave them half-updated.
        self._require_numeric(new_dict)

        # EMA formula: new_avg = alpha * current_value + (1 - alpha) * previous_avg
        for key, value in new_dict.items():
            self.averages[key] = (self.alpha * value +
                                (1 - self.alpha) * self.averages[key])

        return self.averages

# Example usage: feed three dictionaries through an EMA tracker with alpha=0.3
tracker = RunningDictAverage(alpha=0.3)

# First dictionary seeds the averages unchanged
dict1 = {"a": 10, "b": 20, "c": 30}
print(tracker.update(dict1))  # {'a': 10, 'b': 20, 'c': 30}

# Second dictionary: 0.3 * new + 0.7 * previous
dict2 = {"a": 20, "b": 30, "c": 40}
print(tracker.update(dict2))  # {'a': 13.0, 'b': 23.0, 'c': 33.0}

# Third dictionary
dict3 = {"a": 30, "b": 40, "c": 50}
print(tracker.update(dict3))  # approximately {'a': 18.1, 'b': 28.1, 'c': 38.1} (float rounding)

{'a': 10, 'b': 20, 'c': 30}
{'a': 13.0, 'b': 23.0, 'c': 33.0}
{'a': 18.1, 'b': 28.099999999999998, 'c': 38.099999999999994}


## Test Run

In [None]:
# Smoke test: score the first CNN/DailyMail highlight against the other four.
dataset = load_dataset('cnn_dailymail', '3.0.0', split='train[:5]')

# Highlight of the first record is the text being scored
target_summary = dataset[0]['highlights']

# Highlights of the remaining records act as the references
reference_summaries = list(dataset['highlights'][1:])

metrics = compute_similarity_metrics(target_summary, reference_summaries)

In [None]:
metrics

{'avg_bleu': 0.007709767560515853,
 'std_bleu': 0.0023886735558457392,
 'avg_rouge1': 0.12096861471861473,
 'std_rouge1': 0.01984247298923302,
 'avg_rouge2': 0.00641025641025641,
 'std_rouge2': 0.011102889792108188,
 'avg_rougeL': 0.0761002886002886,
 'std_rougeL': 0.00975562960818927,
 'avg_meteor': 0.09901743352624669,
 'std_meteor': 0.02784123475481427,
 'avg_ter': 1.0530032367656952,
 'std_ter': 0.18430246079469428}

## Data Preprocessing

In [None]:
df_eng = pd.read_csv('english_only_conv_hash.csv')

In [None]:
# Keep only the first 10,000 conversation hashes from the CSV; a set gives
# fast membership tests when it is used to filter each dataset further down.
conv_hashes = set(df_eng['conversation_hash'].tolist()[:10000])

In [None]:
# Initialize the API client
api = HfApi()

# Get list of datasets for a specific user
datasets_api = api.list_datasets(author="penfever")

In [None]:
datasets_list = [ds.id for ds in datasets_api]

In [None]:
datasets_list_aai = [ds for ds in datasets_list if 'allenai' in ds]

In [None]:
datasets_list_aai[:5]

['penfever/allenai_WildChat-1M-Full-nvidia_NVLM-D-72B',
 'penfever/allenai_WildChat-1M-Full-neuralmagic_Meta-Llama-3.1-405B-Instruct-FP8-dynamic',
 'penfever/allenai_WildChat-1M-Full-neuralmagic_DeepSeek-Coder-V2-Instruct-FP8',
 'penfever/allenai_WildChat-1M-Full-meta-llama_Llama-3.3-70B-Instruct',
 'penfever/allenai_WildChat-1M-Full-google_gemma-2-27b-it']

In [None]:
# For each remaining dataset: download it, keep only the two conversation
# columns and the selected hashes, write the subset to Drive as parquet, then
# free disk and memory before the next download.
# NOTE(review): the [47:] offset presumably resumes an earlier, interrupted
# run - confirm before re-running from scratch.
for df_name in tqdm(datasets_list_aai[47:]):
    df = load_dataset(df_name, split='train')

    # Drop every column except the hash and the conversation itself
    unwanted_columns = [
        col for col in df.column_names
        if col not in ('conversation_hash', 'conversation')
    ]
    df = df.remove_columns(unwanted_columns)

    # Keep only conversations whose hash was selected above
    df = df.filter(lambda row: row['conversation_hash'] in conv_hashes)

    # Name the output file after the dataset id without its owner prefix
    df_savename = df_name.rsplit('/', 1)[-1]
    df.to_parquet(f"/content/drive/MyDrive/wc50m-english-subsets/{df_savename}.parquet")

    # Wipe the local datasets cache so downloads do not pile up on disk
    if os.path.exists(cache_directory):
        shutil.rmtree(cache_directory)
    del df
    gc.collect()

  0%|          | 0/8 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/17 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/17 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/17 [00:00<?, ?files/s]

Generating train split:   0%|          | 0/985648 [00:00<?, ? examples/s]

Loading dataset shards:   0%|          | 0/17 [00:00<?, ?it/s]

Filter:   0%|          | 0/985648 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/10 [00:00<?, ?ba/s]

 12%|█▎        | 1/8 [06:07<42:49, 367.06s/it]

Generating train split:   0%|          | 0/983767 [00:00<?, ? examples/s]

Filter:   0%|          | 0/983767 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/10 [00:00<?, ?ba/s]

 25%|██▌       | 2/8 [10:39<31:07, 311.28s/it]

Resolving data files:   0%|          | 0/22 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/22 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/22 [00:00<?, ?files/s]

Generating train split:   0%|          | 0/984521 [00:00<?, ? examples/s]

Loading dataset shards:   0%|          | 0/22 [00:00<?, ?it/s]

Filter:   0%|          | 0/984521 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/10 [00:00<?, ?ba/s]

 38%|███▊      | 3/8 [16:59<28:32, 342.53s/it]

Generating train split:   0%|          | 0/985120 [00:00<?, ? examples/s]

Filter:   0%|          | 0/985120 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/11 [00:00<?, ?ba/s]

 50%|█████     | 4/8 [21:33<21:03, 315.79s/it]

Generating train split:   0%|          | 0/984922 [00:00<?, ? examples/s]

Filter:   0%|          | 0/984922 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/11 [00:00<?, ?ba/s]

 62%|██████▎   | 5/8 [26:37<15:34, 311.50s/it]

Generating train split:   0%|          | 0/983335 [00:00<?, ? examples/s]

Filter:   0%|          | 0/983335 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/10 [00:00<?, ?ba/s]

 75%|███████▌  | 6/8 [32:18<10:42, 321.48s/it]

Generating train split:   0%|          | 0/984281 [00:00<?, ? examples/s]

Filter:   0%|          | 0/984281 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/10 [00:00<?, ?ba/s]

 88%|████████▊ | 7/8 [37:40<05:21, 321.73s/it]

Generating train split:   0%|          | 0/806219 [00:00<?, ? examples/s]

Filter:   0%|          | 0/806219 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/8 [00:00<?, ?ba/s]

100%|██████████| 8/8 [42:44<00:00, 320.55s/it]


## Generate Scores For Subsets

In [None]:
from pathlib import Path
from tqdm import tqdm

df_path = "/content/drive/MyDrive/wc50m-english-subsets/"
df_list = list(Path(df_path).rglob('*.parquet'))

In [None]:
cum_results_dict = {}

In [None]:
from multiprocessing import Pool
from functools import partial
from tqdm import tqdm
import pandas as pd

def compute_metrics_for_row(args):
    """
    Score a single row of the primary frame against the same row of each reference.

    Args:
        args: Tuple ``(j, df, refs)`` - the row position, the primary
            DataFrame and the list of reference DataFrames.

    Returns:
        dict: The per-row result of ``compute_similarity_metrics`` (no
        averaging across rows is done here).
    """
    row_idx, primary, references = args

    def content_at_row(frame):
        # Text of the conversation's second entry (index 1) - presumably the
        # model's reply; verify against the dataset schema.
        return str(frame.iloc[row_idx]['conversation'][1]['content']).strip()

    target = content_at_row(primary)
    reference_summaries = [content_at_row(ref) for ref in references]
    return compute_similarity_metrics(target, reference_summaries)

def parallel_compute_metrics(df, refs, cumulative_metrics, n_workers=2, batch_size=4):
    """
    Parallelize the metric computation but perform EMA updates serially

    Only the extracted texts are sent to the worker processes. Passing the
    DataFrames themselves as task arguments would serialize the complete
    frames once per row, which dominates the run time.

    Args:
        df: Input DataFrame
        refs: List of reference DataFrames
        cumulative_metrics: RunningDictAverage instance
        n_workers: Number of worker processes
        batch_size: Number of items to process in parallel before updating EMA
    """
    def _content_at(frame, row):
        # Text of the conversation's second entry (index 1), as a stripped string
        return str(frame.iloc[row]['conversation'][1]['content']).strip()

    n_rows = len(df)
    with Pool(n_workers) as pool:
        # Process in batches so the running average is updated as work completes
        for start in tqdm(range(0, n_rows, batch_size)):
            stop = min(start + batch_size, n_rows)

            # Build lightweight (target, references) string payloads in the
            # parent process; these are all the workers need.
            batch_texts = [
                (_content_at(df, j), [_content_at(ref, j) for ref in refs])
                for j in range(start, stop)
            ]

            # Compute metrics in parallel for this batch (results keep row order)
            batch_results = pool.starmap(compute_similarity_metrics, batch_texts)

            # Update running averages serially
            for metrics in batch_results:
                cumulative_metrics.update(metrics)

# Number of rows scored per model and number of reference models sampled.
MAX_ROWS_PER_MODEL = 500
NUM_REFERENCE_MODELS = 5


def _load_model_frame(path):
    """Read one parquet subset, keep one row per conversation_hash and sort by it."""
    # NOTE(review): align_dataframes filters every frame to the same key set,
    # so it can only report a mismatch when a hash is repeated a different
    # number of times in two frames. Keeping the first row per hash removes
    # that cause - confirm that duplicate conversations carry no extra signal.
    return (
        pd.read_parquet(str(path))
        .drop_duplicates(subset="conversation_hash")
        .sort_values("conversation_hash")
    )


counter = 0
for idx, frame in enumerate(df_list):
    print("MODEL NUMBER: ", idx, "\n\n")
    counter += 1

    # A per-model seed keeps the sampled reference models reproducible.
    np.random.seed(42843 + counter)
    subset_indices = np.random.choice(
        len(df_list),
        # Never request more frames than exist (np.random.choice would raise)
        size=min(NUM_REFERENCE_MODELS, len(df_list)),
        replace=False,
    )
    df = _load_model_frame(frame)
    # The model being scored is never used as its own reference
    refs = [_load_model_frame(df_list[i]) for i in subset_indices if i != idx]

    if not refs:
        print("No reference models available, skipping")
        del df, refs
        gc.collect()
        continue

    try:
        aligned_primary, aligned_refs = align_dataframes(
            primary_df=df,
            reference_dfs=refs,
            key_column='conversation_hash'
        )
    except (ValueError, AssertionError) as e:
        # Skip this model: continuing here would score whatever frames were
        # left over from the previous iteration under this model's name.
        print(f"Alignment failed: {e}")
        del df, refs
        gc.collect()
        continue
    df, refs = aligned_primary, aligned_refs

    cumulative_metrics = RunningDictAverage(alpha=0.3)
    # parallel_compute_metrics(df, refs, cumulative_metrics) #much slower

    # Score only the first MAX_ROWS_PER_MODEL aligned rows
    for j in tqdm(range(min(len(df), MAX_ROWS_PER_MODEL))):
        target = str(df.iloc[j]['conversation'][1]['content']).strip()
        reference_summaries = [str(ref.iloc[j]['conversation'][1]['content']).strip() for ref in refs]
        metrics = compute_similarity_metrics(target, reference_summaries)
        cumulative_metrics.update(metrics)

    cum_results_dict[str(frame)] = cumulative_metrics.averages
    del df, refs, cumulative_metrics
    gc.collect()
    # Checkpoint after every model so partial results survive an interruption
    pd.DataFrame.from_dict(cum_results_dict, orient='index').to_csv("/content/drive/MyDrive/wc50m-english-subsets/cum_results_dict.csv")

MODEL NUMBER:  0 /n/n


  5%|▍         | 500/10002 [01:12<22:58,  6.89it/s]


MODEL NUMBER:  1 /n/n
Alignment failed: Alignment failed for reference dataframe 1, dropping frame from list


  5%|▍         | 500/10002 [01:11<22:45,  6.96it/s]


MODEL NUMBER:  2 /n/n


  5%|▍         | 500/10002 [01:08<21:46,  7.27it/s]


MODEL NUMBER:  3 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [01:08<21:36,  7.33it/s]


MODEL NUMBER:  4 /n/n
Alignment failed: Alignment failed for reference dataframe 3, dropping frame from list


  5%|▍         | 500/10002 [01:06<21:06,  7.50it/s]


MODEL NUMBER:  5 /n/n


  5%|▍         | 500/10002 [02:03<39:12,  4.04it/s]  


MODEL NUMBER:  6 /n/n


  5%|▍         | 500/10002 [01:28<28:03,  5.64it/s]


MODEL NUMBER:  7 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [01:28<28:00,  5.66it/s]


MODEL NUMBER:  8 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:28<27:52,  5.68it/s]


MODEL NUMBER:  9 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [01:28<28:05,  5.64it/s]


MODEL NUMBER:  10 /n/n


  5%|▍         | 500/10002 [00:49<15:43, 10.07it/s]


MODEL NUMBER:  11 /n/n


  5%|▍         | 500/10002 [02:59<56:59,  2.78it/s]  


MODEL NUMBER:  12 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [03:01<57:21,  2.76it/s]  


MODEL NUMBER:  13 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [03:00<57:07,  2.77it/s]  


MODEL NUMBER:  14 /n/n


  5%|▍         | 500/10002 [01:16<24:05,  6.58it/s]


MODEL NUMBER:  15 /n/n


  5%|▍         | 500/10002 [00:57<18:22,  8.62it/s]


MODEL NUMBER:  16 /n/n
Alignment failed: Alignment failed for reference dataframe 3, dropping frame from list


  5%|▍         | 500/10002 [00:57<18:06,  8.75it/s]


MODEL NUMBER:  17 /n/n


  5%|▍         | 500/10002 [03:23<1:04:26,  2.46it/s]


MODEL NUMBER:  18 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [03:19<1:03:13,  2.50it/s]


MODEL NUMBER:  19 /n/n
Alignment failed: Alignment failed for reference dataframe 1, dropping frame from list


  5%|▍         | 500/10002 [03:19<1:03:19,  2.50it/s]


MODEL NUMBER:  20 /n/n


  5%|▍         | 500/10002 [01:03<20:13,  7.83it/s]


MODEL NUMBER:  21 /n/n


  5%|▍         | 500/10002 [01:24<26:45,  5.92it/s]


MODEL NUMBER:  22 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [01:24<26:38,  5.95it/s]


MODEL NUMBER:  23 /n/n


  5%|▍         | 500/10002 [00:54<17:06,  9.26it/s]


MODEL NUMBER:  24 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [00:55<17:29,  9.05it/s]


MODEL NUMBER:  25 /n/n
Alignment failed: Alignment failed for reference dataframe 1, dropping frame from list


  5%|▍         | 500/10002 [00:55<17:33,  9.02it/s]


MODEL NUMBER:  26 /n/n
Alignment failed: Alignment failed for reference dataframe 3, dropping frame from list


  5%|▍         | 500/10002 [00:55<17:30,  9.04it/s]


MODEL NUMBER:  27 /n/n


  5%|▍         | 500/10002 [00:51<16:13,  9.76it/s]


MODEL NUMBER:  28 /n/n


  5%|▍         | 500/10002 [01:22<26:14,  6.04it/s]


MODEL NUMBER:  29 /n/n


  5%|▍         | 500/10002 [01:04<20:34,  7.70it/s]


MODEL NUMBER:  30 /n/n
Alignment failed: Alignment failed for reference dataframe 3, dropping frame from list


  5%|▍         | 500/10002 [01:03<20:09,  7.86it/s]


MODEL NUMBER:  31 /n/n
Alignment failed: Alignment failed for reference dataframe 4, dropping frame from list


  5%|▍         | 500/10002 [01:05<20:35,  7.69it/s]


MODEL NUMBER:  32 /n/n


  5%|▍         | 500/10002 [00:42<13:19, 11.89it/s]


MODEL NUMBER:  33 /n/n
Alignment failed: Alignment failed for reference dataframe 1, dropping frame from list


  5%|▍         | 500/10002 [00:42<13:27, 11.77it/s]


MODEL NUMBER:  34 /n/n


  5%|▍         | 500/10002 [01:34<30:04,  5.26it/s]


MODEL NUMBER:  35 /n/n


  5%|▍         | 500/10002 [00:51<16:15,  9.74it/s]


MODEL NUMBER:  36 /n/n


  5%|▍         | 500/10002 [01:24<26:55,  5.88it/s]


MODEL NUMBER:  37 /n/n


  5%|▍         | 500/10002 [01:38<31:20,  5.05it/s]  


MODEL NUMBER:  38 /n/n


  5%|▍         | 500/10002 [00:49<15:43, 10.07it/s]


MODEL NUMBER:  39 /n/n
Alignment failed: Alignment failed for reference dataframe 3, dropping frame from list


  5%|▍         | 500/10002 [00:51<16:25,  9.64it/s]


MODEL NUMBER:  40 /n/n


  5%|▍         | 500/10002 [01:19<25:05,  6.31it/s]


MODEL NUMBER:  41 /n/n
Alignment failed: Alignment failed for reference dataframe 2, dropping frame from list


  5%|▍         | 500/10002 [01:19<25:01,  6.33it/s]


MODEL NUMBER:  42 /n/n
Alignment failed: Alignment failed for reference dataframe 2, dropping frame from list


  5%|▍         | 500/10002 [01:20<25:22,  6.24it/s]


MODEL NUMBER:  43 /n/n


  5%|▍         | 500/10002 [01:07<21:31,  7.36it/s]


MODEL NUMBER:  44 /n/n


  5%|▍         | 500/10002 [01:23<26:20,  6.01it/s]


MODEL NUMBER:  45 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:23<26:22,  6.00it/s]


MODEL NUMBER:  46 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:23<26:20,  6.01it/s]  


MODEL NUMBER:  47 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:21<25:58,  6.10it/s]


MODEL NUMBER:  48 /n/n


  5%|▍         | 500/10002 [00:40<12:52, 12.30it/s]


MODEL NUMBER:  49 /n/n


  5%|▍         | 500/10002 [01:31<29:03,  5.45it/s]  


MODEL NUMBER:  50 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:32<29:16,  5.41it/s]


MODEL NUMBER:  51 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:30<28:34,  5.54it/s]


MODEL NUMBER:  52 /n/n
Alignment failed: Alignment failed for reference dataframe 0, dropping frame from list


  5%|▍         | 500/10002 [01:31<29:05,  5.44it/s]  
