In [None]:
# ! pip install scispacy

**Time Stamp Evaluation**
---
---

This notebook contains the code to extract key steps manually using rule-based (regular expression) methods. Once the key steps have been extracted, they are compared semantically against the LLM output. Additionally, entity matches between steps are evaluated.

In [None]:
import os
import time
import re
import json
import sys
import numpy as np
import pandas as pd
import nltk
import spacy
import scispacy
from datetime import datetime
from scispacy.linking import EntityLinker
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer

# Manually Extract Steps

In [None]:
# Download spaCy's large general-purpose English model.
# NOTE(review): the cells visible in this notebook only load en_core_sci_lg from a
# local path; en_core_web_lg is never loaded here — confirm this download is still needed.
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m:00:01[0mm00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
def load_transcript(file_path=None, transcript_json=None):
    """Parse transcript data from a JSON file or from a JSON string.

    Args:
        file_path: Path of a JSON file to read. Takes precedence over
            ``transcript_json`` when both are truthy.
        transcript_json: JSON document given as a string.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If neither argument is provided.
    """
    # Reject the "nothing supplied" case up front.
    if not file_path and not transcript_json:
        raise ValueError("Either file_path or transcript_json must be provided")

    if file_path:
        with open(file_path, 'r') as handle:
            return json.load(handle)

    return json.loads(transcript_json)

def format_time(seconds):
    """Format a duration in seconds as ``HH:MM:SS.ss``.

    The value is rounded to hundredths of a second *before* it is split into
    hours, minutes and seconds. Rounding only the seconds field afterwards
    lets values such as 59.999 render as ``00:00:60.00`` instead of carrying
    into the minutes.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        A string with zero-padded hours and minutes and seconds shown with
        two decimals.
    """
    total_centiseconds = int(round(float(seconds) * 100))
    hours, remainder = divmod(total_centiseconds, 360000)
    minutes, remainder = divmod(remainder, 6000)
    whole_seconds, centiseconds = divmod(remainder, 100)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02}.{centiseconds:02}"

def extract_key_steps_rule_based(transcript_data, merge_gap_seconds=30):
    """Extract key procedural steps from transcript segments using regex rules.

    Every segment is read through its "transcript", "startTime" and "endTime"
    keys. Filler words are stripped from a *copy* of each segment (the caller's
    data is left untouched), each cleaned segment is matched against the
    per-step regex patterns, matches of the same step that lie close together
    in time are merged, and steps with duplicate descriptions are dropped.

    Args:
        transcript_data: Iterable of segment dicts with "transcript",
            "startTime" and "endTime" entries.
        merge_gap_seconds: Two matches of the same step are merged when the
            later one starts less than this many seconds after the earlier
            one ends.

    Returns:
        A list of step dicts ("step_name", "description", "start_time",
        "end_time", "start_seconds", "end_seconds") sorted by start time.
    """
    # Define filler word patterns
    filler_words_pattern = r"\b(fi|Excuse|very|And so|So|I think|alright|most|la|se|come on|like|well|what|thank|good|ok|nice|great|fine|interesting|important|collaborative|helpful|wonderful|welcome|here|there|that|this|um|uh|hmm|kind of|sort of|maybe|a little|as)\b"

    # Clean a copy of every segment so the caller's transcript is not mutated.
    cleaned_segments = []
    for segment in transcript_data:
        cleaned_text = re.sub(filler_words_pattern, '', segment["transcript"], flags=re.IGNORECASE)
        cleaned_text = re.sub(r"\s+", ' ', cleaned_text).strip()
        cleaned_segment = dict(segment)
        cleaned_segment["transcript"] = cleaned_text
        cleaned_segments.append(cleaned_segment)

    # Define patterns for key procedural steps (regex patterns)
    step_patterns = {
        "transeptal_puncture": [r"transeptal puncture", r"puncture.*septum", r"septal puncture"],
        "balloon_dilation": [r"balloon.*dilat", r"dilat.*balloon", r"pre.*dilat"],
        "valve_positioning": [r"position.*valve", r"align.*valve", r"valve.*position"],
        "valve_deployment": [r"deployment", r"deploy.*valve", r"inflat.*balloon", r"inflate"],
        "outcome_assessment": [r"outcome.*assess", r"assess.*outcome", r"echo.*assess", r"result.*assess"]
    }

    # The spaCy pipeline is only needed for the buffer fallback below, so it is
    # resolved lazily: reuse a module-level `nlp` if one exists, otherwise load
    # the model once. (A bare `try: nlp` inside this function cannot see the
    # global, because assigning `nlp` here would make the name local.)
    pipeline = None

    def get_pipeline():
        nonlocal pipeline
        if pipeline is None:
            pipeline = globals().get("nlp")
            if pipeline is None:
                pipeline = spacy.load("/kaggle/input/en-core-sci-lg/en_core_sci_lg-0.5.4/en_core_sci_lg/en_core_sci_lg-0.5.4")
        return pipeline

    steps = []
    buffer = ""  # lower-cased text accumulated since the last detected step

    # Loop through transcript segments and identify key steps
    for segment in cleaned_segments:
        transcript_text = segment["transcript"]
        text = transcript_text.lower()
        start_time = float(segment["startTime"])
        end_time = float(segment["endTime"])

        # Add to text buffer for context
        buffer += " " + text

        segment_sentences = None  # tokenized on first use only

        for step_name, patterns in step_patterns.items():
            # Collect every pattern of this step that fires on the segment and
            # emit ONE entry per (segment, step). Emitting one entry per
            # matching pattern produced duplicated descriptions after merging.
            hits = [pattern for pattern in patterns if re.search(pattern, text)]
            if not hits:
                continue

            if segment_sentences is None:
                segment_sentences = sent_tokenize(transcript_text)

            # Extract full sentences containing any of the matched patterns
            matched_sentences = [
                sentence for sentence in segment_sentences
                if any(re.search(pattern, sentence.lower()) for pattern in hits)
            ]

            # If no complete sentence matched, use the segment text
            description = " ".join(matched_sentences) if matched_sentences else transcript_text

            # A pattern may span sentence boundaries; in that case look for a
            # matching sentence in the accumulated buffer instead.
            if not matched_sentences and len(buffer.split()) > 20:
                for sent in get_pipeline()(buffer).sents:
                    sent_text = sent.text
                    if any(re.search(pattern, sent_text.lower()) for pattern in hits):
                        description = sent_text
                        break

            steps.append({
                "step_name": step_name,
                "description": description.strip(),
                "start_time": format_time(start_time),
                "end_time": format_time(end_time),
                "start_seconds": start_time,
                "end_seconds": end_time
            })

            # Clear buffer after finding a step
            buffer = ""

    # Merge nearby steps of the same type
    merged_steps = []
    for step_name in step_patterns:
        step_segments = sorted(
            (step for step in steps if step["step_name"] == step_name),
            key=lambda step: step["start_seconds"],
        )
        if not step_segments:
            continue

        current_merged = step_segments[0]
        for candidate in step_segments[1:]:
            # Extend the running step when the next match starts soon after it ends
            if candidate["start_seconds"] - current_merged["end_seconds"] < merge_gap_seconds:
                current_merged["description"] += " " + candidate["description"]
                current_merged["end_time"] = candidate["end_time"]
                current_merged["end_seconds"] = candidate["end_seconds"]
            else:
                merged_steps.append(current_merged)
                current_merged = candidate

        merged_steps.append(current_merged)

    # Final cleaning - drop steps whose normalized description was already seen
    unique_steps = []
    seen_descriptions = set()
    for step in merged_steps:
        desc_key = re.sub(r'\s+', ' ', step["description"].lower())
        if desc_key not in seen_descriptions:
            seen_descriptions.add(desc_key)
            unique_steps.append(step)

    # Sort by start time
    unique_steps.sort(key=lambda step: step["start_seconds"])

    return unique_steps

def save_steps_to_json(steps, output_file):
    """Write ``steps`` to ``output_file`` as JSON indented by two spaces."""
    with open(output_file, 'w') as out:
        out.write(json.dumps(steps, indent=2))

def main(transcript_json=None, transcript_file=None, output_file="tmvr_steps.json"):
    """Load a transcript, extract its key steps and optionally save them.

    Args:
        transcript_json: Transcript given as a JSON string.
        transcript_file: Path of a JSON transcript file.
        output_file: Destination for the extracted steps; a falsy value
            skips saving.

    Returns:
        The list of steps produced by ``extract_key_steps_rule_based``.
    """
    transcript = load_transcript(file_path=transcript_file, transcript_json=transcript_json)

    # Rule-based extraction of the key steps
    extracted = extract_key_steps_rule_based(transcript)

    if not output_file:
        return extracted

    save_steps_to_json(extracted, output_file)
    return extracted

In [None]:
# Directory that receives extracted_steps.json; "" means the current working directory.
# Note: the evaluation section reads /kaggle/working/extracted_steps.json, so keep the
# two locations in sync if this is changed.
output_dir = ""
if output_dir:
    os.makedirs(output_dir, exist_ok=True)  # actually ensure the directory exists

# Transcript to process (main() opens and parses it itself)
transcript_file = "/kaggle/input/pasteur/Pasteur_TMVR.txt"  # Ensure this file exists!

if not os.path.exists(transcript_file):
    raise FileNotFoundError(f"Transcript file '{transcript_file}' not found!")

# Call the function
extracted_steps = main(
    transcript_file=transcript_file,  # Pass the file path
    output_file=os.path.join(output_dir, "extracted_steps.json")
)

# Print extracted steps
print(f"Extracted {len(extracted_steps)} key steps:")
for i, step in enumerate(extracted_steps, 1):
    print(f"\nStep {i}: {step['step_name']}")
    print(f"Time: {step['start_time']} - {step['end_time']}")
    print(f"Description: {step['description']}")




Extracted 11 key steps:

Step 1: valve_positioning
Time: 00:00:42.44 - 00:01:08.22
Description: on the right groin, we have already positioned the the sheath of a trans valve we will implant is 29 millimeter.

Step 2: transeptal_puncture
Time: 00:02:57.52 - 00:03:21.75
Description: is the, the transeptal puncture. is the, the transeptal puncture.

Step 3: transeptal_puncture
Time: 00:05:36.58 - 00:06:04.66
Description: , is a small circle we see exactly and transeptal puncture ideal site. , is a small circle we see exactly and transeptal puncture ideal site.

Step 4: balloon_dilation
Time: 00:10:32.87 - 00:11:53.16
Description: we've selected the balloon just to pre dilate the, the septum. of a French guarding. , next step. is the balloon. we've selected the balloon just to pre dilate the, the septum. It's going to be a 14 millimeter balloon just to make sure we have enough room to advance the the sepi free device. And you can show us the, the inter atrial septum in the view. You want 

# Evaluation

In [None]:
# Scientific/medical spaCy pipeline, extended with the scispacy UMLS entity linker
nlp = spacy.load("/kaggle/input/en-core-sci-lg/en_core_sci_lg-0.5.4/en_core_sci_lg/en_core_sci_lg-0.5.4")
nlp.add_pipe("scispacy_linker", config={"resolve_abbreviations": True, "linker_name": "umls"})

# LLM output: the "summary" field holds a JSON document encoded as a string,
# so it is decoded a second time after the file itself has been parsed.
with open("/kaggle/input/ground-truth/transcript1_gpt3.5_20250224_115540.json") as f:
    llm_payload = json.load(f)
llm_data = json.loads(llm_payload["summary"])

# 'ground truth' = manually (rule-based) extracted steps
with open("/kaggle/working/extracted_steps.json") as f:
    ground_truth = json.load(f)

# Flatten the LLM steps into the same record layout used for the ground truth
llm_procedure_steps = llm_data["Transcatheter Mitral Valve Implantation"]["steps"]
llm_steps = [
    {
        "step_name": step_num,
        "description": step_info["description"],
        "start_seconds": step_info["startTime"],
        "end_seconds": step_info["endTime"]
    }
    for step_num, step_info in llm_procedure_steps.items()
]

# Function to extract medical entities
def extract_medical_entities(text):
    """Return the entities of ``text`` that have at least one linked knowledge-base candidate.

    Each entity is reported as a dict holding its surface text, its label and
    the concept ID of the first candidate listed in ``ent._.kb_ents``.
    Entities without any candidate are skipped.
    """
    return [
        {
            "text": ent.text,
            "label": ent.label_,
            "umls_id": ent._.kb_ents[0][0]
        }
        for ent in nlp(text).ents
        if ent._.kb_ents
    ]

# Compare entities between documents
def _entity_f1(gt_ids, llm_ids):
    """F1 overlap between two sets of concept IDs; 0 when either set is empty or nothing is shared."""
    if not gt_ids or not llm_ids:
        return 0
    shared = len(gt_ids & llm_ids)
    if shared == 0:
        return 0
    precision = shared / len(llm_ids)
    recall = shared / len(gt_ids)
    return 2 * precision * recall / (precision + recall)


# Run the entity linker once per LLM step instead of once per (ground truth, LLM) pair.
llm_step_entities = [extract_medical_entities(llm_step["description"]) for llm_step in llm_steps]

entity_matches = []
for gt_step in ground_truth:
    gt_entities = extract_medical_entities(gt_step["description"])
    # Compare sets of concept IDs: counting every (gt, llm) pair lets repeated
    # entities inflate the match count and push precision/recall above 1.
    gt_ids = {entity["umls_id"] for entity in gt_entities}

    best_match = None
    best_match_entities = []
    best_score = 0

    for llm_step, llm_entities in zip(llm_steps, llm_step_entities):
        llm_ids = {entity["umls_id"] for entity in llm_entities}
        f1 = _entity_f1(gt_ids, llm_ids)

        if f1 > best_score:
            best_score = f1
            best_match = llm_step
            # Remember the entities of the best step; otherwise the report
            # would show whichever LLM step happened to be iterated last.
            best_match_entities = llm_entities

    entity_matches.append({
        "ground_truth_step": gt_step["step_name"],
        "best_matching_llm_step": best_match["step_name"] if best_match else None,
        "entity_match_score": best_score,
        "gt_entities": [e["text"] for e in gt_entities],
        "matching_llm_entities": [e["text"] for e in best_match_entities]
    })

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
# Display the entity-matching result recorded for each ground-truth step
entity_matches

[{'ground_truth_step': 'valve_positioning',
  'best_matching_llm_step': 'Step 10',
  'entity_match_score': 0.25,
  'gt_entities': ['right groin', 'sheath', 'trans valve', 'implant'],
  'matching_llm_entities': ['Real-time imaging',
   'confirmation',
   'valve position',
   'completion',
   'mitral valve implantation']},
 {'ground_truth_step': 'transeptal_puncture',
  'best_matching_llm_step': None,
  'entity_match_score': 0,
  'gt_entities': [],
  'matching_llm_entities': []},
 {'ground_truth_step': 'transeptal_puncture',
  'best_matching_llm_step': None,
  'entity_match_score': 0,
  'gt_entities': ['small circle', 'small circle'],
  'matching_llm_entities': []},
 {'ground_truth_step': 'balloon_dilation',
  'best_matching_llm_step': 'Step 2',
  'entity_match_score': 0.04,
  'gt_entities': ['balloon',
   'dilate',
   'septum',
   'French',
   'guarding',
   'balloon',
   'balloon',
   'dilate',
   'septum',
   'room',
   'inter atrial septum',
   'short axis',
   'bike',
   'balloon',


Based on the vocabulary of the large scispaCy medical model, we are able to compare how well the key entities match within each step. The limitation here is the vocabulary. However, we can see that many steps do include similar words. The comparison would have been better if lemmatization had been applied. We can see, for example, that the step matching valve positioning has a low score. However, the LLM returned mitral position, which is even better because it is more descriptive. This shows the limitation of not using stemming or lemmatization here: the scores are skewed. Overall, we can see that the key entities extracted are within a good range of semantic similarity in most cases. Therefore, I would say that, on an entity level, the LLM is representative of the ground truth (which is also not perfect).

In [None]:
model = SentenceTransformer('pritamdeka/S-PubMedBert-MS-MARCO')

# Calculate semantic similarity between steps.
# Every description is embedded exactly once (in two batches) and a single
# similarity matrix is computed, instead of re-encoding all LLM steps for each
# ground-truth step (which also printed one progress bar per encode call).
gt_descriptions = [gt_step["description"] for gt_step in ground_truth]
llm_descriptions = [llm_step["description"] for llm_step in llm_steps]

semantic_matches = []
if gt_descriptions and llm_descriptions:  # nothing to compare when either side is empty
    gt_embeddings = model.encode(gt_descriptions, show_progress_bar=False)
    llm_embeddings = model.encode(llm_descriptions, show_progress_bar=False)

    # similarity_matrix[i][j] = cosine similarity of ground-truth step i and LLM step j
    similarity_matrix = cosine_similarity(gt_embeddings, llm_embeddings)

    for gt_step, similarity_row in zip(ground_truth, similarity_matrix):
        # argmax returns the first maximum, i.e. the earliest LLM step on ties
        best_index = int(np.argmax(similarity_row))
        best_llm_step = llm_steps[best_index]

        semantic_matches.append({
            "ground_truth_step": gt_step["step_name"],
            "gt_description": gt_step["description"],
            "best_match": best_llm_step["step_name"],
            "best_match_description": best_llm_step["description"],
            "similarity_score": similarity_row[best_index]
        })

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Display the best semantic match found for each ground-truth step
semantic_matches

[{'ground_truth_step': 'valve_positioning',
  'gt_description': 'on the right groin, we have already positioned the the sheath of a trans valve we will implant is 29 millimeter.',
  'best_match': 'Step 3',
  'best_match_description': "Reviewing the patient's condition, positioning in the right groin, and sizing for the valve implant.",
  'similarity_score': 0.9217067},
 {'ground_truth_step': 'transeptal_puncture',
  'gt_description': 'is the, the transeptal puncture. is the, the transeptal puncture.',
  'best_match': 'Step 6',
  'best_match_description': 'Transeptal puncture and positioning tools inside the supra vena cava under X-ray guidance.',
  'similarity_score': 0.9100877},
 {'ground_truth_step': 'transeptal_puncture',
  'gt_description': ', is a small circle we see exactly and transeptal puncture ideal site. , is a small circle we see exactly and transeptal puncture ideal site.',
  'best_match': 'Step 6',
  'best_match_description': 'Transeptal puncture and positioning tools ins

Remarks:
Semantically, the LLM steps match well with a step in the manual extraction. This means that the LLM output is not hallucinating and is correctly reporting what is happening in the transcript. This method is certainly not perfect, as there are slight structural differences between the steps. However, it adds a level of confirmation that the LLM outputs are in fact well representative of the procedure. To further validate our results, we verified them with human review.

Human Manual Verification:

In addition to this manual extraction of key steps, we have checked the transcript and manually defined general steps and actions taken in each step and their corresponding time frame. This was compared with the LLM output for additional verification.