In [200]:
from thefuzz import fuzz
import numpy as np

### Functions to Standardize Names from OCR

In [200]:
def standardize_name(name):
    """Return ``name`` with all whitespace removed and converted to lowercase.

    Used as a normalisation step before fuzzy comparison so that spacing
    and letter-case differences do not affect the match score.

    Args:
        name: The raw name string.

    Returns:
        The name as a single lowercase token with no whitespace.
    """
    # str.split() with no argument splits on any run of whitespace and
    # discards leading/trailing whitespace.
    pieces = name.split()
    collapsed = ''.join(pieces)
    return collapsed.lower()

def group_similar_speaker_names(pairs, threshold=85):
    """Replace near-duplicate speaker names with one canonical spelling.

    Each name is standardized with ``standardize_name`` and compared, in
    first-seen order, against the canonical names collected so far using
    ``fuzz.ratio``. The first canonical name scoring at least ``threshold``
    is used; if none does, the standardized name becomes a new canonical
    name.

    Args:
        pairs: Sequence of ``(timestamp, name)`` pairs. It is iterated
            twice, so it must be re-iterable (e.g. a list).
        threshold: Minimum ``fuzz.ratio`` score for two standardized names
            to be treated as the same speaker.

    Returns:
        A list of ``(timestamp, canonical_name)`` tuples in the same order
        as ``pairs``. Canonical names are in standardized form.
    """
    canonical_names = []
    canonical_by_raw = {}

    for _, raw_name in pairs:
        candidate = standardize_name(raw_name)
        for known in canonical_names:
            if fuzz.ratio(candidate, known) >= threshold:
                canonical_by_raw[raw_name] = known
                break
        else:
            # No existing canonical name was close enough: register a new one.
            canonical_names.append(candidate)
            canonical_by_raw[raw_name] = candidate

    # Second pass: rewrite every pair with its canonical (standardized) name.
    return [(timestamp, canonical_by_raw[raw_name]) for (timestamp, raw_name) in pairs]

### Functions to merge Diarization and Speaker changes dicts

In [246]:
def merge_consecutive_speaker_segments(segments, custom_end_time=None):
    """Collapse consecutive same-speaker observations into time segments.

    Args:
        segments: Sequence of entries whose element ``[0]`` is a timestamp
            and element ``[1]`` is a speaker name, in chronological order.
        custom_end_time: Optional end time for the final segment. When it
            is omitted (or falsy) the last speaker's end is unknown, so
            that final segment is not emitted.

    Returns:
        A list of ``{'start', 'end', 'speaker'}`` dicts. A segment ends at
        the timestamp where the next, different speaker first appears.
        An empty input yields an empty list.
    """
    # Operate on the argument itself (not on a notebook-level global) so the
    # function gives the same answer regardless of surrounding kernel state.
    if len(segments) == 0:
        return []

    merged = []
    current_speaker = segments[0][1]
    start = segments[0][0]
    end = start

    for entry in segments:
        timestamp, speaker = entry[0], entry[1]
        if speaker == current_speaker:
            continue
        # Speaker changed: close the running segment at this timestamp and
        # open a new one for the new speaker.
        end = timestamp
        merged.append({'start': start, 'end': end, 'speaker': current_speaker})
        start = timestamp
        current_speaker = speaker

    if custom_end_time:
        end = custom_end_time

    # Without a custom end time, start == end here, so nothing is appended.
    if start != end:
        merged.append({'start': start, 'end': end, 'speaker': current_speaker})
    return merged

def merge_diarization(diarization):
    """Merge consecutive diarization entries spoken by the same speaker.

    Args:
        diarization: Sequence of dicts with ``'speaker'``, ``'start'``,
            ``'end'`` and ``'text'`` keys, in chronological order.

    Returns:
        A list of ``{'start', 'end', 'speaker', 'text'}`` dicts, one per run
        of consecutive entries sharing a speaker. ``start`` is the start of
        the run's first entry, ``end`` is the end of its last entry, and
        ``text`` is the entries' texts joined with single spaces. An empty
        input yields an empty list.
    """
    # len() rather than truthiness: the notebook passes a NumPy array, whose
    # truth value is ambiguous when it has more than one element.
    if len(diarization) == 0:
        return []

    merged = []
    current_speaker = diarization[0]['speaker']
    start_time = diarization[0]['start']
    end_time = diarization[0]['end']
    # Accumulation starts from an empty string, so the first run's text
    # begins with a separator space; kept so existing output is unchanged.
    text = ''
    for entry in diarization:
        speaker = entry['speaker']
        if speaker == current_speaker:
            text = text + ' ' + entry['text']
            end_time = entry['end']
        else:
            merged.append({'start': start_time, 'end': end_time, 'speaker': current_speaker, 'text': text})
            # A new run begins at this entry's own start time (not its end).
            current_speaker = speaker
            start_time = entry['start']
            end_time = entry['end']
            text = entry['text']

    # Add the last segment
    merged.append({'start': start_time, 'end': end_time, 'speaker': current_speaker, 'text': text})
    return merged

### Identify top speaker based on query interval

In [238]:
def top_speaker(intervals, query_start, query_end):
    """Find the speaker with the most speaking time inside a query window.

    Args:
        intervals: Iterable of dicts with ``'start'``, ``'end'`` and
            ``'speaker'`` keys.
        query_start: Start of the query window.
        query_end: End of the query window.

    Returns:
        A ``(speaker, time)`` tuple.

        * Point query (``query_start == query_end``): the speaker of the
          first interval containing that instant (bounds inclusive) and
          that interval's full duration, or ``(None, 0.0)`` if none does.
        * Otherwise: the speaker whose intervals overlap the window for the
          greatest total time, with that total; ties go to the speaker
          encountered first. ``(None, 0.0)`` if nothing overlaps.
    """
    if query_start == query_end:
        # Point query: pick the first interval that contains the instant.
        containing = next(
            (iv for iv in intervals if iv['start'] <= query_start <= iv['end']),
            None,
        )
        if containing is None:
            return None, 0.0  # No speaker was speaking at that exact time
        return containing['speaker'], containing['end'] - containing['start']

    # Range query: accumulate each speaker's overlap with the window.
    totals = {}
    for iv in intervals:
        overlap_start = max(iv['start'], query_start)
        overlap_end = min(iv['end'], query_end)
        if overlap_start < overlap_end:
            who = iv['speaker']
            totals[who] = totals.get(who, 0.0) + (overlap_end - overlap_start)

    if not totals:
        return None, 0.0

    best = max(totals, key=totals.get)
    return best, totals[best]  # (speaker_name, total_time)

### Load Data

In [239]:
# Load the diarization records from a NumPy file.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects — only use it on files from a trusted source.
diarization = np.load('diarization.npy', allow_pickle=True)
with open('speakerChanges.txt', 'r') as file:
    # NOTE(review): eval() executes whatever the file contains. If the file
    # is a plain Python literal (presumably a list of (timestamp, name)
    # pairs — TODO confirm), ast.literal_eval would be a safer drop-in.
    speakerChanges = eval(file.read())


# Collapse consecutive same-speaker diarization entries; the end of the last
# merged entry is used as the end time for the final speaker segment below.
merged_diarization = merge_diarization(diarization)
custom_end_time = merged_diarization[-1]['end']

# speakerChanges is rebound twice here: raw pairs -> pairs with canonical
# names -> list of segment dicts. Keep these two statements in this order.
speakerChanges = group_similar_speaker_names(speakerChanges)
speakerChanges = merge_consecutive_speaker_segments(speakerChanges, custom_end_time)

### Build transcript by combining Diarization with Visual Speaker Detection

In [247]:
# Label each merged diarization segment with the speaker from speakerChanges
# that overlaps the segment's time span the most.
transcript = []
for segment in merged_diarization:
    start, end = segment['start'], segment['end']
    text = segment['text']
    speaker, speak_time = top_speaker(speakerChanges, start, end)
    if speaker is not None:
        transcript.append({'speaker':speaker, 'text':text})
    else:
        # No speaker segment overlaps this span: report it and stop, leaving
        # the transcript truncated at this point.
        print("HERE")
        break

In [248]:
# First 10 minutes of this video:
# https://www.youtube.com/watch?v=3BtZN2Tye08
# Everything works well except when speakers change very quickly,
# for example during roll call or when approving the meeting.
# We can filter these out of the dataset.
transcript

[{'speaker': 'grahampaige',
  'text': "  County School Board to order. For our moment of silence, we'd like to have a special remembrance of one of our teachers who passed recently.  Powerful ideas sometimes are described as throwing a rock in a river and watching it generate huge ripples that keep getting larger until they encompass a large area. This analogy can fit people as well as ideas. And so it was with Pete Fittner. To give you a sense of the length of his service, he was hired by Joe Vining to teach at Brownsville right out of ed school. In those days, Brownsville had 180 students.  Pete also supported math students at Aetna Hurt and Woodbrook, and taught at Burleigh, where he received our community's highest award for teaching, the Golden Apple.  His principal, Cassandra Blount, called him a trifecta as a teacher. She described his three superpowers as always building confidence in his students, treating students in a way that makes them want to reach their highest expectati

In [249]:
# Display the raw diarization records for inspection. This re-reads the same
# file that was loaded into `diarization` in the load-data cell.
np.load('diarization.npy', allow_pickle=True)

array([{'start': 0.031, 'end': 2.119, 'text': ' County School Board to order.', 'words': [{'word': 'County', 'start': 0.031, 'end': 0.754, 'score': 0.684}, {'word': 'School', 'start': 0.814, 'end': 1.115, 'score': 0.703}, {'word': 'Board', 'start': 1.135, 'end': 1.436, 'score': 0.628}, {'word': 'to', 'start': 1.456, 'end': 1.497, 'score': 0.462}, {'word': 'order.', 'start': 1.838, 'end': 2.119, 'score': 0.858}], 'speaker': 'SPEAKER_01'},
       {'start': 4.388, 'end': 11.053, 'text': "For our moment of silence, we'd like to have a special remembrance of one of our teachers who passed recently.", 'words': [{'word': 'For', 'start': 4.388, 'end': 4.528, 'score': 0.863}, {'word': 'our', 'start': 4.588, 'end': 4.649, 'score': 0.587}, {'word': 'moment', 'start': 4.709, 'end': 4.95, 'score': 0.68}, {'word': 'of', 'start': 4.97, 'end': 5.01, 'score': 0.988}, {'word': 'silence,', 'start': 5.11, 'end': 5.512, 'score': 0.914}, {'word': "we'd", 'start': 5.532, 'end': 5.673, 'score': 0.624}, {'word