In [13]:
import pympi
import re
from collections import defaultdict
import shutil



def get_all_time_offsets(eaf_file):
    
    offsets = []
    eaf = pympi.Elan.Eaf(eaf_file)    

    for media_descriptor in eaf.media_descriptors:
        TIME_ORIGIN='TIME_ORIGIN'
        if (TIME_ORIGIN in media_descriptor):
            offsets.append(int((media_descriptor['TIME_ORIGIN'])))
    
    return offsets if offsets else None


def format_time(seconds, offset=0):
    """Helper function to format time in SRT format (HH:MM:SS,ms)"""
    seconds += offset / 1000  # Convert milliseconds to seconds
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    whole_seconds = int(seconds % 60)
    milliseconds = int((seconds % 1) * 1000)  
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"


def extend_annotations_with_priority(annotations, min_duration=0.5, start_buffer=0.1, end_buffer=0.5):
    
    # Sort annotations by start time
    annotations.sort(key=lambda x: x['start'])

    # Calculate durations
    for ann in annotations:
        ann['duration'] = ann['end'] - ann['start']
    
    # Pass 1: Extend annotations shorter than min_duration
    for i, ann in enumerate(annotations):
        if ann['duration'] < min_duration:
            buffer = min_duration - ann['duration']
            ann['start'] = max(0, ann['start'] - buffer * 0.5)
            ann['end'] += buffer * 0.5
            if i < len(annotations) - 1:
                next_ann = annotations[i + 1]
                if ann['end'] > next_ann['start']:
                    ann['end'] = next_ann['start']

    # Pass 2: Add half start_buffer where possible
    for i, ann in enumerate(annotations):
        buffer = start_buffer * 0.5
        ann['start'] = max(0, ann['start'] - buffer)
        if i > 0:
            prev_ann = annotations[i - 1]
            if ann['start'] < prev_ann['end']:
                ann['start'] = prev_ann['end']

    # Pass 3: Add full end_buffer and half start_buffer where possible
    for i, ann in enumerate(annotations):
        ann['end'] += end_buffer
        ann['start'] = max(0, ann['start'] - start_buffer * 0.5)
        if i < len(annotations) - 1:
            next_ann = annotations[i + 1]
            if ann['end'] > next_ann['start']:
                ann['end'] = next_ann['start']
        if i > 0:
            prev_ann = annotations[i - 1]
            if ann['start'] < prev_ann['end']:
                ann['start'] = prev_ann['end']

    
    # Remove temporary keys
    for ann in annotations:
        del ann['duration']

    return annotations

def format_annotation(annotation):
    # Use regex to find and replace ADD-TO-SIGNBANK(...) wrapper
    pattern = r'ADD-TO-SIGNBANK\((.*?)\)'    
    def replacement(match):
        return match.group(1).strip()
    annotation = re.sub(pattern, replacement, annotation)
    
    # Replace pronouns with more english translations
    # Personal Pronouns:
    
    
    annotation = annotation.replace("PT:PRO1SG", "I/me")
    annotation = annotation.replace("PT:PRO2SG", "you")
    annotation = annotation.replace("PT:PRO3SG", "he/she/it")
    annotation = annotation.replace("PT:PRO1PL", "we/us")
    annotation = annotation.replace("PT:PRO2PL", "you(pl)")
    annotation = annotation.replace("PT:PRO3PL", "they/them")

    annotation = annotation.replace("PRO1SG", "I/me")
    annotation = annotation.replace("PRO2SG", "you")
    annotation = annotation.replace("PRO3SG", "he/she/it")
    annotation = annotation.replace("PRO1PL", "we/us")
    annotation = annotation.replace("PRO2PL", "you(pl)")
    annotation = annotation.replace("PRO3PL", "they/them")
    # Possessive Pronouns:
    annotation = annotation.replace("POSS1SG", "my/mine")
    annotation = annotation.replace("POSS2SG", "your/yours")
    annotation = annotation.replace("POSS3SG", "his/her/it")
    annotation = annotation.replace("POSS1PL", "our/ours")
    annotation = annotation.replace("POSS2PL", "your/yours")
    annotation = annotation.replace("POSS3PL", "their/theirs")
    annotation = annotation.replace("PT:my/mine", "my/mine")

    annotation = annotation.replace("PT:BODY","(points to body)") #  Point to a body part
    annotation = annotation.replace("PT:LBUOY","(points to list)") # Point to a list buoy
    annotation = annotation.replace("PT:FBUOY","(points to fragment)") # Point to a fragment buoy
    annotation = annotation.replace("PT:BUOY","(points)") # Point to a buoy (of unspecified type)*
    annotation = annotation.replace("PT:", "(points)")

   # annotation = annotation.replace("LBUOY-ONE", "(list)")
   # annotation = annotation.replace("LBUOY-TWO", "(list)")
   # annotation = annotation.replace("LBUOY-THREE", "(list)")

#    if "BUOY" in annotation:
 #       print(annotation)
    

    # Remove number suffixes
    annotation = re.sub(r'\d+$', '', annotation)
   
    return annotation

            
def get_tier_names(eaf_filename):
    """
    Returns a list of tier names from an EAF file.
    
    Parameters:
    eaf_filename (str): Path to the EAF file
    
    Returns:
    list: List of tier names
    """
    try:
        # Load the EAF file
        eaf = pympi.Elan.Eaf(eaf_filename)
        
        # Get tier names
        tier_names = eaf.get_tier_names()
        
        return tier_names
    
    except Exception as e:
        print(f"Error reading EAF file: {str(e)}")
        return []


def eaf_to_srt_combined(eaf_file, srt_file, offset):

    eaf = pympi.Elan.Eaf(eaf_file)
    
    # Collect all annotations from relevant tiers
    annotations = []
    for tier_name in ['RH-IDgloss', 'LH-IDgloss', 'Free Translation']:
        for annotation in eaf.get_annotation_data_for_tier(tier_name):
            annotations.append({
                'tier': tier_name,
                'start': annotation[0],
                'end': annotation[1],
                'text': format_annotation(annotation[2])
            })
    
    # Sort annotations by start time
    annotations.sort(key=lambda x: x['start'])
    
    # Group annotations by sentence
    grouped_annotations = []
    current_group = {'start': None, 'end': None, 'annotations': []}
    for annotation in annotations:
        if current_group['start'] is None:
            current_group['start'] = annotation['start']
            current_group['end'] = annotation['end']
        if annotation['start'] <= current_group['end']:
            current_group['end'] = max(current_group['end'], annotation['end'])
            current_group['annotations'].append(annotation)
        else:
            grouped_annotations.append(current_group)
            current_group = {'start': annotation['start'], 'end': annotation['end'], 'annotations': [annotation]}
    if current_group['annotations']:
        grouped_annotations.append(current_group)
    
    # Write to SRT file
    overall_start_time = None
    overall_end_time = None
    with open(srt_file, 'w', encoding='utf-8') as f:
        for index, group in enumerate(grouped_annotations, 1):
            start_time = group['start'] / 1000
            end_time = group['end'] / 1000
            
            if overall_start_time is None:
                overall_start_time = start_time
            overall_end_time = end_time
            
            en_text = next((ann['text'] for ann in group['annotations'] if ann['tier'] == 'Free Translation'), '')
            bsl_annotations = [ann for ann in group['annotations'] if ann['tier'] in ['RH-IDgloss', 'LH-IDgloss']]
            
            parsed_bsl_annotations = []
            for bsl_ann in bsl_annotations:
                if bsl_ann['tier'] == 'RH-IDgloss':
                    rh_gloss = bsl_ann['text']
                    lh_gloss = next((ann['text'] for ann in bsl_annotations if ann['tier'] == 'LH-IDgloss' and ann['start'] == bsl_ann['start']), '')
                else:
                    lh_gloss = bsl_ann['text']
                    rh_gloss = next((ann['text'] for ann in bsl_annotations if ann['tier'] == 'RH-IDgloss' and ann['start'] == bsl_ann['start']), '')
                
                parsed_bsl = parse_bsl_annotation(lh_gloss, rh_gloss, en_text)
                if parsed_bsl:
                    parsed_bsl_annotations.append(parsed_bsl)
            
            text = en_text
            if parsed_bsl_annotations:
                text += ' | ' + ' '.join(parsed_bsl_annotations)
            
            f.write(f"{index}\n")
            f.write(f"{format_time(start_time, offset)} --> {format_time(end_time, offset)}\n")
            f.write(f"{text}\n\n")
    
    print(f"Found {printable_time(overall_start_time, overall_end_time)} of subtitles from {format_time(overall_start_time, offset)} to {format_time(overall_end_time, offset)} ")

    
def parse_bsl_annotation(lh_gloss, rh_gloss, full_sentence):
    # Implement your custom logic here
    print( full_sentence)
    print(lh_gloss, rh_gloss)
    print("")
    # This is just a placeholder implementation
    if "BUOY" in lh_gloss or "BUOY" in rh_gloss:
        return f"(points) {lh_gloss} {rh_gloss}"
    elif "PRO" in lh_gloss or "PRO" in rh_gloss:
        return f"(pronoun) {lh_gloss} {rh_gloss}"
    else:
        return f"{lh_gloss} {rh_gloss}".strip()

    
def printable_time(start_time, end_time):
    seconds = end_time-start_time
    if (seconds < 60):
        return f"{seconds:.0f} seconds"
    else:
        minutes = seconds/60.0
        return  f"{minutes:.2f} minutes"
    
def eaf_to_srt(eaf_file, srt_file, tier_name, offset):
    eaf = pympi.Elan.Eaf(eaf_file)
    overall_start_time=None
    overall_end_time=None
    
    with open(srt_file, 'w', encoding='utf-8') as f:
        index = 1
        annotations = eaf.get_annotation_data_for_tier(tier_name)
        for annotation in annotations:
            start_time = annotation[0] / 1000
            end_time = annotation[1] / 1000
            text = format_annotation(annotation[2])
            
            if overall_start_time is None:
                overall_start_time = start_time
            overall_end_time = end_time            
            
            f.write(f"{index}\n")
            f.write(f"{format_time(start_time, offset)} --> {format_time(end_time, offset)}\n")
            f.write(f"{text}\n\n")
            index += 1
    
    print(f"Found {printable_time(overall_start_time, overall_end_time)} of subtitles from {format_time(overall_start_time, offset)} to {format_time(overall_end_time, offset)} ")

def process_file(eaf_file):
    srt_file_combined = eaf_file.replace(".eaf", ".combined.srt")

    tiers = get_tier_names(eaf_file)
    offsets = get_all_time_offsets(eaf_file)
    offset = offsets[0] if offsets else 0

    print("Using time offset:", offset)

    print("Extracting combined subtitles")
    eaf_to_srt_combined(eaf_file, srt_file_combined, offset)

process_file('inputs/BF10n.eaf')

Using time offset: 2960
Extracting combined subtitles
Okay?
 GOOD

Okay?
 G:HEY

Right, hey.
 G:HEY

Right, hey.
 GOOD

Right, hey.
 I/me

Right, hey.
 G:HEY

Right, hey.
 I/me

Right, hey.
 I/me

Right, hey.
 I/me

Right, hey.
 I/me

Right, hey.
 I/me

Right, hey.
PREGNANT EXPECT

Right, hey.
PREGNANT EXPECT

I gave birth to a baby boy.
BORN BORN

I gave birth to a baby boy.
BORN BORN

I gave birth to a baby boy.
AT-LAST AT-LAST

I gave birth to a baby boy.
AT-LAST AT-LAST

I gave birth to a baby boy.
BABY BABY

I gave birth to a baby boy.
BABY BABY

I gave birth to a baby boy.
 BOY

I gave birth to a baby boy.
AND AND

I gave birth to a baby boy.
AND AND

I gave birth to a baby boy.
?EXPAND ?EXPAND

I gave birth to a baby boy.
?EXPAND ?EXPAND

I gave birth to a baby boy.
FS:MID^SPOUSE FS:MID^SPOUSE

I gave birth to a baby boy.
FS:MID^SPOUSE FS:MID^SPOUSE

I gave birth to a baby boy.
VISIT VISIT

I gave birth to a baby boy.
VISIT VISIT

I gave birth to a baby boy.
VISIT VISIT

I gave 

In [8]:
import glob, os

for file in glob.glob('inputs/*.eaf'):
    print("-"*16)
    print(file)
    process_file(file)
    

----------------
inputs/BF24n.eaf
Using time offset: 4720
Extracting combined subtitles
Right!
GOOD GOOD

Right!
GOOD GOOD

A really horrible thing happened to me at home.
TRUE TRUE

A really horrible thing happened to me at home.
TRUE TRUE

A really horrible thing happened to me at home.
HORRIBLE HORRIBLE

A really horrible thing happened to me at home.
HORRIBLE HORRIBLE

A really horrible thing happened to me at home.
HAPPEN HAPPEN

A really horrible thing happened to me at home.
HAPPEN HAPPEN

A really horrible thing happened to me at home.
 my/mine

A really horrible thing happened to me at home.
HOUSE HOUSE

A really horrible thing happened to me at home.
HOUSE HOUSE

A really horrible thing happened to me at home.
 LIVE

A really horrible thing happened to me at home.
 (points)LOC

My wife and 2 children went to stay with her parents for 2 weeks.
 I/me

My wife and 2 children went to stay with her parents for 2 weeks.
SPOUSE SPOUSE

My wife and 2 children went to stay with her pa


It was't that bad and the snow was only this deep.
 I/me

It was't that bad and the snow was only this deep.
DRIVE DRIVE

It was't that bad and the snow was only this deep.
DRIVE DRIVE

It was't that bad and the snow was only this deep.
NO NO

It was't that bad and the snow was only this deep.
NO NO

It was't that bad and the snow was only this deep.
BAD BAD

It was't that bad and the snow was only this deep.
BAD BAD

It was't that bad and the snow was only this deep.
LITTLE 

It was't that bad and the snow was only this deep.
 THREE

It was't that bad and the snow was only this deep.
HOUR HOUR2 

It was't that bad and the snow was only this deep.
HOUR HOUR2 

It was't that bad and the snow was only this deep.
FROM FROM

It was't that bad and the snow was only this deep.
FROM FROM

It was't that bad and the snow was only this deep.
BELFAST BELFAST

It was't that bad and the snow was only this deep.
BELFAST BELFAST

It was't that bad and the snow was only this deep.
FS:TO(TOO) FS:TO(TO

Using time offset: 88
Extracting combined subtitles

G:RIGHT 

So, I started to learn to drive when I was 17.
FS:S(FALSE-START) FS:S(FALSE-START)

So, I started to learn to drive when I was 17.
FS:S(FALSE-START) FS:S(FALSE-START)

So, I started to learn to drive when I was 17.
 I/me

So, I started to learn to drive when I was 17.
LEARN LEARN

So, I started to learn to drive when I was 17.
LEARN LEARN

So, I started to learn to drive when I was 17.
DRIVE/VEHICLE DRIVE/VEHICLE

So, I started to learn to drive when I was 17.
DRIVE/VEHICLE DRIVE/VEHICLE

So, I started to learn to drive when I was 17.
START ADD-TO-SIGBNBANK(START3)

So, I started to learn to drive when I was 17.
START ADD-TO-SIGBNBANK(START3)

So, I started to learn to drive when I was 17.
 AGE02-SEVENTEEN(FALSE-START)

So, I started to learn to drive when I was 17.
 SEVENTEEN

So, I started to learn to drive when I was 17.
DRIVE DRIVE

So, I started to learn to drive when I was 17.
DRIVE DRIVE

Also, I passed my theory tes

Using time offset: 815
Extracting combined subtitles

GOOD 


 GOOD


G:DISMISS 


 G:DISMISS

I was not born Deaf.
I/me 

I was not born Deaf.
I/me 

I was not born Deaf.
BORN 

I was not born Deaf.
 BORN

I was not born Deaf.
DEAF 

I was not born Deaf.
I/me 

I was not born Deaf.
NO 

I was two, and it was through having meningitis.
I/me 

I was two, and it was through having meningitis.
I/me 

I was two, and it was through having meningitis.
I/me 

I was two, and it was through having meningitis.
AGE02-TWO 

I was two, and it was through having meningitis.
ADD-TO-SIGNBANKMENINGITISb(FALSE-START) 

I was two, and it was through having meningitis.
THROUGH 

I was two, and it was through having meningitis.
MENINGITISb 

I was two, and it was through having meningitis.
I/me 

I was two.
ADD-TO-SIGNBANK(AGE02-TWO 

I was two.
(points)LOC (points)LOC

I was two.
(points)LOC (points)LOC

I was two.
I/me 

I was two.
OVER-TIME OVER-TIME

I was two.
OVER-TIME OVER-TIME

I was two.
my/mine 


BEEN 

OK!
 I/me

OK!
 TWO

OK!
WEEK 

OK!
 WEEK

OK!
 PAST

OK!
 I/me

OK!
 HOLIDAY

OK!
WHERE WHERE

OK!
WHERE WHERE

OK!
 I/me

OK!
 PORTUGAL

OK!
TRUE TRUE

OK!
TRUE TRUE

OK!
RELEASE RELEASE

OK!
RELEASE RELEASE

OK!
EVERYTHING EVERYTHING

OK!
EVERYTHING EVERYTHING

OK!
 KNOW

OK!
 WHY

OK!
BAD BAD

OK!
BAD BAD

OK!
(points)LOC (points)LOC

OK!
(points)LOC (points)LOC

OK!
 EXPECT

OK!
 I/me

OK!
GO GO

OK!
GO GO

OK!
 EGYPT

OK!
 I/me


 GOOD

I was in the airport with Aer Lingus.
 I/me

I was in the airport with Aer Lingus.
GO-IN GO-IN

I was in the airport with Aer Lingus.
GO-IN GO-IN

I was in the airport with Aer Lingus.
TAKE-OFF TAKE-OFF

I was in the airport with Aer Lingus.
TAKE-OFF TAKE-OFF

I was in the airport with Aer Lingus.
FS:AER FS:AER

I was in the airport with Aer Lingus.
FS:AER FS:AER

I was in the airport with Aer Lingus.
FS:LINGUS FS:LINGUS

I was in the airport with Aer Lingus.
FS:LINGUS FS:LINGUS

I was in the airport with Aer Lingus.
 GOOD

I was in the air

 GOOD

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
 YESTERDAY(FALSE-START)

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
 (points)LOC

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
WEEKEND WEEKEND

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
WEEKEND WEEKEND

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
 (points)LOC

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
FRIDAY FRIDAY

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
FRIDAY FRIDAY

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
NIGHT NIGHT

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
NIGHT NIGHT

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
OVER-TIME OVER-TIME

At the weekend, Friday night I went to bed late, but woke up at 6 o'clock.
OVER

Using time offset: 10841
Extracting combined subtitles

 BAD


 BAD


BAD 

Oh no!
BAD 

Oh no!
 BAD

Oh!
 BAD

Was there a window open?
 you(pl)

Was there a window open?
 WINDOW

Was there a window open?
OPEN OPEN

Was there a window open?
OPEN OPEN

Was there a window open?
 you


 GOOD


 BAD


 GOOD


 BAD

I do believe they like gold.
 he/she/it

I do believe they like gold.
 LIKE

I do believe they like gold.
 GOLD

I do believe they like gold.
GOLD 

I do believe they like gold.
 I/me

I do believe they like gold.
BELIEVE BELIEVE

I do believe they like gold.
BELIEVE BELIEVE

That's what they say and it must be true!
GOSSIP GOSSIP

That's what they say and it must be true!
GOSSIP GOSSIP

That's what they say and it must be true!
G:WELL G:WELL

That's what they say and it must be true!
G:WELL G:WELL

That's what they say and it must be true!
TRUE TRUE

That's what they say and it must be true!
TRUE TRUE

That's what they say and it must be true!
you 

That's terrible!
 BAD

That


 YES


 YES


 G:WELL


 RIGHT


 G:WELL


 ALRIGHT

Yeah, I do, they're really  nice.
 YES

Yeah, I do, they're really  nice.
 GOOD

Yeah, I do, they're really  nice.
 LOVELY

Yeah, I do, they're really  nice.
 GOOD


 G:WELL


 YES


 LOVELY


 LOOK-GOOD


 COLOUR


 GOOD


 BAD


 BAD


 BAD


 BAD


 he/she/it


 BAD


 GOOD


 LOVELY


 BAD

You must have spoilt her.
DAMAGE DAMAGE

You must have spoilt her.
DAMAGE DAMAGE

You must have spoilt her.
BAD 


GOOD 


 GOOD

That's just so lovely!
TRUE TRUE

That's just so lovely!
TRUE TRUE

That's just so lovely!
TRUE TRUE

That's just so lovely!
TRUE TRUE

That's just so lovely!
 LOVELY


GOOD 


LOVELY 

Oh, me.
G:WELL G:WELL

Oh, me.
G:WELL G:WELL

Oh, me.
 I/me

Well.
 G:WELL

Well.
 I/me

Erm, last year, I ... 
 G:ERM

Erm, last year, I ... 
 I/me

Erm, last year, I ... 
 (points)LOC

Erm, last year, I ... 
YEAR YEAR

Erm, last year, I ... 
YEAR YEAR

Erm, last year, I ... 
 I/me

Erm, last year, I ... 
 GO-TO

It was Summer, dur


I went the 27th August for one week.
 WHEN

I went the 27th August for one week.
 WHAT

I went the 27th August for one week.
 NUMBER

I went the 27th August for one week.
 I/me

I went the 27th August for one week.
GO GO

I went the 27th August for one week.
GO GO

I went the 27th August for one week.
 TWO02^SEVEN

I went the 27th August for one week.
FS:AUGUST(AUG) FS:AUGUST(AUG)

I went the 27th August for one week.
FS:AUGUST(AUG) FS:AUGUST(AUG)

I went the 27th August for one week.
 ONE

I went the 27th August for one week.
WEEK WEEK

I went the 27th August for one week.
WEEK WEEK

I went the 27th August for one week.
INTERVAL INTERVAL

I went the 27th August for one week.
INTERVAL INTERVAL

I went the 27th August for one week.
 I/me

I went the 27th August for one week.
TAKE-OFF TAKE-OFF

I went the 27th August for one week.
TAKE-OFF TAKE-OFF

I went the 27th August for one week.
WITH WITH

I went the 27th August for one week.
WITH WITH

I went the 27th August for one week.
 my/mi

NO NO

But actually I don't feel that I was very close with my family...
NO NO

But actually I don't feel that I was very close with my family...
?APPROACH ?APPROACH

But actually I don't feel that I was very close with my family...
?APPROACH ?APPROACH

But actually I don't feel that I was very close with my family...
I/me 

But actually I don't feel that I was very close with my family...
FEEL 

But actually I don't feel that I was very close with my family...
I/me 

Because when I was away at boarding school we signed, and when I got home I didn't understand what people were saying.
WHY 

Because when I was away at boarding school we signed, and when I got home I didn't understand what people were saying.
I/me 

Because when I was away at boarding school we signed, and when I got home I didn't understand what people were saying.
PERMANENT PERMANENT

Because when I was away at boarding school we signed, and when I got home I didn't understand what people were saying.
PERMANENT PERMANE

I carried on pushing and got sweaty.
G:CA:SHAKES-TOP-TO-COOL-DOWN G:CA:SHAKES-TOP-TO-COOL-DOWN

People were walking past me shivering.
 PEOPLE

People were walking past me shivering.
WALK 

People were walking past me shivering.
 WALK

People were walking past me shivering.
COLD 

People were walking past me shivering.
 COLD

People were walking past me shivering.
 I/me

People were walking past me shivering.
 DSEW(1-VERT)-MOVE:HUMAN

Found 2.26 minutes of subtitles from 00:00:02,960 to 00:02:18,597 
----------------
inputs/BF11n.eaf
Using time offset: 0
Extracting combined subtitles

HERD HERD


HERD HERD

I remember, when I was young.
 I/me

I remember, when I was young.
 REMEMBER

I remember, when I was young.
 WHEN

I remember, when I was young.
 I/me

I remember, when I was young.
YOUNG YOUNG

I remember, when I was young.
YOUNG YOUNG

I remember, when I was young.
 I/me

I was with my husband, before we got married.
WITH WITH

I was with my husband, before we got married.
WITH WI

I want to tell you about my puppy.
 WANT

I want to tell you about my puppy.
 FAMILY

I want to tell you about my puppy.
AT-LAST AT-LAST

I want to tell you about my puppy.
AT-LAST AT-LAST

I want to tell you about my puppy.
HAVE HAVE

I want to tell you about my puppy.
HAVE HAVE

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIMAL DSEW(FLAT)-BE:ANIMAL

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIMAL DSEW(FLAT)-BE:ANIMAL

I want to tell you about my puppy.
 ?LAST-WEEK

I want to tell you about my puppy.
GOOD GOOD

I want to tell you about my puppy.
GOOD GOOD

I want to tell you about my puppy.
NEW NEW

I want to tell you about my puppy.
NEW NEW

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIMAL DSEW(FLAT)-BE:ANIMAL

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIMAL DSEW(FLAT)-BE:ANIMAL

I want to tell you about my puppy.
 LOOK-GOOD

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIMAL DSEW(FLAT)-BE:ANIMAL

I want to tell you about my puppy.
DSEW(FLAT)-BE:ANIM

In [9]:


# Example usage
annotations = [
    {'start': 1.0, 'end': 2.0, 'rh_text': 'Hello', 'lh_text': 'World'},
    {'start': 2.5, 'end': 3.5, 'rh_text': 'How', 'lh_text': 'are you?'},
    {'start': 5, 'end': 5.01, 'rh_text': 'wibble', 'lh_text': ''},
    {'start': 6.0, 'end': 7.0, 'rh_text': 'Good', 'lh_text': 'morning'}
]

extended_annotations = extend_annotations_with_priority(annotations)
for annotation in extended_annotations:
    print(annotation)


{'start': 0.8999999999999999, 'end': 2.45, 'rh_text': 'Hello', 'lh_text': 'World'}
{'start': 2.45, 'end': 4.0, 'rh_text': 'How', 'lh_text': 'are you?'}
{'start': 4.655, 'end': 5.755, 'rh_text': 'wibble', 'lh_text': ''}
{'start': 5.9, 'end': 7.5, 'rh_text': 'Good', 'lh_text': 'morning'}


In [12]:
import re
from datetime import datetime, timedelta

def parse_srt(file_path):
    subtitles = []
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        for i in range(0, len(lines), 4):
            if i + 2 < len(lines):
                time_range = lines[i + 1].strip()
                text = lines[i + 2].strip()
                start, end = time_range.split(' --> ')
                start_time = datetime.strptime(start, '%H:%M:%S,%f')
                end_time = datetime.strptime(end, '%H:%M:%S,%f')
                subtitles.append({
                    'start': start_time,
                    'end': end_time,
                    'text': text
                })
    return subtitles

def process_subtitles(english_srt, bsl_srt):
    english_subtitles = parse_srt(english_srt)
    bsl_subtitles = parse_srt(bsl_srt)
    
    output = []
    bsl_index = 0
    
    for english_sub in english_subtitles:
        output.append(f"- {english_sub['text']}")
        
        while bsl_index < len(bsl_subtitles):
            bsl_sub = bsl_subtitles[bsl_index]
            if bsl_sub['start'] < english_sub['end'] and bsl_sub['end'] > english_sub['start']:
                output.append(f"- {bsl_sub['text']}")
                bsl_index += 1
            else:
                break
    
    # Add any remaining BSL subtitles
    while bsl_index < len(bsl_subtitles):
        output.append(f"- {bsl_subtitles[bsl_index]['text']}")
        bsl_index += 1
    
    return '\n'.join(output)

# Usage
english_srt = 'inputs/BF1n.en.srt'
bsl_srt = 'inputs/BF1n.bsl.srt'
result = process_subtitles(english_srt, bsl_srt)
print(result)


- Are we ready?
- GOOD
- I want to tell you about my puppy.
- I/me
- EXPLAIN
- ABOUT
- my/mine
- FS:PUPPY
- DSEW(FLAT)-BE:ANIMAL
- My family got a puppy last year.
- my/mine
- WANT
- FAMILY
- AT-LAST
- HAVE
- DSEW(FLAT)-BE:ANIMAL
- ?LAST-WEEK
- GOOD
- A new puppy, it's lovely.
- NEW
- DSEW(FLAT)-BE:ANIMAL
- LOOK-GOOD
- DSEW(FLAT)-BE:ANIMAL
- G:WELL
- My Dad had wanted a dog for a very long time
- ?LAST-WEEK
- TRUE
- my/mine
- FATHER
- ALWAYS
- WANT
- | WANT
- WANT
- | WANT
- DOG
- WANT
- | WANT
- SINCE
- Mum had said "no, no, no, no".
- my/mine
- MOTHER
- ALWAYS
- NO
- NO
- NO
- NO
- NO
- FATHER
- My Dad had been very patient
- BEHAVIOUR
- My Sister said to our Mum, "It's not fair, Dad wants a dog"
- my/mine
- SISTER
- SAY
- NO
- EQUAL
- my/mine
- FATHER
- WANT
- DOG
- G:WELL
- My Mum still wasn't sure but then things settled down.
- my/mine
- MOTHER
- G:ERM
- G:WELL
- SAME
- SETTLE
- SETTLE
- my/mine
- My sister got married and moved to England.
- SISTER
- MARRY
- MOVE
- SN:ENGLAND(RO