# ***Problem:***
The .srt file containing the dialogues for the movie "Rosemary's Baby" has dialogues that are not properly synced with the movie video. This notebook contains Python code that helps solve this issue.

In [None]:
# importing all the necessary libraries
from datetime import datetime, timedelta
from google.colab import files
import pandas as pd

In [None]:
# reading the .csv file into a single-column DataFrame
# header=None: the first line of the file is treated as data, not as column titles
# names=[...]: the one column is given the label 'old_dialogue_column'
# NOTE(review): read_csv skips blank lines by default, so any blank separator lines
# in the file would not appear as rows -- confirm this is intended
dialogues = pd.read_csv('old_dialogues.csv', names=['old_dialogue_column'], header=None)

In [None]:
# displaying the details of the DataFrame (index range, columns, non-null counts, dtypes, memory usage)
dialogues.info()

In [None]:
# displaying the contents of the DataFrame (a bare expression at the end of a notebook cell is rendered as output)
dialogues

In [None]:
# counting the missing (null/NaN) entries in each column, if there are any
dialogues.isna().sum()

In [None]:
def no_letters(text: str) -> bool:
    """Check that the given text does not contain any letters.

    Parameters:
        text: The text to be checked

    Returns:
        True if the text does not contain any letters (so an empty text
        also gives True), False otherwise
    """
    # any() stops at the first alphabetic character, like the early return it replaces
    return not any(char.isalpha() for char in text)

# ***Some important observations before implementation:***

* The time stamps in the movie are represented as {"start-time" --> "end-time"}.

* Hence, we search for entries where '-->' is present and that contain no letters (a safe way to avoid treating a dialogue line that happens to contain '-->' as a timestamp).

* We also know the position indices for the timestamps, as the format for the timestamps is fixed.

            Format:
                        HH:MM:SS,sss --> HH:MM:SS,sss
            
            Where:            
                H - Hours
                M - Minutes
                S - Seconds
                s - Milliseconds

* We also know that the dialogues and the movie video are out of sync by 20 seconds.

In [None]:
# the dialogues in the subtitles script is delayed by 20 seconds, so the shift = 20 seconds
# (built once here instead of once per row)
SUBTITLE_DELAY = timedelta(seconds=20)


def shift_timestamps(entry, shift=SUBTITLE_DELAY):
    """Return the entry with its start & end timestamps moved by `shift`.

    Parameters:
        entry: One row entry of the dialogues column
        shift: The timedelta added to both timestamps (default: 20 seconds)

    Returns:
        The entry with new timestamps if it is a timestamp entry of the form
        'HH:MM:SS,sss --> HH:MM:SS,sss', otherwise the entry unchanged

    Raises:
        ValueError: if the entry looks like a timestamp entry ('-->' present,
        no letters) but does not follow the fixed format
    """
    # non-text values (e.g. NaN) and dialogue entries are kept as they are
    if not isinstance(entry, str) or '-->' not in entry or not no_letters(entry):
        return entry

    # extracting the start & end timestamps (HH:MM:SS) from their fixed positions
    start_time = datetime.strptime(entry[:8], '%H:%M:%S')
    end_time = datetime.strptime(entry[17:25], '%H:%M:%S')

    # adding the delay shift to the actual timestamps
    new_start = str((start_time + shift).time())
    new_end = str((end_time + shift).time())

    # the milliseconds, the arrow and anything after the end time are carried over untouched
    return new_start + entry[8:17] + new_end + entry[25:]


# adding the shifted entries to a new column, one entry at a time
dialogues['new_dialogue_column'] = dialogues['old_dialogue_column'].map(shift_timestamps)

In [None]:
# removing the original column from the DataFrame in place; only the new column is kept
dialogues.drop(columns=['old_dialogue_column'], inplace=True)

In [None]:
# converting the modified DataFrame to a .csv file
dialogues.to_csv('new_dialogues.csv')

# downloading the new & in-sync dialogues file
files.download('new_dialogues.csv')