# Translating Subtitles

In [1]:
import sys, os
import pandas as pd

# Resolve the parent of the current working directory and add it to the module
# search path so that project-level modules such as `config` can be imported.
# This assumes the notebook is run from a subfolder of the project root.
project_root = os.path.abspath("..")
sys.path.append(project_root)

# NOTE(review): the wildcard import hides which names come from `config`;
# presumably it supplies y_folder_ID and y_key used by the translation
# request further down -- confirm and consider importing them explicitly.
from config import *

In [3]:
# Path to the source subtitle table (tab-separated, '.en.tsv').
# NOTE(review): Windows-style relative path with backslashes; it will not
# resolve on POSIX systems. Kept as a plain str because a later cell derives
# the output path with str.replace().
sub_tsv = r'..\files\arjan\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.en.tsv'
sub_tsv

'..\\files\\arjan\\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.en.tsv'

In [13]:
# Load the subtitle table; fields are tab-separated.
# The 'Unnamed: 0' column seen in the output is presumably a row index that
# was written into the file when it was created -- TODO confirm.
df = pd.read_csv(sub_tsv, sep='\t')
df

Unnamed: 0,Start Time,End Time,Text
0,00:00:00.000,00:00:06.380,Today I'm going to walk you through how to set...
1,00:00:07.200,00:00:09.800,I'll start with code that has zero tests
2,00:00:09.800,00:00:15.640,"I'll add PyTest, show you step-by-step how to ..."
3,00:00:16.000,00:00:20.480,Including one feature almost no one knows abou...
4,00:00:20.480,00:00:25.959,I'll share some practical tips to make your te...
...,...,...,...
231,00:15:47.219,00:15:49.138,"If you want to learn more about what to test,"
232,00:15:49.138,00:15:52.378,"how to set up a complete set of unit tests,"
233,00:15:52.378,00:15:55.459,check out this video where I do a refactoring ...
234,00:15:55.459,00:15:58.698,and use unit tests to create a safety net.


## Obtaining translations from Yandex

In [7]:
import requests as rq
import json

In [19]:
# Row index of the table's midpoint; only the rows before it are sent for
# translation. Presumably this keeps the payload under the API's
# 10000-character limit reported in the error response shown at the end of
# the notebook -- confirm.
mid = len(df) // 2
mid

118

In [20]:
# Caption strings of the first `mid` rows, as a plain Python list
texts = df['Text'].iloc[:mid].tolist()
texts

["Today I'm going to walk you through how to set up PyTest from scratch in a really simple minimalistic way, Marie Kondo style",
 "I'll start with code that has zero tests",
 "I'll add PyTest, show you step-by-step how to write unit tests and even cover some advanced features along the way",
 "Including one feature almost no one knows about that's actually really cool. At the end",
 "I'll share some practical tips to make your tests cleaner faster and more maintainable. Let's get started",
 "Obviously, the first thing that you're going to need to do is install PyTest. You can install it using any package manager",
 "But in this video, I'm going to use uv which is fast and efficient. To add PyTest to your Py project file",
 'simply type uv add',
 'dash test dev PyTest',
 'This dev flag here ensures that PyTest is only installed in development mode, not in production',
 'You can also see that when you take a look at the Py project file',
 "So here we have dependencies, the regular depend

In [21]:
source_language = 'en'
target_language = 'ru'

# JSON payload for the Yandex Translate v2 `translate` endpoint.
# The API rejects a request whose combined `texts` length exceeds 10000
# characters (see the error body captured at the end of the notebook), so
# `texts` must already be a small enough slice.
body = {
    "sourceLanguageCode": source_language,
    "targetLanguageCode": target_language,
    "texts": texts,
    "folderId": y_folder_ID
}

# API-key authentication; y_key and y_folder_ID are defined outside this cell.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Api-Key {:s}".format(y_key)
}

# requests never times out by default, so an unresponsive server would hang
# the kernel; bound the wait explicitly (seconds).
response = rq.post(
    'https://translate.api.cloud.yandex.net/translate/v2/translate',
    json=body,
    headers=headers,
    timeout=30
)

print(response.status_code)

# On failure show the API's error message right here instead of leaving the
# next cell to fail with a bare KeyError on 'translations'.
if not response.ok:
    print(response.text)

200


In [24]:
# Parse the response body as JSON and keep its 'translations' entry: a list of
# {'text': ...} dicts, as seen in the output below.
translations = response.json()['translations']
translations

[{'text': 'Сегодня я расскажу вам о том, как настроить PyTest с нуля очень простым минималистичным способом в стиле Мари Кондо'},
 {'text': 'Я начну с кода, в котором нет тестов'},
 {'text': 'Я добавлю PyTest, покажу вам шаг за шагом, как писать модульные тесты, и даже расскажу о некоторых расширенных функциях на этом пути'},
 {'text': 'Включая одну функцию, о которой почти никто не знает, и которая на самом деле очень крутая. В конце'},
 {'text': 'Я поделюсь несколькими практическими советами, которые помогут сделать ваши тесты более понятными, быстрыми и удобными в обслуживании. Давайте начнем'},
 {'text': 'Очевидно, что первое, что вам нужно будет сделать, это установить PyTest. Вы можете установить его с помощью любого менеджера пакетов'},
 {'text': 'Но в этом видео я собираюсь использовать uv, который работает быстро и эффективно. Как добавить PyTest в ваш файл проекта Py'},
 {'text': 'просто введите uv add'},
 {'text': 'dash-тест dev PyTest'},
 {'text': 'Этот флаг разработчика зд

In [27]:
# Pull the translated strings out of the API result, then pad with None so the
# list is exactly as long as the DataFrame (rows not translated stay empty).
translated_texts = [entry['text'] for entry in translations]
padding = [None] * (len(df) - len(translated_texts))
extended_translations = translated_texts + padding

# Store the padded list as the 'Yandex' column
df['Yandex'] = extended_translations
df

Unnamed: 0,Start Time,End Time,Text,Yandex
0,00:00:00.000,00:00:06.380,Today I'm going to walk you through how to set...,"Сегодня я расскажу вам о том, как настроить Py..."
1,00:00:07.200,00:00:09.800,I'll start with code that has zero tests,"Я начну с кода, в котором нет тестов"
2,00:00:09.800,00:00:15.640,"I'll add PyTest, show you step-by-step how to ...","Я добавлю PyTest, покажу вам шаг за шагом, как..."
3,00:00:16.000,00:00:20.480,Including one feature almost no one knows abou...,"Включая одну функцию, о которой почти никто не..."
4,00:00:20.480,00:00:25.959,I'll share some practical tips to make your te...,"Я поделюсь несколькими практическими советами,..."
...,...,...,...,...
231,00:15:47.219,00:15:49.138,"If you want to learn more about what to test,",
232,00:15:49.138,00:15:52.378,"how to set up a complete set of unit tests,",
233,00:15:52.378,00:15:55.459,check out this video where I do a refactoring ...,
234,00:15:55.459,00:15:58.698,and use unit tests to create a safety net.,


In [28]:
# Derive the output path from the input path by swapping the '.en.tsv' suffix
# for '.en-ru.tsv' (same folder, same base name).
out_tsv = sub_tsv.replace('.en.tsv', '.en-ru.tsv')
out_tsv

'..\\files\\arjan\\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.en-ru.tsv'

In [29]:
# Write the table (including the 'Yandex' column) as tab-separated text,
# without the DataFrame index.
df.to_csv(out_tsv, sep='\t', index=False)

## Converting human-processed translation into .vtt

In [4]:
# Path to the human-processed translation table (see the section heading).
# NOTE(review): Windows-style relative path, same portability caveat as sub_tsv.
translated_tsv = r'..\files\arjan\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.en_pruned_mm.tsv.txt'
translated_tsv

'..\\files\\arjan\\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.en_pruned_mm.tsv.txt'

In [11]:
# Load the post-edited table (tab-separated); per the output below it carries
# the original 'Text' column plus the edited translation in 'MM'.
out_df = pd.read_csv(translated_tsv, sep='\t')
out_df

Unnamed: 0,Start Time,End Time,Text,MM
0,00:00:00.000,00:00:06.380,Today I'm going to walk you through how to set...,"Сегодня я продемонстрирую, как настроить PyTes..."
1,00:00:07.200,00:00:09.800,I'll start with code that has zero tests,"Начав с кода вообще без тестов,"
2,00:00:09.800,00:00:15.640,"I'll add PyTest, show you step-by-step how to ...","я установлю PyTest, а затем в пошаговом режиме..."
3,00:00:16.000,00:00:20.480,Including one feature almost no one knows abou...,"включая одну реально крутую фишку, о которой п..."
4,00:00:20.480,00:00:25.959,I'll share some practical tips to make your te...,"и дам несколько полезных подсказок, как сделат..."
...,...,...,...,...
231,00:15:47.219,00:15:49.138,"If you want to learn more about what to test,","При желании узнать больше о том, что именно тр..."
232,00:15:49.138,00:15:52.378,"how to set up a complete set of unit tests,",и как сформировать исчерпывающий набор модульн...
233,00:15:52.378,00:15:55.459,check out this video where I do a refactoring ...,посмотрите вот это видео. В нем я выполняю зад...
234,00:15:55.459,00:15:58.698,and use unit tests to create a safety net.,и использую модульные тесты для подстраховки.


In [None]:
def save_to_vtt(df, output_file="output.vtt"):
    """Write the captions held in a DataFrame to a WebVTT (.vtt) subtitle file.

    (Original version suggested by ChatGPT.)

    Args:
        df: DataFrame with 'Start Time', 'End Time' and 'Text' columns. The
            time values are written verbatim, so they must already be valid
            WebVTT timestamps (e.g. '00:00:07.200').
        output_file: Destination path; the file is overwritten and encoded
            as UTF-8.

    Rows whose 'Text' is missing (NaN/None) are skipped, so an untranslated
    row does not end up on screen as the literal caption "nan".
    """
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("WEBVTT\n\n")  # Mandatory file header followed by a blank line

        # Iterate the three columns in lockstep; this avoids building a Series
        # per row the way DataFrame.iterrows() does.
        cues = zip(df["Start Time"], df["End Time"], df["Text"])
        for start, end, text in cues:
            if pd.isna(text):
                continue  # No caption text for this time span
            f.write(f"{start} --> {end}\n")
            f.write(f"{text}\n\n")  # Blank line terminates the cue

In [12]:
# Replace the English 'Text' column with the human-edited 'MM' translation so
# that save_to_vtt(), which reads 'Text', emits the translated captions.
# Guarded and non-mutating: re-running the cell is a no-op, whereas the
# in-place version would drop the already-renamed translation column on a
# second run and then silently rename nothing.
if 'MM' in out_df.columns:
    out_df = out_df.drop(columns=['Text']).rename(columns={'MM': 'Text'})
out_df

Unnamed: 0,Start Time,End Time,Text
0,00:00:00.000,00:00:06.380,"Сегодня я продемонстрирую, как настроить PyTes..."
1,00:00:07.200,00:00:09.800,"Начав с кода вообще без тестов,"
2,00:00:09.800,00:00:15.640,"я установлю PyTest, а затем в пошаговом режиме..."
3,00:00:16.000,00:00:20.480,"включая одну реально крутую фишку, о которой п..."
4,00:00:20.480,00:00:25.959,"и дам несколько полезных подсказок, как сделат..."
...,...,...,...
231,00:15:47.219,00:15:49.138,"При желании узнать больше о том, что именно тр..."
232,00:15:49.138,00:15:52.378,и как сформировать исчерпывающий набор модульн...
233,00:15:52.378,00:15:55.459,посмотрите вот это видео. В нем я выполняю зад...
234,00:15:55.459,00:15:58.698,и использую модульные тесты для подстраховки.


In [15]:
# Derive the .vtt output path from the translated table's path by swapping the
# '.en_pruned_mm.tsv.txt' suffix for '.mm.ru.vtt'.
out_vtt = translated_tsv.replace('.en_pruned_mm.tsv.txt', '.mm.ru.vtt')
out_vtt

'..\\files\\arjan\\This Is How Marie Kondo Sets up Her Pytest_jxqGsJEhiAg.mm.ru.vtt'

In [16]:
# Write the captions in out_df to the .vtt file at out_vtt
save_to_vtt(out_df, out_vtt)

In [16]:
# Leftover debugging cell: shows the raw response body. The output below is the
# API error returned when the combined length of `texts` exceeded 10000
# characters (its execution count predates the successful request above).
response.text

'{\n "code": 3,\n "message": "total texts length must be not greater than 10000",\n "details": [\n  {\n   "@type": "type.googleapis.com/google.rpc.RequestInfo",\n   "requestId": "d75c2d53-9585-4e38-8181-e12e4d7ac638"\n  }\n ]\n}\n'

In [17]:
# Total number of characters across all strings in `texts`
sum(map(len, texts))

16026