# Transcribe a YouTube Video in English with AssemblyAI and yt-dlp

### References:

- [AssemblyAI documentation](https://www.assemblyai.com/docs)
- [yt-dlp on GitHub](https://github.com/yt-dlp/yt-dlp)

## Preparation

#### Imports and Globals

In [1]:
import json
from pathlib import Path

import assemblyai as aai
import yt_dlp

from config import *

# Hand the API key to the AssemblyAI SDK.
# NOTE(review): `aai_key` is not defined in this notebook; presumably it comes from
# the star-import of the local `config` module — confirm.
aai.settings.api_key = aai_key
# Prefix that the video id is appended to in order to form the full watch URL.
YT_BASE_URL = 'https://www.youtube.com/watch?v='
# Folder used in the yt-dlp output template for the downloaded audio file.
DST_FOLDER = 'files'

#### Task-specific Variables

In [2]:
# title = 'The Power To OverRule G-d'  # not actually needed: yt-dlp can obtain the title from the video
v_id = 'wKS2sFRI87w'  # the main identifier of the video; absolutely needed

#### Pull and save the soundtrack with yt-dlp

In [24]:
# Full watch URL for the selected video.
url = YT_BASE_URL + v_id

# yt-dlp options:
#   format  - the best audio version in m4a format (with fallbacks to other formats)
#   outtmpl - save into DST_FOLDER as "<title>_<id>.<ext>"
ydl_opts = dict(
    format='m4a/bestaudio/best',
    outtmpl=DST_FOLDER + '/%(title)s_%(id)s.%(ext)s',
)

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # Fetch the video (metadata and audio), then ask yt-dlp which file name
    # the output template resolves to for it.
    info = ydl.extract_info(url)
    audio_file = ydl.prepare_filename(info)

print('\n>>> Downloaded to:', audio_file)

[youtube] Extracting URL: https://www.youtube.com/watch?v=wKS2sFRI87w
[youtube] wKS2sFRI87w: Downloading webpage
[youtube] wKS2sFRI87w: Downloading tv client config
[youtube] wKS2sFRI87w: Downloading player c548b3da
[youtube] wKS2sFRI87w: Downloading tv player API JSON
[youtube] wKS2sFRI87w: Downloading ios player API JSON
[youtube] wKS2sFRI87w: Downloading m3u8 information
[info] wKS2sFRI87w: Downloading 1 format(s): 140
[download] files\The Power To OverRule G-d：_wKS2sFRI87w.m4a has already been downloaded
[download] 100% of   23.92MiB

>>> Downloaded to: files\The Power To OverRule G-d：_wKS2sFRI87w.m4a


## Building the transcript via AssemblyAI API

In [None]:
# Transcription settings: ask AssemblyAI to detect the spoken language itself
# rather than naming a language explicitly.
config = aai.TranscriptionConfig(
    language_detection=True,
)

In [27]:
# Send the downloaded audio file to AssemblyAI with the settings from `config`
# and keep the resulting transcript object.
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(audio_file, config)
print(transcript.status)

# Stop right here if the job failed, instead of letting a later cell fail
# while trying to save an unusable transcript.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f'Transcription failed: {transcript.error}')

TranscriptStatus.completed


In [20]:
# Base name of the downloaded audio file: no folder, no extension.
# Path.stem uses the current platform's path separator and strips only the final
# suffix, so video titles that contain dots are kept whole.
file_name = Path(audio_file).stem
file_name

'The Power To OverRule G-d：_wKS2sFRI87w'

In [21]:
# Shared path prefix for both output files; they go into the same folder
# (DST_FOLDER) as the downloaded audio.
transcript_base = f'{DST_FOLDER}/transcript_{file_name}_{transcript.id}'

# Raw API response as pretty-printed JSON. The context manager guarantees the
# file is flushed and closed once the dump is done.
with open(f'{transcript_base}.json', 'w', encoding='utf-8') as json_file:
    json.dump(transcript.json_response, json_file, indent=4, ensure_ascii=False)

# Plain transcript text.
with open(f'{transcript_base}.txt', 'w', encoding='utf-8') as f:
    f.write(transcript.text)

## Working with the transcript

In [None]:
# Load the transcript by id if necessary.
# NOTE: this rebinds `transcript`, replacing the object produced by the
# transcription cell above.
job_id = '1dc162c7-0114-4698-be0a-8259ad1e7edf'

transcript = aai.Transcript.get_by_id(job_id)
# Display the status of the fetched transcript as the cell output.
transcript.status

<TranscriptStatus.completed: 'completed'>

### Search for a word

In [32]:
# Word-level entries from the raw API response; each entry is a dict with
# 'text', 'start', 'end', 'confidence', 'speaker' and 'channel' keys.
words = transcript.json_response['words']
# Preview only the first few entries: displaying the whole list floods the
# notebook output. `words` itself still holds every entry.
words[:10]

[{'text': 'Good',
  'start': 1440,
  'end': 1552,
  'confidence': 0.99349,
  'speaker': None,
  'channel': None},
 {'text': 'morning',
  'start': 1552,
  'end': 1736,
  'confidence': 0.99996,
  'speaker': None,
  'channel': None},
 {'text': 'and',
  'start': 1760,
  'end': 1912,
  'confidence': 0.99584,
  'speaker': None,
  'channel': None},
 {'text': 'welcome',
  'start': 1936,
  'end': 2136,
  'confidence': 0.67908,
  'speaker': None,
  'channel': None},
 {'text': 'to',
  'start': 2168,
  'end': 2312,
  'confidence': 0.99951,
  'speaker': None,
  'channel': None},
 {'text': 'Worldwide',
  'start': 2336,
  'end': 2776,
  'confidence': 0.54569,
  'speaker': None,
  'channel': None},
 {'text': 'Wisdom.',
  'start': 2808,
  'end': 3192,
  'confidence': 0.98676,
  'speaker': None,
  'channel': None},
 {'text': 'Folks.',
  'start': 3256,
  'end': 3544,
  'confidence': 0.99498,
  'speaker': None,
  'channel': None},
 {'text': 'Today',
  'start': 3592,
  'end': 3800,
  'confidence': 0.99831,