# Project - YouTube Videos Summary Generator

### OpenAI Authentication

In [1]:
import openai
import os

In [2]:
# Read the API key from the environment so it never appears in the notebook.
# The lookup yields None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

### Import the custom package and set up its path

In [3]:
import sys
from pathlib import Path

# In Jupyter (Lab / Notebook) the working directory is the notebook's folder,
# so the parent of the current working directory is added to the import path.
module_path = str(Path.cwd().parents[0])

# Append only once so that re-running this cell does not pile up duplicates.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from common.usage import print_token_usage

In [4]:
# %pip install -q pytube  # run once if pytube is missing; %pip installs into the kernel's environment

In [5]:
# downloading the YouTube audio stream
def youtube_audio_downloader(link):
    """Download the first audio-only stream of a YouTube video.

    The downloaded file is moved to the current working directory under a
    name in which every whitespace run is replaced by '-' and the extension
    is replaced by '.mp3'. Only the name changes; the audio data itself is
    not re-encoded.

    Args:
        link: URL of the video. It must be a string containing 'youtube.com'
            or 'youtu.be'.

    Returns:
        The file name of the renamed audio file (relative to the current
        working directory), or False when the link is rejected, the video
        exposes no audio-only stream, or the downloaded file cannot be found.
    """
    import os
    import re

    accepted_hosts = ('youtube.com', 'youtu.be')
    if not isinstance(link, str) or not any(host in link for host in accepted_hosts):
        print('Invalid YouTube link!')
        return False

    # Imported only after validation so that a bad link is rejected without
    # requiring pytube to be installed.
    from pytube import YouTube

    yt = YouTube(link)

    # .first() yields None when the query matches nothing; guard against it
    # instead of failing with an AttributeError on .download().
    audio = yt.streams.filter(only_audio=True).first()
    if audio is None:
        print('No audio stream found for this video!')
        return False

    print('Downloading the audio stream ...', end='')
    output_file = audio.download()
    if os.path.exists(output_file):
        print('Done!')
    else:
        print('Error downloading the file!')
        return False

    basename = os.path.basename(output_file)
    name, _extension = os.path.splitext(basename)
    # Raw string: '\s' inside a normal string literal is an invalid escape.
    audio_file = re.sub(r'\s+', '-', f'{name}.mp3')
    # Use the path reported by pytube as the source (not a cwd-relative guess),
    # and os.replace so that re-running the cell overwrites an earlier download
    # on every platform instead of failing on Windows.
    os.replace(output_file, audio_file)
    return audio_file


# transcribing the audio_file or translating it to English 
def transcribe(audio_file, not_english=False):
    """Transcribe an audio file with Whisper, or translate it to English.

    The resulting text is written to 'transcript-<name>.txt' next to the
    audio file, where <name> is the audio file's name without its extension.

    Args:
        audio_file: Path of the audio file to process.
        not_english: When True, the audio is translated to English instead
            of being transcribed in its original language.

    Returns:
        The path of the transcript file, or False when audio_file is not a
        path to an existing file.
    """
    import os

    # The upstream stage signals failure by returning False. os.path.exists()
    # treats integers (and therefore False == 0) as file descriptors, so
    # without the type check a failed download would "exist" as stdin.
    if not isinstance(audio_file, (str, os.PathLike)) or not os.path.exists(audio_file):
        print('Audio file does not exist!')
        return False

    # Imported after validation so invalid input is rejected without the SDK.
    import openai

    if not_english:
        # translation to english
        start_message = 'Starting translating to English ...'
        whisper_call = openai.Audio.translate
    else:
        # transcription
        start_message = 'Starting transcribing ... '
        whisper_call = openai.Audio.transcribe

    with open(audio_file, 'rb') as f:
        print(start_message, end='')
        transcript = whisper_call('whisper-1', f)
        print('Done!')

    # Prefix only the file name, not the whole path, so that an audio file
    # located in another directory still yields a valid transcript path.
    directory, basename = os.path.split(audio_file)
    name, _extension = os.path.splitext(basename)
    transcript_filename = os.path.join(directory, f'transcript-{name}.txt')
    with open(transcript_filename, 'w') as f:
        f.write(transcript['text'])

    return transcript_filename


# summarizing the transcript using GPT
def summarize(transcript_filename):
    """Summarize a transcript file with the gpt-3.5-turbo chat model.

    The whole transcript is embedded in a single user prompt that asks for a
    title, an introduction paragraph, bullet points and a conclusion. Token
    usage of the request is reported through print_token_usage (imported at
    notebook level from common.usage).

    Args:
        transcript_filename: Path of the text file holding the transcript.

    Returns:
        The summary text returned by the model, or False when
        transcript_filename is not a path to an existing file.
    """
    import os

    # The upstream stage signals failure by returning False. os.path.exists()
    # treats integers (and therefore False == 0) as file descriptors, so
    # without the type check the transcript would be read from stdin.
    if not isinstance(transcript_filename, (str, os.PathLike)) or not os.path.exists(transcript_filename):
        print('The transcript file does not exist!')
        return False

    # Imported after validation so invalid input is rejected without the SDK.
    import openai

    with open(transcript_filename) as f:
        transcript = f.read()

    system_prompt = 'I want you to act as a Life Coach.'
    prompt = f'''Create a summary of the following text.
    Text: {transcript}
    
    Add a title to the summary.
    Your summary should be informative and factual, covering the most important aspects of the topic.
    Start your summary with an INTRODUCTION PARAGRAPH that gives an overview of the topic FOLLOWED
    by BULLET POINTS if possible AND end the summary with a CONCLUSION PHRASE.'''

    print('Starting summarizing ... ', end='')
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt}
        ],
        max_tokens=2048,
        temperature=1
    )

    print('Done')
    print_token_usage(response)
    # The summary is the message content of the first returned choice.
    return response['choices'][0]['message']['content']

In [6]:
# Alternative videos that were tried with this pipeline:
# link = 'https://www.youtube.com/watch?v=G7KNmW9a75Y' # (403: Forbidden) Miley Cyrus - Flowers
# link = 'https://www.youtube.com/watch?v=RI3JCq9-bbM'  
# link = 'https://www.youtube.com/watch?v=qs35t2xFqdU' # Pink Floyd
# link = 'https://www.youtube.com/watch?v=lKfhVLqadDQ' # Marie Curie in French
# link = 'https://www.youtube.com/watch?v=NUsoVlDFqZg'
# link = 'https://www.youtube.com/watch?v=IBvg3WeqP1U'
link = 'https://www.youtube.com/watch?v=Jyi1bNnvbCY' # Short TOEFL lecture

mp3_file = youtube_audio_downloader(link)
print(mp3_file)

# Each stage returns False on failure, so stop at the first failed stage
# instead of feeding False into the next one.
if mp3_file:
    # The second argument (not_english=True) is necessary if the source file is
    # not in English and needs to be translated as well.
    transcript_file = transcribe(mp3_file, not_english=False)
    if transcript_file:
        summary = summarize(transcript_file)
        print('\n')
        print(summary)

Downloading the audio stream ...Done!
TOEFL-Listening-Practice---Lecture-1.mp3
Starting transcribing ... Done!
Starting summarizing ... Done
·Token usage: 619 = 428 + 191 (prompt + completion)


Title: Human Migration throughout History

Introduction:
In this anthropology lecture, the professor discusses the history of human migration. Despite not associating ourselves with migratory animals, humans have always been on the move. The lecture explains some of the earliest and most significant human migrations.

Main Points:
- A few hundred thousand years ago, humans migrated from the savannas of Africa into Europe and Asia as part of the Ice Age cycles.
- Humans were able to survive in the harsh conditions of northern Europe and Asia due to their ability to make shelter and clothing and control fire.
- Later migrations took humans to Japan, Indonesia, Australia, and North America, all in chronological order.
- Human migration often involved displacing or mixing with the existing inhabita