# Workflow of Learning App

## 1. Extract the YouTube ID from a given link

In [1]:
from pytube import extract

# Sample links in several shapes: a watch URL with extra query parameters, an
# embed URL, and two links whose ID segment is longer than the 11 characters
# that end up in the recorded output.
urls = [
    'https://www.youtube.com/watch?v=MoqtsYLGCC4&t=505s&pp=ygULcnVzdCBweXRob24%3D',
    'http://www.youtube.com/embed/SA2iWivDJiE',
    'http://www.youtube.com/4380tg8h430hir',
    'https://www.youtube.com/watch?v=358hg580gh9fh05-t240',
]

# Print the ID pytube extracts from each link, one per line.
for video_id in map(extract.video_id, urls):
    print(video_id)

MoqtsYLGCC4
SA2iWivDJiE
4380tg8h430
358hg580gh9


## 2. Extract & transform video captions

In [2]:
from youtube_transcript_api import YouTubeTranscriptApi
# Fetch the caption segments for one fixed video. The recorded output shows each
# segment as a dict with 'text', 'start' and 'duration' keys.
# NOTE(review): newer youtube_transcript_api releases appear to replace this
# class-level call with an instance-level fetch API — confirm the pinned version.
video_id = "MoqtsYLGCC4"
transcript = YouTubeTranscriptApi.get_transcript(video_id=video_id)
transcript

[{'text': 'rust is quickly making a name for itself',
  'start': 0.199,
  'duration': 5.12},
 {'text': 'as a speedy and approachable programming',
  'start': 2.84,
  'duration': 4.519},
 {'text': "language now you've been bothering me",
  'start': 5.319,
  'duration': 3.841},
 {'text': 'about doing a rust video for a while now',
  'start': 7.359,
  'duration': 4.841},
 {'text': 'so you win this video is my initial take',
  'start': 9.16,
  'duration': 5.32},
 {'text': 'on Rust and I might just do more content',
  'start': 12.2,
  'duration': 4.72},
 {'text': 'about it in the future rust has a pretty',
  'start': 14.48,
  'duration': 5.0},
 {'text': 'unique approach to type safety memory',
  'start': 16.92,
  'duration': 4.76},
 {'text': 'management and exception handling and',
  'start': 19.48,
  'duration': 3.799},
 {'text': 'that makes it a Powerhouse for', 'start': 21.68, 'duration': 3.88},
 {'text': 'performance critical applications there',
  'start': 23.279,
  'duration': 4.08},


In [3]:
# Collapse the caption segments into one plain-text string for the LLM prompt.
# The name `transcript` is reused for the result, so guard on its type: without
# the guard, running this cell a second time would index into the already-joined
# string and raise a TypeError.
if not isinstance(transcript, str):
    transcript = " ".join(segment["text"] for segment in transcript)
transcript



## 3. Feed video captions into an LLM (OpenAI)

In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain

#########################################
# Keep the secret out of the notebook file: read it from the environment and
# fall back to a hidden interactive prompt when the variable is not set.
import os
from getpass import getpass

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
#########################################


# System prompt describing the quiz format the model must return.
# Deliberately a plain (non-f) string: it contains no Python placeholders, and
# it is handed to a LangChain prompt template, which gives `{name}` its own
# meaning — an f-string would force brace-doubling if braces were ever added.
template = """
    You are an assistant designed to create questions based on any given text. For each text segment you receive, your task is to generate 5 unique questions. Each question should be paired with 4 possible answers: one correct answer and three incorrect answers.

    To ensure clarity and ease of processing, format your response to resemble a Python list of lists.

    Your response should be structured as follows:

    1. An outer list containing 5 inner lists.
    2. Each inner list should represent a question and its answers, containing exactly 5 strings in this order:
       - The generated question.
       - The correct answer.
       - The first incorrect answer.
       - The second incorrect answer.
       - The third incorrect answer.

    Your response should look like this:
    [
        ["Generated Question 1", "Correct Answer 1", "Incorrect Answer 1.1", "Incorrect Answer 1.2", "Incorrect Answer 1.3"],
        ["Generated Question 2", "Correct Answer 2", "Incorrect Answer 2.1", "Incorrect Answer 2.2", "Incorrect Answer 2.3"],
        ...
    ]

    It is essential to follow this format as it is optimized for subsequent Python processing.
"""

# Prompt layout: the fixed instructions form the system message, and the text
# passed to the chain fills the human message's {text} placeholder.
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
prompt_messages = [system_message_prompt, human_message_prompt]
chat_prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Single-step chain: run the joined transcript through the chat model.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)
chain = LLMChain(llm=llm, prompt=chat_prompt)
quiz_data = chain.run(transcript)

# Show the reply's type first, then the reply itself, each on its own line.
print(type(quiz_data), quiz_data, sep="\n")

<class 'str'>
[
    ["What type of programming language is Rust known for being?", "Speedy and approachable", "Slow and challenging", "Complex and slow", "Simple and difficult"],
    ["What does Rust use for error handling instead of exceptions?", "Result and option types", "Exceptions and errors", "None and value types", "Errors and exceptions"],
    ["How does Rust handle memory management compared to Python?", "Based on ownership", "Using automatic garbage collection", "Does not handle memory", "Based on exceptions"],
    ["What is a major difference in how Python and Rust handle typing?", "Python is dynamic, Rust is static", "Python is static, Rust is dynamic", "Both are dynamic", "Both are static"],
    ["What feature in Rust helps with code expansion at compile time?", "Macros", "Functions", "Classes", "Variables"]
]


## 4. Transform output

In [6]:
import ast

def _parse_quiz(raw):
    """Turn the model's reply into a validated list of quiz rows.

    Args:
        raw: Reply text expected to contain a Python-style list of lists.

    Returns:
        A list whose items are lists of exactly five strings: the question,
        the correct answer, then three incorrect answers.

    Raises:
        ValueError: If no list literal can be parsed from ``raw`` or the
            parsed value does not have the expected shape.
    """
    try:
        parsed = ast.literal_eval(raw.strip())
    except (SyntaxError, ValueError, TypeError):
        # The reply may be wrapped in a code fence or surrounded by extra
        # prose; retry on the outermost [...] span only.
        start, end = raw.find("["), raw.rfind("]")
        if start == -1 or end < start:
            raise ValueError("no list literal found in the model reply") from None
        try:
            parsed = ast.literal_eval(raw[start:end + 1])
        except (SyntaxError, ValueError, TypeError) as exc:
            raise ValueError(f"model reply is not a valid Python literal: {exc}") from exc

    # literal_eval only guarantees "some literal"; check the shape the prompt
    # asked for before anything downstream indexes into the rows.
    rows_ok = isinstance(parsed, list) and all(
        isinstance(row, list)
        and len(row) == 5
        and all(isinstance(cell, str) for cell in row)
        for row in parsed
    )
    if not rows_ok:
        raise ValueError(
            "expected a list of [question, correct, wrong, wrong, wrong] rows of strings"
        )
    return parsed


quiz_data_clean = _parse_quiz(quiz_data)
print(type(quiz_data_clean))
print(quiz_data_clean)

<class 'list'>
[['What type of programming language is Rust known for being?', 'Speedy and approachable', 'Slow and challenging', 'Complex and slow', 'Simple and difficult'], ['What does Rust use for error handling instead of exceptions?', 'Result and option types', 'Exceptions and errors', 'None and value types', 'Errors and exceptions'], ['How does Rust handle memory management compared to Python?', 'Based on ownership', 'Using automatic garbage collection', 'Does not handle memory', 'Based on exceptions'], ['What is a major difference in how Python and Rust handle typing?', 'Python is dynamic, Rust is static', 'Python is static, Rust is dynamic', 'Both are dynamic', 'Both are static'], ['What feature in Rust helps with code expansion at compile time?', 'Macros', 'Functions', 'Classes', 'Variables']]
