In [None]:
!pip install youtube_transcript_api

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi

In [None]:
def get_video_id(url_link):
    """Extract the YouTube video ID from a URL (or return a bare ID unchanged).

    Supported inputs:
      * watch URLs carrying a ``v`` query parameter, in any position
        (``https://www.youtube.com/watch?v=<id>&t=10s``),
      * short links (``https://youtu.be/<id>``),
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths,
      * a bare video ID.

    URL fragments (``#...``) and extra query parameters are ignored.

    Args:
        url_link: The URL or ID as a string.

    Returns:
        The video ID as a string. Input that matches none of the known URL
        shapes falls back to the text after the last ``v=`` up to the next
        ``&`` (i.e. the whole input when it contains no ``v=``).
    """
    from urllib.parse import parse_qs, urlparse

    url_link = url_link.strip()
    # Without a scheme urlparse puts the host into the path; prefixing "//"
    # makes it recognise the host for inputs such as "youtu.be/<id>".
    parsed = urlparse(url_link if "://" in url_link else "//" + url_link)

    # Standard watch URL: the ID is the value of the "v" query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    segments = [segment for segment in parsed.path.split("/") if segment]

    # Short links keep the ID as the first path segment.
    if host == "youtu.be" and segments:
        return segments[0]

    # Embed / shorts / live / legacy "v" URLs keep the ID as the second segment.
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Fallback: plain string split, which also passes a bare ID through.
    return url_link.split("v=")[-1].split("&")[0]

In [None]:
# Extract the video ID from the example watch URL and keep it in `video_id`.
video_id = get_video_id("https://www.youtube.com/watch?v=0KDdU0DCbJA")

In [None]:
# Fetch the caption segments for `video_id` as a list of dicts (the following
# cells read each segment's 'text' key).
# Newer youtube_transcript_api releases (1.x) expose an instance method
# `fetch()` whose result converts to the list-of-dicts form via
# `to_raw_data()`; the static `get_transcript()` helper only exists in older
# releases. Support both so an unpinned install keeps working.
if hasattr(YouTubeTranscriptApi, "fetch"):
    transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
else:
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

In [None]:
# Inspect the fetched transcript (the cell output is its repr).
transcript

In [None]:
# Concatenate the 'text' field of every transcript segment, separated by
# single spaces, into one string.
transcript_text = " ".join(segment['text'] for segment in transcript)

In [None]:
# Inspect the joined transcript string (the cell output is its repr).
transcript_text

In [None]:
!pip install deepmultilingualpunctuation

In [None]:
from deepmultilingualpunctuation import PunctuationModel

# Build the punctuation model with the package defaults (no arguments passed).
model = PunctuationModel()

# Run the joined transcript through the model and keep the result as
# `punctuated_text`. NOTE(review): presumably the captions arrive without
# punctuation, which is why this step exists — confirm for the chosen video.
punctuated_text = model.restore_punctuation(transcript_text)

print(punctuated_text)

In [None]:
from transformers import pipeline

# Quick first pass: summarize only the first 1024 characters of the text.
summarizer = pipeline("summarization", model="google/pegasus-xsum")
# A character slice does not bound the number of tokens, so truncation=True is
# passed as well to let the pipeline clip any input that is still longer than
# the model accepts instead of failing on it.
summary = summarizer(
    punctuated_text[:1024],
    max_length=120,
    min_length=30,
    do_sample=False,
    truncation=True,
)
print(summary[0]['summary_text'])

In [None]:
from transformers import pipeline, AutoTokenizer

# Load model and tokenizer
model_name = "google/pegasus-xsum"
# Load the tokenizer once and hand that same instance to the pipeline, so the
# token counts used for chunking come from exactly the tokenizer the
# summarizer uses (and the tokenizer files are not loaded twice).
tokenizer = AutoTokenizer.from_pretrained(model_name)
summarizer = pipeline("summarization", model=model_name, tokenizer=tokenizer)

# Function to split long text into chunks of ≤ max_tokens tokens (512 by default)
def split_text_into_token_chunks(text, max_tokens=512, tokenize_fn=None):
    """Yield consecutive chunks of ``text`` that each fit a token budget.

    The text is split on whitespace and words are packed greedily: a word is
    added to the current chunk as long as the running token count stays at or
    below ``max_tokens``; otherwise the current chunk is emitted and the word
    starts a new one. Words are never split, so a single word whose own token
    count exceeds ``max_tokens`` is emitted as a chunk of its own.

    Token counts are the sum of per-word counts; anything a tokenizer adds
    when encoding a whole chunk is not included in the budget.

    Args:
        text: The text to split.
        max_tokens: Maximum summed per-word token count per chunk.
        tokenize_fn: Optional callable mapping one word to a sequence of
            tokens. Defaults to the ``tokenize`` method of the module-level
            ``tokenizer``.

    Yields:
        Non-empty strings made of the chunk's words joined by single spaces.
        Nothing is yielded for empty or whitespace-only ``text``.
    """
    if tokenize_fn is None:
        tokenize_fn = tokenizer.tokenize

    chunk = []
    current_token_count = 0

    for word in text.split():
        word_token_count = len(tokenize_fn(word))
        # Flush only a non-empty chunk; otherwise an oversized first word
        # would cause an empty string to be yielded.
        if chunk and current_token_count + word_token_count > max_tokens:
            yield " ".join(chunk)
            chunk = []
            current_token_count = 0
        chunk.append(word)
        current_token_count += word_token_count

    if chunk:
        yield " ".join(chunk)

# Generation settings applied to every chunk. truncation=True asks the
# pipeline to clip an input that is still too long for the model.
generation_options = {
    "max_length": 120,
    "min_length": 30,
    "do_sample": False,
    "truncation": True,
}

# Summarize chunk by chunk; a chunk that raises is reported and skipped so
# the remaining chunks are still processed.
summaries = []
for chunk in split_text_into_token_chunks(punctuated_text):
    try:
        summary = summarizer(chunk, **generation_options)
        summaries.append(summary[0]['summary_text'])
    except Exception as e:
        print(f"⚠️ Error on chunk: {e}")

# One summary per line, printed under a header.
final_summary = "\n".join(summaries)
print("\n✅ FINAL SUMMARY:\n", final_summary, sep="\n")

In [None]:
import os
from openai import OpenAI

# The API key is read from the OPENAI_API_KEY environment variable; stop
# early with a clear error when it is missing or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable not set.")

client = OpenAI(api_key=api_key)

# Placeholder prompt: replace with the text to send to the model.
prompt = "Your prompt goes here"

# Send a single user message and keep the full response object.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    max_tokens=256,
    temperature=1,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0,
)

# Print the message content of the first choice in the response.
print(chat_completion.choices[0].message.content)



In [None]:
# Print the message content of the first choice of the stored response again
# (no new request is made here).
print(chat_completion.choices[0].message.content)

In [None]:
# Inspect the whole response object (the cell output is its repr).
chat_completion