In [18]:
import os
from dotenv import load_dotenv
from openai import OpenAI

In [19]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

if not api_key:
    print("No API key was found!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-;")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [20]:
# This code grabs the auto-generated subtitles, strips out the timestamps, and gives you plain text.
import yt_dlp
import glob
import re

def get_youtube_transcript(url):
    # Create a temporary output directory
    output_dir = "yt_subs"
    os.makedirs(output_dir, exist_ok=True)

    ydl_opts = {
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'outtmpl': f'{output_dir}/%(id)s.%(ext)s',
        'quiet': True
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        video_id = info.get("id")

    # Find the .vtt file
    vtt_files = glob.glob(f"{output_dir}/{video_id}*.vtt")
    if not vtt_files:
        raise FileNotFoundError("No .vtt subtitle file found. The video may not have captions.")

    with open(vtt_files[0], 'r', encoding='utf-8') as f:
        vtt = f.read()

    # Remove timestamps
    cleaned = re.sub(r"\d+:\d+\.\d+ --> \d+:\d+\.\d+.*\n?", '', vtt)
    return cleaned.strip()

In [21]:
# newspaper3k is pretty good at finding the actual content of a webpage

from newspaper import Article

def extract_article(url):
    article = Article(url)
    article.download()
    article.parse()
    return article.text

In [22]:
openai = OpenAI()
def summarize_text(text):
    prompt = f"Summarize this:\n\n{text}\n\nSummary:"
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=300
    )
    return response.choices[0].message.content.strip()

In [23]:
yt_text = get_youtube_transcript("https://www.youtube.com/watch?v=M9x_koRZ2bA")
print(yt_text)
print(summarize_text(yt_text))



WEBVTT                                                  
Kind: captions
Language: en

00:00:00.000 --> 00:00:02.500
Narrator: 
"Dark Souls had a story," you ask?

00:00:02.500 --> 00:00:07.000
Indeed! Perhaps if your vision was not obfuscated by myriad deaths...

00:00:07.000 --> 00:00:08.700
...you might see more clearly!

00:00:08.800 --> 00:00:11.300
Fret not, my crestfallen children...

00:00:11.300 --> 00:00:14.000
...for I shall reveal this to you!"

00:00:15.000 --> 00:00:18.000
Dragon 1: "Bein' a dragon is... amazing!"

00:00:18.000 --> 00:00:20.000
Dragon 2: "Amazing! Amaaaazzziiinnggg!"

00:00:20.000 --> 00:00:23.700
Dragon 1: "Rulin' over our endless gray landscape 
of gray water and gray trees...

00:00:23.700 --> 00:00:25.300
Dragon 1: "We even got this skull!"

00:00:25.300 --> 00:00:27.100
Dragon 1: "The hell if I know where it came from."

00:00:27.100 --> 00:00:29.500
Dragon 1: "This really is... the high life."

00:00:29.500 --> 00:00:31.400
Dragon 3: "Except for Seat

In [24]:
# for articles:
article_text = extract_article("https://medium.com/@deshwaljaivardhan/the-algorithm-said-no-and-youll-never-know-why-0dd2c7862259")
print(summarize_text(article_text))

AI bias refers to the unfair or prejudiced decisions made by artificial intelligence systems due to skewed data or flawed algorithms. This bias can significantly impact various aspects of life:

1. **Resume Screening & AI Hiring Tools**: These tools can unintentionally downgrade candidates based on biased historical hiring practices, such as women’s colleges or foreign names, leading to unfair rejections without feedback.

2. **CIBIL Scores & Creditworthiness**: Credit scoring algorithms may use biased data, like zip codes, resulting in disadvantaged access to credit for minorities or low-income groups. Consequently, individuals could be denied loans based on assumptions tied to their location or demographic.

3. **Healthcare Algorithms**: AI in healthcare has been shown to predict lower care needs for Black patients compared to equally sick white patients due to historical data biases, leading to unequal healthcare access.

4. **Facial Recognition in Policing**: Facial recognition tec