In [1]:
import os
import re
from typing import List
from pydantic import BaseModel
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv

# ==================================
# 1. Load ENV and Configure OpenAI
# ==================================
# Load environment variables from the .env file at the relative path "../.env".
load_dotenv("../.env")  # Adjust path as needed

from openai import OpenAI

# Module-level OpenAI client; generate_summary() below sends its requests through it.
# os.getenv returns None when the variable is not set.
# NOTE(review): the "NEXT_PUBLIC_" prefix looks like a Next.js convention for
# variables exposed to browser code — confirm this secret key is not shipped
# to a front end under that name.
client = OpenAI(api_key=os.getenv("NEXT_PUBLIC_OPENAI_API_KEY"))

# ===================================
# 2. Pydantic Models for Data Storage
# ===================================

class Video(BaseModel):
    """Represents a single YouTube video and its transcript."""
    # The video's URL, stored exactly as given.
    url: str
    # Transcript text for the video. In this file it is filled from
    # fetch_transcript(), which returns a message string (rather than raising)
    # when the URL is invalid or the fetch fails, so this field may hold an
    # error message instead of a real transcript.
    transcript: str

class Session(BaseModel):
    """
    Represents a session that includes one or more videos
    plus a summary (generated by OpenAI).
    """
    # Session date kept as a plain string; the demo in this file uses
    # values such as "2024-01-01". No date parsing or validation is done here.
    date: str
    # Videos recorded for this session.
    videos: List[Video]
    # Summary text; the demo fills it with the result of generate_summary().
    summary: str  # Populated by GPT

class Patient(BaseModel):
    """
    Represents a patient/client, including their personal info
    and the sessions they have had.
    """
    # Display name of the patient.
    name: str
    # Picture reference stored as a string; the demo in this file passes an image URL.
    picture: str
    # Start date of the client relationship, kept as a plain string
    # (the demo uses values such as "2022-01-01").
    client_since: str
    # Sessions recorded for this patient.
    sessions: List[Session]


# =====================================
# 3. YouTube Transcript Helper Function
# =====================================
def get_youtube_video_id(url: str) -> str:
    """
    Extract the video ID from a YouTube URL.

    Supported forms:
        https://www.youtube.com/watch?v=abcd1234      -> "abcd1234"
        https://youtu.be/abcd1234                     -> "abcd1234"
        https://www.youtube.com/embed/abcd1234        -> "abcd1234"
        https://www.youtube.com/shorts/abcd1234       -> "abcd1234"
        https://www.youtube.com/live/abcd1234         -> "abcd1234"

    The ``v`` query parameter is matched by its exact name, so other
    parameters that merely end in "v" (e.g. ``rev=...``) are ignored.

    Args:
        url: The URL to inspect. A missing scheme is tolerated.

    Returns:
        The video ID, or "" when none can be found (including empty or
        ``None`` input).
    """
    from urllib.parse import parse_qs, urlparse

    if not url:
        return ""

    candidate = url.strip()
    # urlparse only recognises the host when a scheme or a leading "//" is
    # present, so add "//" to scheme-less input such as "youtu.be/<id>".
    has_scheme = re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*://", candidate)
    if not has_scheme and not candidate.startswith("//"):
        candidate = "//" + candidate

    try:
        parts = urlparse(candidate)
        host = (parts.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. unbalanced brackets).
        return ""

    raw_id = ""
    query_ids = parse_qs(parts.query).get("v")
    if query_ids:
        raw_id = query_ids[0]
    else:
        segments = [segment for segment in parts.path.split("/") if segment]
        on_short_host = host in ("youtu.be", "www.youtu.be")
        on_youtube_host = any(
            host == domain or host.endswith("." + domain)
            for domain in ("youtube.com", "youtube-nocookie.com")
        )
        if on_short_host and segments:
            raw_id = segments[0]
        elif (
            on_youtube_host
            and len(segments) >= 2
            and segments[0] in ("embed", "shorts", "live", "v")
        ):
            raw_id = segments[1]

    # Keep only the leading run of characters that are valid in a video ID.
    match = re.match(r"[a-zA-Z0-9_-]+", raw_id)
    return match.group(0) if match else ""

def _get_transcript_segments(video_id: str) -> List[dict]:
    """
    Return the transcript of ``video_id`` as a list of segment dicts.

    Each dict is expected to carry at least the "text" and "start" keys that
    the callers in this file read. Works with both the older static
    ``get_transcript`` API of youtube-transcript-api and the newer
    instance-based ``fetch`` API.
    """
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        return YouTubeTranscriptApi.get_transcript(video_id)
    # Newer releases drop the static helper; fetch() returns an object whose
    # to_raw_data() yields the same list-of-dicts shape.
    return YouTubeTranscriptApi().fetch(video_id).to_raw_data()


def fetch_transcript(url: str) -> str:
    """
    Fetch and return the complete transcript text for a given YouTube video URL.

    Args:
        url: URL of the YouTube video.

    Returns:
        The transcript segments joined with newlines. On failure no exception
        is raised; instead the string "Invalid YouTube URL" (no video ID could
        be extracted) or "Error occurred: <details>" is returned.
    """
    try:
        video_id = get_youtube_video_id(url)
        if not video_id:
            return "Invalid YouTube URL"
        segments = _get_transcript_segments(video_id)
        # Join all transcript segments into a single text
        return "\n".join(item["text"] for item in segments)
    except Exception as e:
        # Deliberate best-effort: callers expect a string, never an exception.
        return f"Error occurred: {e}"


def fetch_timed_transcript(url: str) -> str:
    """
    Fetch and return the complete transcript including timestamps for a given YouTube video URL.

    Args:
        url: URL of the YouTube video.

    Returns:
        One line per segment in the form "[<start>] <text>", where <start> is
        the segment's start value formatted with two decimals. On failure no
        exception is raised; instead the string "Invalid YouTube URL" or
        "Error occurred: <details>" is returned.
    """
    try:
        video_id = get_youtube_video_id(url)
        if not video_id:
            return "Invalid YouTube URL"
        segments = _get_transcript_segments(video_id)
        # Format each segment with a timestamp (in seconds) followed by the text.
        lines = [f"[{item['start']:.2f}] {item['text']}" for item in segments]
        return "\n".join(lines)
    except Exception as e:
        # Deliberate best-effort: callers expect a string, never an exception.
        return f"Error occurred: {e}"


# =================================
# 4. Summarization Model & Function
# =================================
class SummaryResponse(BaseModel):
    """
    This Pydantic model dictates the
    structure of the parsed GPT response.

    It is passed as ``response_format`` in generate_summary().
    """
    # The generated summary text.
    summary: str

def generate_summary(text: str) -> str:
    """
    Use OpenAI's Beta Chat Completions with a Pydantic response format
    to generate a summary for the given transcript text.

    Args:
        text: The text to summarize; it is sent unchanged as the user message.

    Returns:
        The ``summary`` field of the parsed ``SummaryResponse``.

    Raises:
        ValueError: If the response carries no parsed content, for example
            because the model refused the request.
    """
    completion = client.beta.chat.completions.parse(
        model="gpt-4o",   # Must be a model that supports structured outputs via .parse()
        messages=[
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": text},
        ],
        response_format=SummaryResponse,
        # You can also add temperature, max_tokens, etc., if supported:
        # temperature=0.7,
        # max_tokens=200,
    )
    message = completion.choices[0].message
    # `parsed` is a SummaryResponse on success, but may be None (e.g. when the
    # model refuses); fail with a clear message instead of an AttributeError.
    parsed = message.parsed
    if parsed is None:
        reason = getattr(message, "refusal", None) or "response could not be parsed"
        raise ValueError(f"No structured summary returned: {reason}")
    return parsed.summary


# ===============================
# 5. Main Routine / Demo Usage
# ===============================
if __name__ == "__main__":

    # Demo input: five YouTube videos, one per session.
    videos_list = [
        "https://www.youtube.com/watch?v=5l6mwfsmnBU",
        "https://www.youtube.com/watch?v=XDlWSR7Ohck",
        "https://www.youtube.com/watch?v=7LD8iC4NqXM",
        "https://www.youtube.com/watch?v=z9fF9F5w1cI",
        "https://www.youtube.com/watch?v=8aDFvvjC6XM"
    ]

    # Pair every URL with its fetched transcript in a Video model.
    all_videos = []
    for url in videos_list:
        txt = fetch_transcript(url)
        all_videos.append(Video(url=url, transcript=txt))

    # Assignment used by the demo:
    #   - the first two videos are Alex's sessions 1 and 2
    #   - the remaining three are Jane's sessions 1 to 3
    (
        alex_video_session_1,
        alex_video_session_2,
        jane_video_session_1,
        jane_video_session_2,
        jane_video_session_3,
    ) = all_videos

    # One summary per video, requested in the same order as the videos.
    (
        alex_summary_1,
        alex_summary_2,
        jane_summary_1,
        jane_summary_2,
        jane_summary_3,
    ) = [generate_summary(video.transcript) for video in all_videos]

    # Alex's sessions
    alex_session_1 = Session(date="2024-01-01", videos=[alex_video_session_1], summary=alex_summary_1)
    alex_session_2 = Session(date="2024-01-02", videos=[alex_video_session_2], summary=alex_summary_2)

    # Jane's sessions
    jane_session_1 = Session(date="2024-01-03", videos=[jane_video_session_1], summary=jane_summary_1)
    jane_session_2 = Session(date="2024-01-04", videos=[jane_video_session_2], summary=jane_summary_2)
    jane_session_3 = Session(date="2024-01-05", videos=[jane_video_session_3], summary=jane_summary_3)

    # Patient records that tie the sessions together
    alex = Patient(
        name="Alex",
        picture="https://example.com/alex.jpg",
        client_since="2022-01-01",
        sessions=[alex_session_1, alex_session_2],
    )
    jane = Patient(
        name="Jane",
        picture="https://example.com/jane.jpg",
        client_since="2022-02-01",
        sessions=[jane_session_1, jane_session_2, jane_session_3],
    )


# Display the Alex patient record using the model's default string form.
# `alex` is only bound when the demo block above runs under `__main__`.
print(alex)

name='Alex' picture='https://example.com/alex.jpg' client_since='2022-01-01' sessions=[Session(date='2024-01-01', videos=[Video(url='https://www.youtube.com/watch?v=5l6mwfsmnBU', transcript="[Music]\nwatch this full series at the links in\nthe description below and subscribe to\nour YouTube channel to watch new mental\nhealth videos every week I read the\ncomments on all of our videos and some\nviewers have left feedback that maybe I\nhave narcissistic personality disorder\nwell we're about to find out in this\nsimulated therapy session with dr.\nRomani it's all you now I'm gonna answer\nquestions so Kyle you know as we think\nabout the pattern of narcissism in you\nplace I'm gonna start is your\nrelationships okay can you tell me are\nyou dating anyone right now\nno you're not okay so we don't have that\nif you think about your last\nrelationship how was that emotionally\nwas that an emotionally fulfilling\nrelationship it was but I'm not a very\nemotional person what do you mean by\n

In [2]:
# Print the transcript text of the first video in Alex's first session.

print(alex_session_1.videos[0].transcript)

[Music]
watch this full series at the links in
the description below and subscribe to
our YouTube channel to watch new mental
health videos every week I read the
comments on all of our videos and some
viewers have left feedback that maybe I
have narcissistic personality disorder
well we're about to find out in this
simulated therapy session with dr.
Romani it's all you now I'm gonna answer
questions so Kyle you know as we think
about the pattern of narcissism in you
place I'm gonna start is your
relationships okay can you tell me are
you dating anyone right now
no you're not okay so we don't have that
if you think about your last
relationship how was that emotionally
was that an emotionally fulfilling
relationship it was but I'm not a very
emotional person what do you mean by
that it doesn't take a lot for me to
feel like I'm emotionally filled up so
you almost get overwhelmed by emotion
yes like if somebody is just like I love
you oh my oh my gosh like have it your
own deal okay yeah 