In [26]:
from google_auth_oauthlib.flow import InstalledAppFlow
import json
from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
import google.auth
import openai
import os
import chromadb
from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
import json
import gradio as gr
import time


In [27]:
from langchain.text_splitter import CharacterTextSplitter

def chunk_text_using_langchain(text, chunk_size=500):
    """Break ``text`` into pieces with LangChain's ``CharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Value forwarded to the splitter as ``chunk_size``.

    Returns:
        The chunks produced by the splitter's ``split_text`` call.
    """
    # Neighbouring chunks share a fixed overlap of 50.
    return CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=50,
    ).split_text(text)


In [28]:
def get_liked_videos(youtube, max_videos=200):
    """Fetch the authenticated user's liked videos from the YouTube Data API.

    Args:
        youtube: An authenticated YouTube API client exposing
            ``videos().list(...)`` and ``videos().list_next(...)``.
        max_videos: Upper bound on the number of videos returned
            (default 200). The result never exceeds this value.

    Returns:
        list[dict]: One dict per liked video with the keys ``title``,
        ``duration``, ``viewcount``, ``categoryid``, ``id``, ``description``,
        ``channelid`` and ``tags``.
    """
    page_size = 50  # videos.list accepts at most 50 results per request
    liked_videos = []
    if max_videos <= 0:
        return liked_videos

    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        myRating="like",
        maxResults=min(page_size, max_videos)
    )

    while request is not None and len(liked_videos) < max_videos:
        response = request.execute()
        for item in response.get("items", []):
            # Stop mid-page so the cap is exact rather than rounded up to a page.
            if len(liked_videos) >= max_videos:
                break
            snippet = item["snippet"]
            liked_videos.append({
                "title": snippet["title"],
                "duration": item["contentDetails"]["duration"],
                "viewcount": item.get("statistics", {}).get("viewCount", "0"),
                "categoryid": snippet["categoryId"],
                "id": item["id"],
                "description": snippet.get("description", ""),
                "channelid": snippet["channelId"],
                "tags": snippet.get("tags", [])
            })
        # list_next returns None once there are no further pages.
        request = youtube.videos().list_next(request, response)

    return liked_videos

In [29]:
def get_subscriptions(youtube):
    """Collect every channel the authenticated user is subscribed to.

    Pages through ``subscriptions().list`` (50 entries per request) until
    ``list_next`` yields no further request.

    Args:
        youtube: An authenticated YouTube API client.

    Returns:
        list[dict]: One dict per subscription with the keys ``id`` (the
        subscribed channel's id), ``title`` and ``description``.
    """
    def _as_record(entry):
        # Flatten the API item into the three fields the rest of the notebook uses.
        info = entry["snippet"]
        return {
            "id": info["resourceId"]["channelId"],
            "title": info["title"],
            "description": info.get("description", ""),
        }

    collected = []
    page_request = youtube.subscriptions().list(part="snippet", mine=True, maxResults=50)

    while page_request:
        page = page_request.execute()
        collected.extend(_as_record(entry) for entry in page.get("items", []))
        page_request = youtube.subscriptions().list_next(page_request, page)

    return collected

In [30]:
def get_uploads(youtube):
    """Look up how many videos the authenticated user's channel has uploaded.

    Args:
        youtube: An authenticated YouTube API client.

    Returns:
        dict: ``{"length of total uploads": <videoCount>}`` where the value is
        the ``videoCount`` statistic as returned by the API, or ``"0"`` when
        the statistic or the channel entry itself is missing.
    """
    response = youtube.channels().list(
        part="statistics",
        mine=True
    ).execute()

    # The response may carry no channel entries at all (for example an account
    # without a channel); report zero uploads instead of raising.
    items = response.get("items") or []
    if not items:
        return {"length of total uploads": "0"}

    statistics = items[0].get("statistics", {})
    return {"length of total uploads": statistics.get("videoCount", "0")}

In [31]:
def _extract_upload_count(uploads):
    """Return the upload count described by ``uploads`` as an int.

    Accepts the dict produced by ``get_uploads`` (count stored under
    ``"length of total uploads"``, possibly as a string) as well as a plain
    sequence of uploads, whose length is used. Anything else counts as 0.
    """
    if isinstance(uploads, dict):
        try:
            return int(uploads.get("length of total uploads", 0))
        except (TypeError, ValueError):
            return 0
    try:
        return len(uploads)
    except TypeError:
        return 0


def store_in_chromadb(data):
    """Embed the fetched YouTube data and persist it in the ``youtube_data`` collection.

    Args:
        data: Dict with the keys ``liked_videos`` (list of video dicts),
            ``subscriptions`` (list of subscription dicts) and ``uploads``
            (the dict returned by ``get_uploads``).

    Side effects:
        Loads ``.env``, sets ``openai.api_key`` from ``OPENAI_API_KEY``, calls
        the OpenAI embeddings endpoint once per stored text, and writes to the
        persistent ChromaDB store at ``chroma_db``.
    """
    # Load environment variables and OpenAI API key
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Initialize ChromaDB
    chroma_client = chromadb.PersistentClient(path="chroma_db")
    collection = chroma_client.get_or_create_collection(name="youtube_data")

    print("✅ OpenAI API Key Loaded and ChromaDB Initialized")

    def get_openai_embedding(text):
        """Generate an embedding using OpenAI's text-embedding-ada-002 model."""
        response = openai.embeddings.create(
            input=text,
            model="text-embedding-ada-002"
        )
        return response.data[0].embedding

    # Store Liked Videos
    for video in data["liked_videos"]:
        # Chroma metadata values must be scalars, so flatten the tag list once per video.
        tags = video["tags"]
        tags_str = ", ".join(tags) if isinstance(tags, list) else str(tags)

        # A video without description text yields no chunks; keep one empty
        # chunk so the video is still recorded instead of silently dropped.
        description_chunks = chunk_text_using_langchain(video["description"]) or [""]

        for i, chunk in enumerate(description_chunks):
            # The embeddings endpoint rejects empty input, so fall back to the title.
            embedding = get_openai_embedding(chunk or video["title"])

            # upsert (rather than add) so that re-running the analysis refreshes
            # records that reuse the same deterministic id.
            collection.upsert(
                ids=[f"liked_{video['id']}_chunk_{i}"],
                embeddings=[embedding],
                metadatas=[{
                    "type": "liked_video",
                    "title": video["title"],
                    "channel": video["channelid"],
                    "description_chunk": chunk,
                    "tags": tags_str,
                    "category_id": video["categoryid"],
                    "view_count": video["viewcount"],
                    "duration": video["duration"]
                }]
            )

    # Store Subscriptions
    for sub in data["subscriptions"]:
        embedding = get_openai_embedding(sub["title"] + " " + sub["description"])

        collection.upsert(
            ids=[f"sub_{sub['id']}"],
            embeddings=[embedding],
            metadatas=[{
                "type": "subscription",
                "title": sub["title"],
                "description": sub["description"]
            }]
        )

    # Store Total Uploads: read the real count out of the uploads payload
    # (len() of the get_uploads dict would always be 1).
    total_uploads_count = _extract_upload_count(data["uploads"])

    collection.upsert(
        ids=["total_uploads"],
        embeddings=[[0] * 1536],  # Placeholder vector; this record is only read via its metadata
        metadatas=[{
            "type": "total_uploads",
            "count": total_uploads_count
        }]
    )

    print("✅ Liked videos, subscriptions, and total uploads stored in ChromaDB")






In [33]:
import openai
import os
import chromadb
import json
from dotenv import load_dotenv

def analyze_with_gpt():
    """Read the stored YouTube data from ChromaDB and ask GPT-4 for a humorous report.

    Returns:
        str: The model's reply (eight insights, a score out of 100 and an
        explanation of the score), exactly as returned by the chat completion.

    Side effects:
        Loads ``.env``, opens the persistent ChromaDB store at ``chroma_db``
        (the ``youtube_data`` collection must already exist) and performs one
        OpenAI chat-completion request.
    """
    # Load environment variables and OpenAI API key
    load_dotenv()
    openai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Connect to ChromaDB
    chroma_client = chromadb.PersistentClient(path="chroma_db")
    collection = chroma_client.get_collection(name="youtube_data")

    max_prompt_items = 20        # cap list lengths to avoid prompt overflow
    max_description_chars = 500  # cap each description for the same reason

    def fetch_data_from_chroma():
        """Retrieve liked videos, subscriptions, and uploads from ChromaDB."""
        liked_videos = collection.get(where={"type": "liked_video"})
        subscriptions = collection.get(where={"type": "subscription"})
        total_uploads = collection.get(where={"type": "total_uploads"})

        return liked_videos, subscriptions, total_uploads

    def analyze_liked_videos(liked_videos):
        """Collect one title and one description excerpt per liked video.

        Liked videos are stored one record per description chunk (ids look
        like ``liked_<video id>_chunk_<n>``), so records are grouped by the
        id prefix; otherwise a single long-description video would fill the
        whole sample with repeated titles.
        """
        titles = []
        descriptions = []
        seen_videos = set()
        record_ids = liked_videos["ids"] or []
        metadatas = liked_videos["metadatas"] or []

        for record_id, meta in zip(record_ids, metadatas):
            video_key = record_id.rsplit("_chunk_", 1)[0]
            if video_key in seen_videos:
                continue
            seen_videos.add(video_key)
            titles.append(meta["title"])
            descriptions.append(meta["description_chunk"][:max_description_chars])

        return {
            "titles": titles[:max_prompt_items],
            "descriptions": descriptions[:max_prompt_items]
        }

    def analyze_categories(liked_videos):
        """Check how diverse the video categories are."""
        unique_categories = {meta["category_id"] for meta in liked_videos["metadatas"] or []}

        return {
            "total_categories": len(unique_categories),
            "categories": list(unique_categories)
        }

    def analyze_subscriptions(subscriptions):
        """Analyze the variety of subscriptions."""
        sub_titles = [meta["title"] for meta in subscriptions["metadatas"] or []]

        return {
            "total_subscriptions": len(sub_titles),
            "subscriptions": sub_titles[:max_prompt_items]
        }

    def analyze_uploads(total_uploads):
        """Check the total number of videos uploaded."""
        upload_count = total_uploads["metadatas"][0]["count"] if total_uploads["metadatas"] else 0

        return {
            "upload_count": upload_count
        }

    def generate_humorous_analysis(liked_analysis, category_analysis, sub_analysis, upload_analysis):
        """Use GPT-4 to generate eight humorous insights and a final score."""

        # The insight count is "eight" everywhere so the instructions do not contradict each other.
        system_prompt = """
        You are a humorous AI analyzing a person's YouTube activity.
        Your job is to generate eight funny insights about their YouTube habits.
        Be witty, engaging, and creative in your analysis.

        After the analysis, assign a **final score out of 100** based on these criteria:

        1️⃣ **Uploads (20 points)**:
           - 0 points: No uploads
           - 5 points: 1-10 uploads
           - 10 points: 10-50 uploads
           - 20 points: More than 50 uploads

        2️⃣ **Base Score (20 points)**:
           - Every user gets a starting 20 points.

        3️⃣ **Subscriptions Quality & Quantity (20 points)**:
           - Higher score if they have a diverse and interesting set of subscriptions.

        4️⃣ **Liked Videos Quality & Quantity (20 points)**:
           - Higher score if they like a variety of high-quality videos.

        5️⃣ **Match Between Liked Videos and Subscriptions (20 points)**:
           - Higher score if the content they like matches what they subscribe to.

        **Final Output Format:**
        - eight humorous insights of about 20 words each
        - Final score out of 100 with a funny remark about the score (just show the score and remark, do not explain the score)
        - explanation for the score using the methodology given
        """

        user_prompt = f"""
        Here is the data about the user's YouTube activity:

        1️⃣ **Liked Videos Analysis**:
        - Titles: {json.dumps(liked_analysis['titles'], indent=2)}
        - Descriptions: {json.dumps(liked_analysis['descriptions'], indent=2)}

        2️⃣ **Category Analysis**:
        - Unique Categories: {category_analysis['total_categories']}
        - Category IDs: {category_analysis['categories']}

        3️⃣ **Subscription Analysis**:
        - Total Subscriptions: {sub_analysis['total_subscriptions']}
        - Subscription Titles: {json.dumps(sub_analysis['subscriptions'], indent=2)}

        4️⃣ **Uploads**:
        - Total Videos Uploaded: {upload_analysis['upload_count']}

        Generate eight humorous observations based on this data.
        Then, assign a final **score out of 100** based on the scoring system.
        """

        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )

        return response.choices[0].message.content

    # Fetch stored data
    liked_videos, subscriptions, total_uploads = fetch_data_from_chroma()

    # Analyze different aspects
    liked_analysis = analyze_liked_videos(liked_videos)
    category_analysis = analyze_categories(liked_videos)
    sub_analysis = analyze_subscriptions(subscriptions)
    upload_analysis = analyze_uploads(total_uploads)

    # Generate humorous insights and score; the reply is returned as free text.
    return generate_humorous_analysis(liked_analysis, category_analysis, sub_analysis, upload_analysis)


In [34]:

import webbrowser
# Module-level handle to the authenticated YouTube API client.
# It stays None until authenticate() succeeds; analyze_youtube() refuses to
# run while it is still None.
youtube_service = None

# OAuth scopes requested when authenticate() builds the login flow.
SCOPES = ["https://www.googleapis.com/auth/youtube.readonly"]

# Authenticate and create YouTube service

def authenticate():
    """Run the Google OAuth flow in a local browser and build the YouTube client.

    On success the client is stored in the module-level ``youtube_service``.

    Returns:
        str: A status message for the UI: a success message, or a failure
        message (including the error text when the flow raised).
    """
    global youtube_service

    # Prefer Chrome, but only when it really exists at the expected location
    # and the user has not already chosen a browser through BROWSER.
    chrome_path = "C:/Program Files/Google/Chrome/Application/chrome.exe" if os.name == "nt" else "/usr/bin/google-chrome"
    if "BROWSER" not in os.environ and os.path.exists(chrome_path):
        os.environ["BROWSER"] = chrome_path

    try:
        flow = InstalledAppFlow.from_client_secrets_file(
            "credentials.json", SCOPES,
            redirect_uri="http://localhost:8080/"  # Explicitly set redirect URI
        )
        creds = flow.run_local_server(port=8080, open_browser=True)
    except Exception as exc:
        # This is a UI callback whose contract is a status string: report
        # problems such as a missing credentials.json, a busy port 8080 or a
        # denied consent screen instead of letting them escape the handler.
        return f"❌ Authentication failed! Please try again. ({exc})"

    if not creds or not creds.valid:
        return "❌ Authentication failed! Please try again."

    youtube_service = build("youtube", "v3", credentials=creds)  # Store in global variable
    return "✅ Authentication successful! Click 'Analyze' to continue."


# def authenticate():
#     flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
#     creds = flow.run_local_server(port=8080)
#     print(" theses are  " )
#     print(creds)
#     youtube_service=build("youtube", "v3", credentials=creds);
    


    
import time

def analyze_youtube():
    """Fetch the user's YouTube data, store it in ChromaDB and run the GPT analysis.

    Returns:
        str: ``"❌ Please login first!"`` when no authenticated client is
        available, otherwise the text returned by ``analyze_with_gpt()``.

    Side effects:
        Calls the YouTube API, writes to ChromaDB via ``store_in_chromadb``
        and prints the duration of both phases.
    """
    if youtube_service is None:
        return "❌ Please login first!"

    # Phase 1: pull the data from YouTube and persist it.
    start_time = time.time()
    data = {
        "liked_videos": get_liked_videos(youtube_service),
        "subscriptions": get_subscriptions(youtube_service),
        "uploads": get_uploads(youtube_service)
    }
    store_in_chromadb(data)
    end_time = time.time()
    print(f"⏳ Time taken to fetch from YouTube and store in ChromaDB: {end_time - start_time:.2f} seconds")

    # Phase 2: process with GPT
    start_time = time.time()
    analysis_results = analyze_with_gpt()
    end_time = time.time()
    print(f"⏳ Time taken for GPT analysis: {end_time - start_time:.2f} seconds")

    return analysis_results



      

def display_results():
    """Run the analysis and format the outcome as Markdown for the UI.

    Returns:
        str: The text from ``analyze_youtube()`` unchanged when it is a string
        (both the login error and the GPT report are strings). If a structured
        result is ever returned instead, each comment is rendered in bold on
        its own paragraph, followed by the score line when a score is present.
    """
    analysis_results = analyze_youtube()

    if isinstance(analysis_results, str):
        return analysis_results

    # Structured result: either a dict with "comments" / "final_score" or a
    # plain iterable of comments.
    if isinstance(analysis_results, dict):
        comments = analysis_results.get("comments", [])
        final_score = analysis_results.get("final_score")
    else:
        comments = analysis_results or []
        final_score = None
    if isinstance(comments, str):
        comments = [comments]

    # The whole text is returned in one go, so there is no point in pausing
    # between comments; build it directly.
    output_text = "".join(f"**{comment}**\n\n" for comment in comments)
    if final_score is not None:
        output_text += f"🏆 **Your YouTube Score: {final_score} / 100 🎉**"
    return output_text

def _login_and_toggle_analyze():
    """Authenticate, then reveal the Analyze button only if a client is available."""
    message = authenticate()
    return message, gr.update(visible=youtube_service is not None)


with gr.Blocks() as app:
    gr.Markdown("# 🎬 YouTube Account Analyzer")
    gr.Markdown("Login with your YouTube account to analyze your activity!")

    login_button = gr.Button("Login with YouTube")
    analyze_button = gr.Button("Analyze My Account", visible=False)

    output = gr.Markdown("Waiting for login...")

    # One handler updates both the status text and the Analyze button, so the
    # button only appears after authentication has actually succeeded.
    login_button.click(_login_and_toggle_analyze, outputs=[output, analyze_button])

    # Analyze button fetches data and processes it
    analyze_button.click(display_results, outputs=output)

app.launch()


* Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=183975247830-bqgpkults0o7p6vhcj0us6v0durgqc8d.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.readonly&state=dEE3WxnDs5RJ8oaQYsslIcf5q4DALT&access_type=offline
⏳ Time taken to fetch from ChromaDB: 0.00 seconds
⏳ Time taken for GPT analysis: 24.61 seconds
Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=183975247830-bqgpkults0o7p6vhcj0us6v0durgqc8d.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.readonly&state=rXUzslN6kEZaa4ytKTDc8wBxfFJAoX&access_type=offline
⏳ Time taken to fetch from ChromaDB: 0.00 seconds
⏳ Time taken for GPT analysis: 22.25 seconds
