In [2]:
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
import pandas as pd

def get_video_id(url):
    """
    Extract the YouTube video ID from a URL.

    Recognised forms:
      * short links:  https://youtu.be/VIDEO_ID            (tracking params ignored)
      * watch links:  https://www.youtube.com/watch?v=VIDEO_ID
      * path links:   https://www.youtube.com/embed/VIDEO_ID
                      (also /shorts/, /live/ and /v/)
    The "www.", "m." and "music." host variants as well as
    youtube-nocookie.com are accepted.

    Args:
        url: The video URL as a string.

    Returns:
        The video ID string, or None when the input is empty, is not a
        string, is not a recognised YouTube URL, or carries no ID.
    """
    if not url or not isinstance(url, str):
        return None

    parsed_url = urlparse(url.strip())
    # `hostname` is lower-cased by urlparse and is None for scheme-less input.
    host = parsed_url.hostname or ""
    if host.startswith("www."):
        host = host[len("www."):]
    # Non-empty path segments; this also discards a trailing slash.
    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be":
        # Shortened URL format: the ID is the first path segment.
        return segments[0] if segments else None

    if host in ("youtube.com", "m.youtube.com", "music.youtube.com",
                "youtube-nocookie.com"):
        # Standard URL format: the ID is the "v" query parameter.
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        if video_id:
            return video_id
        # Path-based formats such as /embed/VIDEO_ID or /shorts/VIDEO_ID.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None

def get_transcript(video_id):
    """
    Fetch the transcript for a YouTube video and return it as one string.

    Args:
        video_id: The YouTube video ID. A falsy value (None or "") means
            there is no video to look up, so no request is made.

    Returns:
        The "text" field of every transcript entry joined by single spaces,
        or an empty string when no ID was supplied or the fetch failed.
    """
    if not video_id:
        # Nothing to look up (e.g. the URL could not be parsed): skip the
        # API call instead of sending a request that cannot succeed.
        return ""

    try:
        transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in transcript_data)
    except Exception as e:
        # Deliberately best-effort: any failure is reported and turned into
        # an empty transcript so the caller can fall back to other input.
        print("Error fetching transcript:", e)
        return ""

def extract_products(transcript_text):
    """
    Placeholder for product-name extraction.

    The transcript is not inspected: a fixed set of three sample products
    is returned regardless of input. A real implementation could scan for
    keywords, match a predefined catalogue, or use an NLP library.

    Args:
        transcript_text: The transcript text (currently unused).

    Returns:
        A new list containing the three sample product names.
    """
    sample_products = ("ProductA", "ProductB", "ProductC")
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(sample_products)

def perform_fact_check(transcript_text):
    """
    Placeholder fact check based on keyword frequency.

    Counts case-insensitive substring occurrences of "fact", "truth" and
    "verified" in the transcript and compares the total to a fixed
    threshold. A real implementation might call an external fact-checking
    service or use NLP to verify individual claims.

    Args:
        transcript_text: The transcript text to analyse.

    Returns:
        "Verified" when the combined keyword count is greater than 2,
        otherwise "Not Verified".
    """
    lowered = transcript_text.lower()
    hits = 0
    for term in ("fact", "truth", "verified"):
        # str.count matches substrings, so "facts" counts towards "fact".
        hits += lowered.count(term)

    if hits > 2:
        return "Verified"
    return "Not Verified"

def main(video_url="https://youtu.be/uLRuz82XNTM?si=XtxMbQyX5E3WrVmr"):
    """
    Build and print a product comparison table for a YouTube video.

    Steps: resolve the video ID, fetch the transcript (falling back to a
    built-in dummy transcript when none is available), extract product
    names, run the fact check, and print the resulting table.

    Args:
        video_url: URL of the YouTube video to analyse. Defaults to the
            demo video so that calling main() with no arguments still works.
    """
    video_id = get_video_id(video_url)

    # Get the transcript from YouTube; without a video ID there is nothing
    # to request, so go straight to the fallback below.
    if video_id:
        transcript = get_transcript(video_id)
    else:
        print("Could not extract a video ID from URL:", video_url)
        transcript = ""

    if transcript:
        print("Transcript successfully fetched from YouTube.")
    else:
        print("Transcript not available. Using dummy transcript for demo.")
        transcript = ("Dummy transcript mentioning ProductA and ProductB. "
                      "The facts in this video are verified and based on truth. "
                      "Sustainability is high for ProductA.")

    # Extract product names from the transcript (dummy extraction here).
    products = extract_products(transcript)

    # For demonstration, assign dummy cost and sustainability ratings.
    # In a real application, these could come from a database or user input.
    product_details = {
        "ProductA": {"Cost": "$100", "Sustainability": "High"},
        "ProductB": {"Cost": "$150", "Sustainability": "Medium"},
        "ProductC": {"Cost": "$200", "Sustainability": "Low"}
    }
    unknown_details = {"Cost": "N/A", "Sustainability": "N/A"}

    # Perform fact check on the transcript (dummy analysis). The verdict is
    # for the whole transcript, so every row carries the same value.
    fact_check_result = perform_fact_check(transcript)

    # Build the comparison table: one record per product, then a single
    # DataFrame construction.
    data = [
        {
            "Product": prod,
            "Cost": product_details.get(prod, unknown_details)["Cost"],
            "Sustainability": product_details.get(prod, unknown_details)["Sustainability"],
            "Fact Check": fact_check_result,
        }
        for prod in products
    ]
    df = pd.DataFrame(data)

    # Display the result table.
    print("\nProduct Comparison Table:")
    print(df)

# Run the demo when this cell/script is executed directly; importing the
# code as a module does not trigger it.
if __name__ == "__main__":
    main()


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [1]:
! pip install youtube-transcript-api pandas


Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.3-py3-none-any.whl (622 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m622.3/622.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting defusedxml<0.8.0,>=0.7.1
  Using cached defusedxml-0.7.1-py2.py3-none-any.whl (25 kB)
Installing collected packages: defusedxml, youtube-transcript-api
Successfully installed defusedxml-0.7.1 youtube-transcript-api-0.6.3
