<a href="https://colab.research.google.com/github/ne0fencing/YouTube-comment-opportunity-finder-ai-agent/blob/main/YtCmntFaiAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the libraries used by the agent cells: `transformers` (provides the
# `pipeline` helper used to load the model), plus `torch` and `sentencepiece`.
# NOTE(review): presumably torch is the model backend and sentencepiece is needed
# by the flan-t5 tokenizer — confirm. Versions are unpinned, so a later run may
# resolve to different releases.
!pip install transformers torch sentencepiece



In [None]:
# Install the IBM Data Prep Kit transforms package with its `all` extra.
# NOTE(review): none of the visible cells import from this package (the cleaning
# step is written with plain pandas) — confirm the install is still required.
!pip install 'data-prep-toolkit-transforms[all]'



In [None]:
from transformers import pipeline
import re
import datetime

# Shared model handle: every model call in this notebook goes through `llm`.
# Task is 'text2text-generation', the checkpoint is google/flan-t5-large, and
# max_length=128 is passed through to the pipeline.
llm = pipeline(task="text2text-generation", model="google/flan-t5-large", max_length=128)

Device set to use cuda:0


In [None]:
def reply_generator_tool(comment):
    """Ask the model for a short reply to `comment` and return it as a labelled draft.

    The first generation's text is stripped of surrounding whitespace and
    prefixed with the draft-reply label.
    """
    instruction = f"Write a short, engaging YouTube reply to this: {comment}"
    generations = llm(instruction)
    draft = generations[0]["generated_text"].strip()
    return f"üìù DRAFT REPLY: {draft}"

def internal_notes_tool(comment):
    """Return the fixed internal-note message for a comment.

    `comment` is accepted so the tool has the same call shape as the reply
    tool, but its content is not inspected.
    """
    note = "üìå NOTE: This is a general comment. Saved to 'Community Sentiment' list."
    return note

def ignore_tool():
    """Return the fixed message used for comments the agent decides to skip."""
    verdict = "üóëÔ∏è ACTION: Ignore. This is likely spam or low-value."
    return verdict

In [None]:
def decide_and_act(youtube_comment):
    """Route a comment to one of the three tools based on the model's answer.

    The model is prompted to answer with TOOL_REPLY, TOOL_NOTE or TOOL_IGNORE.
    Its upper-cased answer is searched for TOOL_REPLY first, then TOOL_NOTE;
    any other answer falls through to the ignore tool.
    """
    # Classification prompt: the rules ask the model to answer with a tool name.
    router_prompt = f"""
    Analyze the following YouTube comment: "{youtube_comment}"

    Rules:
    1. If it's a question or a request for a video, respond: TOOL_REPLY
    2. If it's a long compliment or a detailed feedback, respond: TOOL_NOTE
    3. If it's a one-word comment (like 'Nice', 'Cool') or spam, respond: TOOL_IGNORE

    Decision:
    """

    # Normalise the raw generation so the substring checks are case-insensitive.
    model_output = llm(router_prompt)
    verdict = model_output[0]["generated_text"].strip().upper()

    # Guard clauses, checked in priority order; ignore is the catch-all.
    if "TOOL_REPLY" in verdict:
        return reply_generator_tool(youtube_comment)
    if "TOOL_NOTE" in verdict:
        return internal_notes_tool(youtube_comment)
    return ignore_tool()

In [None]:
import pandas as pd

# Small, deliberately messy sample: five comments, including one exact
# duplicate and one spam-style link.
data = dict(
    comment_id=list(range(1, 6)),
    contents=[
        "Can you explain the IBM kit?",
        "Great video! üî•",
        "Great video! üî•",  # exact duplicate of the previous row
        "Check this link out: http://spam.com",
        "How do I use Python for AI?",
    ],
)

# Build the frame and persist it as Parquet (DPK often works with Parquet
# files for efficiency at scale).
df = pd.DataFrame(data)
df.to_parquet("youtube_comments.parquet")
print("Messy dataset saved as Parquet.")

Messy dataset saved as Parquet.


In [None]:
# Note: a full DPK workflow would use the toolkit's own transform classes.
# For this project we reproduce the core logic discussed in the Masterclass with plain pandas:
def clean_youtube_data(input_df, text_column="contents", min_length=10):
    """Return a de-duplicated, length-filtered view of the comment table.

    Args:
        input_df: DataFrame holding one comment per row.
        text_column: Name of the column containing the comment text.
            Defaults to 'contents', the column used by the sample dataset.
        min_length: A comment is kept only if its text is strictly longer
            than this many characters. Defaults to 10.

    Returns:
        A new DataFrame containing the first row for each distinct comment
        text whose length exceeds `min_length`. Original index labels are
        preserved and `input_df` is not modified.

    Raises:
        KeyError: If `text_column` is not a column of `input_df`.
    """
    if text_column not in input_df.columns:
        raise KeyError(
            f"Column {text_column!r} not found; available columns: {list(input_df.columns)}"
        )

    # 1. Deduplication: keep the first row for each distinct comment text.
    deduplicated = input_df.drop_duplicates(subset=[text_column])

    # 2. Noise filter (simulating the 'Filter' transform): drop comments that
    #    are too short to be 'opportunities'.
    is_long_enough = deduplicated[text_column].str.len() > min_length
    return deduplicated[is_long_enough]

# Run the cleaning step on the raw frame. `clean_youtube_data` returns a new
# frame, so `df` still holds all of the original rows for the comparison below.
processed_data = clean_youtube_data(df)
# Report how many rows survived de-duplication and the length filter, then show them.
print(f"Original: {len(df)} comments | Cleaned: {len(processed_data)} comments")
print(processed_data)

Original: 5 comments | Cleaned: 4 comments
   comment_id                              contents
0           1          Can you explain the IBM kit?
1           2                        Great video! üî•
3           4  Check this link out: http://spam.com
4           5           How do I use Python for AI?


In [None]:
from transformers import pipeline
import re
import datetime

# The `llm` pipeline and the routing tools (`reply_generator_tool`,
# `internal_notes_tool`, `ignore_tool`, `decide_and_act`) are defined once in
# the cells above and are deliberately NOT redefined here: copy-pasted
# duplicates silently shadow the originals and drift apart, and re-creating the
# pipeline loads flan-t5-large a second time.
# Run the notebook from the top (Restart & Run All) before using this cell.

print("--- YouTube Opportunity Finder Agent ---")
while True:
    try:
        # Strip so that inputs such as ' exit ' are still recognised.
        user_input = input("Paste a YouTube Comment (or 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop cleanly
        # instead of leaving a traceback in the notebook.
        print("\nAgent: Shutting down...")
        break

    if user_input.lower() == "exit":
        print("Agent: Shutting down...")
        break

    if not user_input:
        # Nothing to classify; do not spend a model call on a blank line.
        continue

    # The agent processes the input
    result = decide_and_act(user_input)
    print(f"Agent {result}\n")

Device set to use cuda:0


--- YouTube Opportunity Finder Agent ---
Paste a YouTube Comment (or 'exit'): cool
Agent üìù DRAFT REPLY: I'm so glad you're here.

Paste a YouTube Comment (or 'exit'): exit
Agent: Shutting down...


In [None]:
import pandas as pd
import os

# Let's take our cleaned parquet data and 'send' it to a sheet
# (In a real setup, you would use the Google Sheets API)
def export_to_automation_hub(df_to_export, text_column=None, output_dir="data"):
    """Write the comments that contain a question mark to a CSV for Relay.app.

    Args:
        df_to_export: DataFrame of comments to filter and export.
        text_column: Name of the column holding the comment text. When None
            (the default) the function uses 'contents' if present, the column
            name used by the frames built in this notebook, and otherwise
            falls back to the legacy 'text' column.
        output_dir: Directory that receives 'to_relay_app.csv'. It is created
            if it does not exist. Defaults to 'data'.

    Returns:
        None. The filtered rows are written to '<output_dir>/to_relay_app.csv'
        (without the index) and a confirmation line is printed.

    Raises:
        KeyError: If no usable text column is found in `df_to_export`.
    """
    if text_column is None:
        # Prefer the notebook's own column name; keep 'text' working for
        # frames produced by older code.
        candidates = [name for name in ("contents", "text") if name in df_to_export.columns]
        text_column = candidates[0] if candidates else None

    if text_column is None or text_column not in df_to_export.columns:
        raise KeyError(
            f"No comment text column found; available columns: {list(df_to_export.columns)}"
        )

    # Filter only for 'Opportunities' that need a reply. The match is a literal
    # '?' (regex=False); na=False makes missing text count as "not a question"
    # so the boolean mask never contains NaN.
    is_question = df_to_export[text_column].str.contains("?", regex=False, na=False)
    opportunity_df = df_to_export[is_question]

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save as a CSV that we will upload to Google Sheets
    opportunity_df.to_csv(os.path.join(output_dir, "to_relay_app.csv"), index=False)
    print("üöÄ Data ready for Relay.app automation!")

# Ensure the 'data' directory exists
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)

# Persist the cleaned comments. `processed_data` is the module-level result of
# the cleaning cell; `cleaned_df` exists only as a local variable inside
# `clean_youtube_data`, so referencing it here raises NameError on a fresh
# kernel (Restart & Run All).
processed_data.to_parquet(os.path.join(output_dir, "comments_clean.parquet"))

# Hand the same cleaned frame to the export step.
export_to_automation_hub(processed_data)

üöÄ Data ready for Relay.app automation!
