In [None]:

# Install the required packages
%pip install youtube-transcript-api
%pip install youtube-search-python
%pip install yt_dlp

from langchain_community.llms import Ollama
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from youtubesearchpython import VideosSearch
from urllib.parse import urlparse, parse_qs
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import re
import yt_dlp


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [7]:

# Initialize LangChain Ollama client (DeepSeek model)
deepseek_model = Ollama(model="deepseek-r1:1.5b")

def llm(prompt: str) -> str:
    response = deepseek_model.invoke(prompt)
    return response.strip()


  deepseek_model = Ollama(model="deepseek-r1:1.5b")


In [57]:

def clean_generated_query(llm_output: str) -> str:
    """
    Cleans noisy LLM output into a valid short search query.
    """
    # Strip tags like <think> and </think> if they appear
    llm_output = re.sub(r"<.*?>", "", llm_output).strip()

    # Remove boilerplate phrases common in LLM responses
    bad_phrases = [
        "search for information on",
        "find information about",
        "look up",
        "web search query for",
        "search query:",
        "your search query is"
    ]
    for phrase in bad_phrases:
        llm_output = llm_output.lower().replace(phrase, "")

    # Trim weird punctuation left behind
    llm_output = llm_output.strip(":. ")

    # Split into lines, keep only short query-like line
    lines = [line.strip() for line in llm_output.split("\n") if line.strip()]
    for line in lines:
        if 2 <= len(line.split()) <= 10:  # reasonable search query length
            return line

    # Fallback if everything fails (keep first non-empty line)
    return lines[0] if lines else llm_output

def get_youtube_video_id(url):
    parsed_url = urlparse(url)
    if parsed_url.hostname in ['www.youtube.com', 'youtube.com']:
        return parse_qs(parsed_url.query).get('v', [None])[0]
    elif parsed_url.hostname == 'youtu.be':
        return parsed_url.path.lstrip('/')
    return None

def search_youtube_videos(query, max_results=3):
    """
    Search YouTube for videos matching the query using yt-dlp.
    Handles cases where the search returns a playlist (list of videos) or a single video.
    """

    ydl_opts = {
        'quiet': True,
        'extract_flat': True,
        'force_generic_extractor': True,
        'noplaylist': True,
        'default_search': 'ytsearch',
        'limit': max_results,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(f"ytsearch:{query}", download=False)

        print(f"DEBUG: yt-dlp raw result -> {result}")

        # Handle case: playlist-like search result (expected result from ytsearch)
        if isinstance(result, dict) and 'entries' in result and result['entries']:
            videos = result['entries'][:max_results]
            return [f"https://www.youtube.com/watch?v={video['id']}" for video in videos]
        
        # Handle case: single video result (should be rare from ytsearch)
        elif isinstance(result, dict) and 'id' in result:
            return [f"https://www.youtube.com/watch?v={result['id']}"]

        # Handle empty result (no matches found)
        elif isinstance(result, dict) and result.get('entries') == []:
            print(f"⚠️ No YouTube videos found for query: {query}")
            return []

        # Fallback if nothing makes sense (debug mode)
        else:
            raise ValueError(f"Unexpected yt-dlp search result structure: {result}")

def fetch_youtube_transcript(video_url):
    video_id = get_youtube_video_id(video_url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return " ".join([entry['text'] for entry in transcript])

def cleaned_summary_output(text: str) -> str:
    """
    Removes everything inside <think>...</think> tags, including the tags themselves.
    Works even if there are multiple such sections.
    """
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

In [56]:

user_prompt = input("Enter your research topic: ")

initial_query = llm(
    f"You are a research assistant. Create only one concise web search query "
    f"that is short and formatted as a search query to find specific information about:\n'{user_prompt}'\n"
    "IMPORTANT: Output only the query itself — no explanations, no reasoning, just the query NO extra fluff around the query."
)

initial_query = clean_generated_query(initial_query)
print(f"Generated search query: {initial_query}")


Generated search query: cats in the hat: behavior patterns and grooming habits


In [18]:

search_tool = DuckDuckGoSearchAPIWrapper()
search_results = search_tool.results(initial_query, max_results=7)

print(f"Got {len(search_results)} results from DuckDuckGo.")

for i, r in enumerate(search_results, start=1):
    print(f"\nResult {i}: {r['title']}")
    print("URL:", r.get('link', 'No link'))
    print("Snippet:", r.get('snippet', '')[:200], "...")


Got 7 results from DuckDuckGo.

Result 1: Signs Your Cat May Be Feeling Lonely: Understanding Feline Social Needs ...
URL: https://petshun.com/article/how-do-i-know-if-my-cat-is-lonely
Snippet: Does your cat enjoy being petted, brushed, or played with? Affectionate behavior, such as rubbing against your legs, purring, or seeking physical contact, is a positive sign. If your cat seems distant ...

Result 2: 10 Signs Your Cat's Social Skills Need Improvement (and How to Help)
URL: https://pawdown.com/10-signs-your-cats-social-skills-need-improvement-and-how-to-help/
Snippet: Cats are often seen as independent creatures, but their social skills play a crucial role in how they interact with humans, other pets, and their environment. Whether it's hiding from guests, avoiding ...

Result 3: 15 Signs Your Cat Might Be Struggling with Social Skills (and How to ...
URL: https://catschef.com/15-signs-your-cat-might-be-struggling-with-social-skills-and-how-to-fix-it/
Snippet: When a cat isolates 

In [None]:

video_urls = search_youtube_videos(initial_query)

transcripts = []
for url in video_urls:
    try:
        transcript = fetch_youtube_transcript(url)
        transcripts.append((url, transcript))
    except TranscriptsDisabled:
        print(f"No transcript available for: {url}")
    except Exception as e:
        print(f"Failed to fetch transcript for {url}: {e}")


DEBUG: yt-dlp raw result -> {'id': 'cat in a hat', 'title': 'cat in a hat', '_type': 'playlist', 'entries': [{'_type': 'url', 'ie_key': 'Youtube', 'id': 'X5Irl_oWD6M', 'url': 'https://www.youtube.com/watch?v=X5Irl_oWD6M', 'title': 'The Cat in the Hat | Brand New Full Episode | Official Animated Read-Along | Dr. Seuss', 'description': None, 'duration': 602.0, 'channel_id': 'UC75uv7iKwTQdgD5mkhcvERA', 'channel': 'Dr. Seuss', 'channel_url': 'https://www.youtube.com/channel/UC75uv7iKwTQdgD5mkhcvERA', 'uploader': 'Dr. Seuss', 'uploader_id': '@DrSeuss', 'uploader_url': 'https://www.youtube.com/@DrSeuss', 'thumbnails': [{'url': 'https://i.ytimg.com/vi/X5Irl_oWD6M/hq720.jpg?sqp=-oaymwEcCOgCEMoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLDjHpDGvMV-cpoG0XNWQVNwK0JX2Q', 'height': 202, 'width': 360}, {'url': 'https://i.ytimg.com/vi/X5Irl_oWD6M/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLApWNwg7qpb7nG4AQF_RVOu9kYmNg', 'height': 404, 'width': 720}], 'timestamp': None, 'rele

In [37]:

combined_text = ""
for r in search_results:
    combined_text += f"Web Source: {r['link']}\n{r['snippet']}\n\n"
for url, transcript in transcripts:
    combined_text += f"YouTube Source: {url}\n{transcript[:500]}...\n\n"

current_summary = llm(f"Summarize the following content factually:\n\n{combined_text}").strip()
all_sources = [r['link'] for r in search_results] + [url for url, _ in transcripts]

print("Initial Combined Summary:\n", current_summary)


Initial Combined Summary:
 <think>
Okay, so I need to summarize this content about cats' social skills. Let's see... There are five sources provided here. Each of them seems to talk about different aspects related to how cats interact with humans and each other.

The first one is from petshun.com about whether a cat feels lonely when only being petted, brushed, or played with. That seems like it's talking about the positive behavior that suggests social interaction rather than loneliness.

The second source is from pawdown.com, which mentions that cats are independent but their social skills matter for their interactions. So this is more about how cats can be social in different ways and not necessarily always in a positive sense.

Third is catschef.com, which has several links. The fifth one talks about cats struggling with social skills, especially isolation, like when they hide from guests or avoid playtime. This might indicate loneliness if the cat feels alone without interaction.


In [49]:

max_loops = 2

for loop in range(max_loops):
    reflect_prompt = (
        f"Topic: {user_prompt}\n\n"
        f"Current Summary:\n{current_summary}\n\n"
        "Review the summary for gaps. If missing information exists, generate exactly one new search query."
        "Respond ONLY with the query text (or 'NONE' if no further search is needed)."
    )

    new_query = llm(reflect_prompt).strip()
    new_query = clean_generated_query(new_query)

    if new_query.upper() == "NONE":
        print("No further questions identified.")
        break

    # Web Search
    new_search_results = search_tool.results(new_query, max_results=5)

    # YouTube Search and Transcripts
    new_video_urls = search_youtube_videos(new_query)
    new_transcripts = []
    for url in new_video_urls:
        try:
            transcript = fetch_youtube_transcript(url)
            new_transcripts.append((url, transcript))
        except TranscriptsDisabled:
            print(f"No transcript available for: {url}")
        except Exception as e:
            print(f"Failed to fetch transcript for {url}: {e}")

    # Add new sources
    all_sources.extend([r['link'] for r in new_search_results] + [url for url, _ in new_transcripts])

    # Combine new content (web + YouTube)
    new_content = ""
    for r in new_search_results:
        new_content += f"Web Source: {r['link']}\n{r['snippet']}\n\n"
    for url, transcript in new_transcripts:
        new_content += f"YouTube Source: {url}\n{transcript[:500]}...\n\n"

    # Update summary
    update_prompt = (
        f"Current Summary:\n{current_summary}\n\n"
        f"New Information (web + YouTube):\n{new_content}\n\n"
        "Update the summary based only on this new information do NOT think about the nature of updating information just do it."
    )
    current_summary = llm(update_prompt).strip()

    print(f"Updated Summary After Loop {loop+1}:\n", current_summary)


DEBUG: yt-dlp raw result -> {'id': "alright, so i'm trying to create a summary about cats in a hat based on the provided information. the initial content mentioned using a specific template and including web links and youtube videos for relevant links. however, those weren't related to cats and were instead about pets and social interactions.", 'title': "alright, so i'm trying to create a summary about cats in a hat based on the provided information. the initial content mentioned using a specific template and including web links and youtube videos for relevant links. however, those weren't related to cats and were instead about pets and social interactions.", '_type': 'playlist', 'entries': [{'_type': 'url', 'ie_key': 'Youtube', 'id': 'QyyH13w9rEI', 'url': 'https://www.youtube.com/watch?v=QyyH13w9rEI', 'title': 'DXBPE - Extended Answer Questions - PE Active', 'description': None, 'duration': 2823.0, 'channel_id': 'UCYngECZsHB6j_2OBXmX4Zuw', 'channel': 'DXBPE', 'channel_url': 'https://w

In [None]:
import json
import requests

cleaned_summary = cleaned_summary_output(current_summary)

final_report = (
    f"Topic: {user_prompt}\n\n"
    f"Final Research Summary:\n{cleaned_summary}\n\n"
    "Sources:\n" + "\n".join(all_sources)
)

def send_email_report_via_api(subject, body, to_email):
    api_key = "my api key is save din env"  # Your actual SMTP2GO API key
    api_url = "https://api.smtp2go.com/v3/email/send"

    email_data = {
        "api_key": api_key,
        "to": [to_email],
        "sender": "kk3600@columbia.edu",  # Your verified sending email in SMTP2GO
        "subject": subject,
        "text_body": body
    }

    try:
        response = requests.post(api_url, json=email_data)
        response_data = response.json()
        
        if response_data.get("data", {}).get("succeeded", 0) > 0:
            print("Email sent successfully via SMTP2GO API!")
        else:
            print(f"Failed to send email via API. Response: {response_data}")
    except Exception as e:
        print(f"Error while sending email via SMTP2GO API: {e}")

recipient_email = input("Enter your email to receive the final report: ")
send_email_report_via_api(f"Research Report on: {user_prompt}", final_report, recipient_email)

# Also print the report to the console for visibility
print("\n=== Final Report Sent ===\n")
print(final_report)


Email sent successfully via SMTP2GO API!

=== Final Report Sent ===

Topic: cat in the hat behavior

Final Research Summary:
The updated summary highlights how cats interact with humans through touch (e.g., rubbing, purring) and play, focusing on positive behaviors that enhance emotional well-being. Encouraging social practice is important for comfort and emotional balance while cautioning that loneliness may occur unless proper connection is maintained.

1. Cats engage in positive behaviors like rubbing or purring to show interest in human actions.
2. Social interactions can enhance emotional well-being but may cause loneliness without proper connection.
3. Encouraging social practice helps maintain comfort and emotional balance in cats.

Sources:
https://petshun.com/article/how-do-i-know-if-my-cat-is-lonely
https://pawdown.com/10-signs-your-cats-social-skills-need-improvement-and-how-to-help/
https://catschef.com/15-signs-your-cat-might-be-struggling-with-social-skills-and-how-to-fix