In [None]:
# Project-local bootstrap, run as the first cell of the notebook.
# NOTE(review): presumably configures Django (a later cell imports
# `reddit.models` and uses `.objects`) — confirm against the setup module.
import setup

setup.init()

In [None]:
import os
import io
import contextlib
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# os.environ lookups for the API keys in the following cells can find them.
load_dotenv()

In [None]:
# Gemini API key, read from the process environment.
GOOGLE_GEMINI_API_KEY = os.environ.get("GOOGLE_GEMINI_API_KEY")

# Fail fast with an actionable message. Truthiness (rather than `is not None`)
# also rejects an empty value, e.g. a bare `GOOGLE_GEMINI_API_KEY=` line in .env,
# which would otherwise only surface later as an opaque API authentication error.
assert GOOGLE_GEMINI_API_KEY, (
    "GOOGLE_GEMINI_API_KEY is missing or empty; set it in your .env file or environment"
)

In [None]:
# Bright Data API key, read from the process environment.
BRIGHT_DATA_API_KEY = os.environ.get("BRIGHT_DATA_API_KEY")

# Fail fast with an actionable message. Truthiness (rather than `is not None`)
# also rejects an empty value, e.g. a bare `BRIGHT_DATA_API_KEY=` line in .env,
# which would otherwise only surface later as an opaque API authentication error.
assert BRIGHT_DATA_API_KEY, (
    "BRIGHT_DATA_API_KEY is missing or empty; set it in your .env file or environment"
)

In [None]:
from reddit.models import RedditCommunity

In [None]:
from langchain_brightdata import BrightDataSERP

# Bright Data SERP (search-engine results) tool; it is the only tool handed to
# the agent when the agent is created later in the notebook.
# NOTE(review): parse_results=True presumably requests parsed/structured results
# instead of raw page content — confirm in the langchain_brightdata docs.
serp_tool = BrightDataSERP(
    bright_data_api_key=BRIGHT_DATA_API_KEY,
    parse_results=True,
)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that drives the agent: Gemini 2.5 Flash, authenticated with the
# API key read from the environment earlier in the notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=GOOGLE_GEMINI_API_KEY,
)

In [None]:
# System prompt for the community-finding agent. Spelled out as adjacent string
# literals so the leading newline, the blank separator line, and the trailing
# newline are all explicit rather than implied by a triple-quoted block.
agent_prompt = (
    "\n"
    "You are an expert assistant that can search the internet to find the best Reddit communities for any given topic.\n"
    "\n"
    "You are also an expert at finding niche communities that discuss the same topic.\n"
)

In [None]:
from pydantic import BaseModel, Field


# One Reddit community as extracted by the agent.
# Documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its generated JSON schema, and this
# schema is what the agent receives as its response format.
class RedditCommunitySchema(BaseModel):
    name: str = Field(description="Formatted name for Reddit")
    url: str = Field(description="The complete URL of the Reddit community")
    subreddit_slug: str = Field(
        description="The slug of the subreddit such as r/python, r/web, or r/trending"
    )
    # `int | None` on its own only makes the value nullable; without a default
    # pydantic still treats the field as required and rejects a payload that
    # omits it. default=None makes it genuinely optional, matching "if available".
    member_count: int | None = Field(
        default=None, description="Current member count, if available"
    )


# Top-level structured response: a wrapper around the list of communities.
# It is passed to create_agent as response_format and read back from
# results["structured_response"] in the later cells.
class RedditCommunitiesSchema(BaseModel):
    communities: list[RedditCommunitySchema] = Field(
        description="The list of Reddit communities"
    )

In [None]:
from langchain.agents import create_agent

# Assemble the agent: the Gemini chat model, the Bright Data search tool as its
# only tool, the system prompt defined earlier, and RedditCommunitiesSchema as
# the structured response format.
reddit_agent = create_agent(
    model=model,
    tools=[serp_tool],
    system_prompt=agent_prompt,
    response_format=RedditCommunitiesSchema,
)

# Running the agent writes to stdout, and (per the original author's note) that
# output includes the Bright Data API key. Capture stdout in an in-memory
# buffer so the key never reaches the notebook's rendered output.
buffer = io.StringIO()
user_turn = {"role": "user", "content": "python, javascript, django, web"}

with contextlib.redirect_stdout(buffer):
    results = reddit_agent.invoke({"messages": [user_turn]}, stream_mode="values")

# Last expression of the cell: display the text of the agent's final message.
results["messages"][-1].content

In [None]:
# Print one line per community returned in the structured response:
# name, URL, slug, and member count, separated by spaces.
for subreddit in results["structured_response"].communities:
    row = (
        subreddit.name,
        subreddit.url,
        subreddit.subreddit_slug,
        subreddit.member_count,
    )
    print(*row)

In [None]:
# Convert every community in the structured response to a plain dict first,
# then persist them one by one.
community_data = [
    schema.model_dump() for schema in results["structured_response"].communities
]

for community in community_data:
    # The URL is the lookup key. pop() removes it from the dict in place, so
    # what remains is exactly the set of field values to write.
    url = community.pop("url")
    print(community)
    # Update the row matching this URL, or create it if none exists.
    RedditCommunity.objects.update_or_create(url=url, defaults=community)