In [None]:
from langgraph.graph import START, StateGraph
from typing_extensions import TypedDict
import time
from langchain_deepseek import ChatDeepSeek
from pydantic import BaseModel
from langchain_core.messages import HumanMessage
from pydantic import Field



class SOCountry(BaseModel):
    name: Field(str, description="The name of the country")
    capital: Field(str, description="The capital of the country")
    area: Field(int, description="The area of the country in square kilometers")
    population: Field(int, description="The population of the country")

class SOCapital(BaseModel):
    name: Field(str, description="The name of the capital")
    area: Field(int, description="The area of the capital in square kilometers")
    population: Field(int, description="The population of the capital")


class State(TypedDict):
    country: SOCountry
    capital: SOCapital

llm = ChatDeepSeek(model="deepseek-chat", temperature=0)

async def SOCountry(state: State):
    llm_with_structured_output = llm.with_structured_output(SOCountry)
    res = llm_with_structured_output.ainvoke([HumanMessage(content="USA")])
    return {"country": res}

async def SOCapital(state: State):
    llm_with_structured_output = llm.with_structured_output(SOCapital)
    res = llm_with_structured_output.ainvoke([HumanMessage(content=state["country"].capital)])
    return {"capital": res}


agent_builder = StateGraph(State)
agent_builder.add_node("SOCountry", SOCountry)
agent_builder.add_node("SOCapital", SOCapital)

agent_builder.add_edge(START, "SOCountry")
agent_builder.add_edge("SOCountry", "SOCapital")

graph = agent_builder.compile()

PydanticSchemaGenerationError: Unable to generate pydantic-core schema for FieldInfo(annotation=NoneType, required=True, description='The name of the country'). Set `arbitrary_types_allowed=True` in the model_config to ignore this error or implement `__get_pydantic_core_schema__` on your type to fully support it.

If you got this error by calling handler(<some type>) within `__get_pydantic_core_schema__` then you likely need to call `handler.generate_schema(<some type>)` since we do not call `__get_pydantic_core_schema__` on `<some type>` otherwise to avoid infinite recursion.

For further information visit https://errors.pydantic.dev/2.11/u/schema-for-unknown-type

In [95]:
async for chunk in graph.astream(
    {"foo": ""},
    stream_mode="values",
):
    print(chunk)

{'foo': '', 'messages': []}
{'foo': '', 'messages': ['node 1 called']}
{'foo': '', 'messages': ['node 1 called', 'node 2 called']}
{'foo': '', 'messages': ['node 1 called', 'node 2 called', 'node 3 called']}


In [76]:
list(graph.get_state_history(config))

[StateSnapshot(values={'foo': '', 'num': 1}, next=(), config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b6d4-eda5-6504-8001-2719a6fa6b1c'}}, metadata={'source': 'loop', 'step': 1, 'parents': {}}, created_at='2025-09-27T06:43:42.515730+00:00', parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b6d4-ed9d-6afc-8000-44dacfa8b0b7'}}, tasks=(), interrupts=()),
 StateSnapshot(values={'foo': ''}, next=('node_1',), config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b6d4-ed9d-6afc-8000-44dacfa8b0b7'}}, metadata={'source': 'loop', 'step': 0, 'parents': {}}, created_at='2025-09-27T06:43:42.512601+00:00', parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b6d4-ed9b-6a9a-bfff-6dd6a2359d0c'}}, tasks=(PregelTask(id='8397e81a-ef2a-919e-2404-842459e36686', name='node_1', path=('__pregel_pull', 'node_1'), error=None, interrupts=(), state={'configurable

In [None]:
graph.invoke({"foo": ""})

In [9]:
config = {"configurable": {"thread_id": "1"}}

In [10]:
graph.invoke({"foo": ""}, config)

{'foo': 'b', 'bar': ['a', 'b']}

In [11]:
list(graph.get_state_history(config))


[StateSnapshot(values={'foo': 'b', 'bar': ['a', 'b']}, next=(), config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b680-4e34-6dfc-8002-7d7a4b5410e2'}}, metadata={'source': 'loop', 'step': 2, 'parents': {}}, created_at='2025-09-27T06:05:50.939490+00:00', parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b680-4e33-65f6-8001-11e1f03c9af0'}}, tasks=(), interrupts=()),
 StateSnapshot(values={'foo': 'a', 'bar': ['a']}, next=('node_b',), config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b680-4e33-65f6-8001-11e1f03c9af0'}}, metadata={'source': 'loop', 'step': 1, 'parents': {}}, created_at='2025-09-27T06:05:50.938873+00:00', parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f09b680-4e31-629c-8000-51fc18437b5f'}}, tasks=(PregelTask(id='6d4b2a52-f325-2dd4-6e9d-1059994716e0', name='node_b', path=('__pregel_pull', 'node_b'), error=None, interrupts

In [3]:
from dotenv import load_dotenv
load_dotenv()

from langgraph.graph import StateGraph, START, END
from src.models.schemas import CompetitorState, CompetitorFinderOutputState
from src.core import tenant_agent, competitor_finder, change_detector
from langchain_core.messages import HumanMessage
import asyncio

async def build_opp_agent():
    agent_builder = StateGraph(CompetitorState)
    agent_builder.add_node("tenant_info_agent", tenant_agent)
    agent_builder.add_node("competitor_finder", competitor_finder)
    agent_builder.add_node("change_detector", change_detector)


    agent_builder.add_edge(START, "tenant_info_agent")
    agent_builder.add_edge("tenant_info_agent", "competitor_finder")
    agent_builder.add_edge("competitor_finder", "change_detector")
    agent_builder.add_edge("change_detector", END)
    opp_agent = agent_builder.compile()
    return opp_agent

results = []
opp_agent = await build_opp_agent()
# async for update in opp_agent.astream(
#     {"messages": [HumanMessage(content="https://retrotype.ink/?ref=producthunt")]},
#     # subgraphs=True,
#     stream_mode="updates",
# ):
#     results.append(update)

# Onging Tracking

In [10]:
from langchain_deepseek import ChatDeepSeek

llm = ChatDeepSeek(
    model="deepseek-reasoner",
    temperature=0,
    max_tokens=64000,
    timeout=None,
    max_retries=2,
    # api_key="...",
    # other params...
)

In [9]:
from langchain_core.messages import HumanMessage
llm.invoke([HumanMessage(content="Hello")])

BadRequestError: Error code: 400 - {'error': {'message': 'Invalid max_tokens value, the valid range of max_tokens is [1, 8192]', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_request_error'}}

In [4]:
from src.core.ongoing_compare_agent import ongoing_compare_agent
urls = ["https://news.ycombinator.com/", "https://www.ycombinator.com/", "https://www.asdasjdkashdkajshdkjahsdaskjdhakjsdhkajshdkja.com/apply/"]

In [5]:
results = []
async for update in ongoing_compare_agent(urls):
    results.append(update)

In [8]:
results[0]

SOChanges(url='https://news.ycombinator.com/', changes=[Change(change_type='Modified', content="Reordered top stories: 'William Gibson Reads Neuromancer' moved from position 8 to 7, while '60 years after Gemini' article dropped from position 7 to 8", timestamp='2025-09-16T09:49:35Z', threat_level=1, why_matter='Minor content prioritization change showing community interest in sci-fi/tech culture content over space history, but no strategic impact on Hacker News positioning', suggestions='Monitor for any pattern of content preference shifts, but no immediate action needed as this appears to be normal ranking fluctuation'), Change(change_type='Modified', content='Updated story rankings and engagement metrics across multiple posts with minor point increases (1-7 points) and comment count adjustments', timestamp='2025-09-16T09:49:35Z', threat_level=0, why_matter='Routine engagement metric updates reflecting normal user voting and commenting activity, no strategic changes to platform featur

In [7]:
from src.core.tracking import OngoingTracker
tracker = OngoingTracker(tag="test", batch_size=10)

In [8]:
tracker.maxAge

300000

In [9]:
urls = ["https://news.ycombinator.com/", "https://www.ycombinator.com/", "https://www.asdasjdkashdkajshdkjahsdaskjdhakjsdhkajshdkja.com/apply/"]

In [10]:
results = []
async for update in tracker.track_stream(urls):
    results.append(update)

In [None]:
results[0].

'|     |     |     |\n| --- | --- | --- |\n| |     |     |     |\n| --- | --- | --- |\n| [![](https://news.ycombinator.com/y18.svg)](https://news.ycombinator.com/) | **[Hacker News](https://news.ycombinator.com/news)** [new](https://news.ycombinator.com/newest) \\| [past](https://news.ycombinator.com/front) \\| [comments](https://news.ycombinator.com/newcomments) \\| [ask](https://news.ycombinator.com/ask) \\| [show](https://news.ycombinator.com/show) \\| [jobs](https://news.ycombinator.com/jobs) \\| [submit](https://news.ycombinator.com/submit) | [login](https://news.ycombinator.com/login?goto=news) | |\n\n| |     |     |     |\n| --- | --- | --- |\n| 1. |  | [Hosting a website on a disposable vape](https://bogdanthegeek.github.io/blog/projects/vapeserver/) ( [bogdanthegeek.github.io](https://news.ycombinator.com/from?site=bogdanthegeek.github.io)) |\n|  | 929 points by [BogdanTheGeek](https://news.ycombinator.com/user?id=BogdanTheGeek) [13 hours ago](https://news.ycombinator.com/item

In [8]:
from firecrawl.v2.types import Document
type(results[0]) == Document

True

In [67]:
jdiff = None
tdiff = None

if results[0].change_tracking['changeStatus'] == 'changed':
    tdiff = results[0].change_tracking.get('diff').get('text')
    jdiff = results[0].change_tracking.get('diff').get('json')

In [102]:
results[0].metadata.url

'https://news.ycombinator.com/'

In [68]:
tdiff

'@@ -7,94 +7,94 @@\n | |     |     |     |\n | --- | --- | --- |\n | 1. |  | [Hosting a website on a disposable vape](https://bogdanthegeek.github.io/blog/projects/vapeserver/) ( [bogdanthegeek.github.io](https://news.ycombinator.com/from?site=bogdanthegeek.github.io)) |\n-|  | 861 points by [BogdanTheGeek](https://news.ycombinator.com/user?id=BogdanTheGeek) [12 hours ago](https://news.ycombinator.com/item?id=45252817) \\| [hide](https://news.ycombinator.com/hide?id=45252817&goto=news) \\| [391\xa0comments](https://news.ycombinator.com/item?id=45252817) |\n+|  | 929 points by [BogdanTheGeek](https://news.ycombinator.com/user?id=BogdanTheGeek) [13 hours ago](https://news.ycombinator.com/item?id=45252817) \\| [hide](https://news.ycombinator.com/hide?id=45252817&goto=news) \\| [394\xa0comments](https://news.ycombinator.com/item?id=45252817) |\n \n-| 2. |  | [Public static void main(String\\[\\] args) is dead](https://mccue.dev/pages/9-16-25-psvm) ( [mccue.dev](https://news.ycombinator.com

In [98]:
import json
print(json.dumps(jdiff,separators=(",", ":"), ensure_ascii=False))

{"files":[{"from":null,"to":null,"chunks":[{"content":"@@ -7,94 +7,94 @@","changes":[{"type":"normal","content":" | |     |     |     |","normal":true,"ln1":7,"ln2":7},{"type":"normal","content":" | --- | --- | --- |","normal":true,"ln1":8,"ln2":8},{"type":"normal","content":" | 1. |  | [Hosting a website on a disposable vape](https://bogdanthegeek.github.io/blog/projects/vapeserver/) ( [bogdanthegeek.github.io](https://news.ycombinator.com/from?site=bogdanthegeek.github.io)) |","normal":true,"ln1":9,"ln2":9},{"type":"del","content":"-|  | 861 points by [BogdanTheGeek](https://news.ycombinator.com/user?id=BogdanTheGeek) [12 hours ago](https://news.ycombinator.com/item?id=45252817) \\| [hide](https://news.ycombinator.com/hide?id=45252817&goto=news) \\| [391 comments](https://news.ycombinator.com/item?id=45252817) |","del":true,"ln":10},{"type":"add","content":"+|  | 929 points by [BogdanTheGeek](https://news.ycombinator.com/user?id=BogdanTheGeek) [13 hours ago](https://news.ycombinator.

In [72]:
from pydantic import BaseModel, Field
from typing import List
class Change(BaseModel):
    change_type: str = Field(..., description=("Added | Removed | Modified"))
    content: str = Field(..., description=("A concise summary of what exactly changed on the competitor's website. Focus on the meaningful details extracted from the diff rather than raw HTML or markdown."))
    timestamp: str = Field(..., description=("The timestamp when this change was detected. Use ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ."))
    threat_level: int = Field(5, ge=0, le=10, description=("A numerical score from 0 to 10 representing the potential impact of this change on our business or strategy. 0 = negligible; 10 = extremely critical."))
    why_matter: str = Field(..., description=("An explanation of why this change is important from a competitive intelligence perspective. Highlight possible business, market, or positioning implications."))
    suggestions: str = Field(..., description=("Actionable next steps or recommendations for our strategy based on this change. Examples: monitor further updates, adjust pricing, review competitor roadmap, or prepare a counter-marketing response."))

class ListOfChanges(BaseModel):
    changes: List[Change]

from langchain_core.messages import HumanMessage
from langchain_deepseek import ChatDeepSeek
llm = ChatDeepSeek(model="deepseek-chat", temperature=0)

llm_structured = llm.with_structured_output(ListOfChanges)

res = llm_structured.invoke([HumanMessage(content=f"""The diff is: 
{jdiff}""")])

In [73]:
res.model_dump()

{'changes': [{'change_type': 'Modified',
   'content': "Top story engagement increased: 'Hosting a website on a disposable vape' gained 68 points (861→929) and 3 more comments (391→394)",
   'timestamp': '2025-09-16T12:00:00Z',
   'threat_level': 3,
   'why_matter': 'Increased engagement on technical/hacking content suggests growing interest in unconventional computing platforms, which could indicate shifting developer interests or emerging trends in hardware hacking',
   'suggestions': 'Monitor for similar hardware hacking trends and consider if our platform should support or feature unconventional computing projects'},
  {'change_type': 'Modified',
   'content': "Position 2 changed from 'Public static void main(String[] args) is dead' to 'Learn x86-64 assembly by writing a GUI from scratch' with significantly lower engagement (126→61 points)",
   'timestamp': '2025-09-16T12:00:00Z',
   'threat_level': 4,
   'why_matter': 'Major shift from Java ecosystem discussion to low-level assemb

In [4]:
from firecrawl.v2.client_async import AsyncFirecrawlClient

crawler = AsyncFirecrawlClient()

In [None]:
res = await crawler.scrape("https://news.ycombinator.com/", formats=['markdown', 'change_tracking'], maxAge=0)

In [6]:
res



In [None]:
from datetime import datetime, timezone

def get_today_str() -> str:
    """Return current UTC time in ISO 8601 format (e.g., 2025-09-16T12:00:00Z)."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

In [90]:
get_today_str()

'2025-09-16T08:51:58Z'