In [None]:
!pip install -U langchain langchain-core langchain-community langchain-openai


In [None]:
import pandas as pd

# Address of the raw CSV file hosted on GitHub.
csv_url = "https://raw.githubusercontent.com/marcin119a/data/refs/heads/main/adresowo_warszawa_wroclaw.csv"
# Fetch the file and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=csv_url)

In [None]:
!pip install langchain-community pypdf

In [None]:
!wget https://api.sages.pl/content/trainings/ai-w-tworzeniu-oprogramowania/ai-hybrid-rag.pdf

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# 1. Load the PDF
pdf_path = "ai-hybrid-rag.pdf"
loader = PyPDFLoader(pdf_path)
# load() returns the loader's documents as they are. load_and_split() would additionally
# run them through a default text splitter, so an entry would no longer be guaranteed to
# correspond to one page even though the output below labels entries as pages ("Strona").
pages = loader.load()

# Print the extracted text of every entry under a numbered header.
for i, page in enumerate(pages):
    print(f"\n--- Strona {i+1} ---")
    print(page.page_content)



In [None]:
from transformers import pipeline
# Translation pipeline backed by the Helsinki-NLP/opus-mt-pl-en checkpoint
# (Polish -> English, judging by the model id).
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-pl-en")

def translate(text: str) -> str:
    """Run ``text`` through the module-level ``translator`` and return the translated string.

    The translator is called with ``max_length=512``; the ``"translation_text"`` field
    of its first result is returned.
    """
    outputs = translator(text, max_length=512)
    first_output = outputs[0]
    return first_output["translation_text"]

# 3. Translate every PDF page
for i, page in enumerate(pages):
    print(f"\n--- Strona {i+1} ---")
    # Only the first 512 characters of the stripped page text are sent to the translator.
    excerpt = page.page_content.strip()[:512]
    print(translate(excerpt))



In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import pipeline

# 1. PDF loader
loader = PyPDFLoader("ai-hybrid-rag.pdf")
pages = loader.load()

# 2. Splitter – cut the text into segments (chunk_size=500, chunk_overlap=50),
#    trying the separators in the listed order: paragraph break, line break, space, anything.
split_separators = ["\n\n", "\n", " ", ""]
splitter = RecursiveCharacterTextSplitter(
    separators=split_separators,
    chunk_overlap=50,
    chunk_size=500,
)
docs = splitter.split_documents(pages)

# Report how many segments the split produced.
print(f"Liczba segmentów: {len(docs)}")

# 3. Translator – pipeline backed by the Helsinki-NLP/opus-mt-pl-en checkpoint
#    (Polish -> English, judging by the model id).
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-pl-en")

def translate(text: str) -> str:
    """Run ``text`` through the module-level ``translator`` and return the translated string.

    The translator is called with ``max_length=512``; the ``"translation_text"`` field
    of its first result is returned.
    """
    outputs = translator(text, max_length=512)
    first_output = outputs[0]
    return first_output["translation_text"]

# 4. Translate every segment
for i, doc in enumerate(docs):
    print(f"\n--- Segment {i+1} ---")
    # The whole stripped segment is translated (no character cut-off here).
    segment_text = doc.page_content.strip()
    print(translate(segment_text))

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat template: a fixed system instruction and a user request
# containing the {topic} placeholder.
prompt_messages = [
    ("system", "You are a helpful assistant"),
    ("user", "Tell me a joke about {topic}"),
]
chat_prompt = ChatPromptTemplate(prompt_messages)

# Fill in the placeholder and print the resulting list of messages.
template_inputs = {"topic": "Warszawa"}
prompt_value = chat_prompt.invoke(template_inputs)
print(prompt_value.to_messages())




In [None]:

import getpass
import os

# Import the LLM client from LangChain
from langchain_openai import ChatOpenAI

# ChatOpenAI authenticates with the OPENAI_API_KEY environment variable. Ask for the key
# interactively when it is not set, so the cell works on a fresh kernel without the
# secret ever being written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Initialise the model – gpt-4o-mini, chosen here as the light, fast and cheap option
llm = ChatOpenAI(model="gpt-4o-mini")

# Call the model with the chat prompt value prepared earlier (prompt_value)
response = llm.invoke(prompt_value)

# Display the text of the model's reply
response.content



In [None]:
import pandas as pd
# Load the cleaned listings of each city and label every row with its city name.
# The label is the value the listing search compares against, so it must be the
# correctly spelled city name ("Łódź", not "Łodź").
df1 = pd.read_csv('https://raw.githubusercontent.com/marcin119a/r_d/refs/heads/main/scraper/data/ogloszenia_lodz_cleaned.csv')
df1['city'] = 'Łódź'

df2 = pd.read_csv('https://raw.githubusercontent.com/marcin119a/r_d/refs/heads/main/scraper/data/ogloszenia_warszawa_cleaned.csv')
df2['city'] = 'Warszawa'

# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# repeating the row labels of both source files.
df = pd.concat([df1, df2], ignore_index=True)
df.head()

In [None]:
# Narrow the frame to the six listing columns kept for the rest of the notebook.
listing_columns = ['locality', 'price_total_zl', 'area', 'rooms', 'city', 'url']
df = df.loc[:, listing_columns]

In [None]:
from langchain.tools import tool

@tool
def search_listings(city: str, max_price: int = 1_000_000, limit: int = 5) -> list[dict]:
    """Return the cheapest listings in a city whose total price is at or below max_price.

    Args:
        city: City name as stored in the data, e.g. "Warszawa"; letter case is ignored.
        max_price: Inclusive upper bound on the total price (the price_total_zl column).
        limit: Maximum number of listings to return (default 5).

    Returns:
        Up to `limit` listings as dictionaries (one per row), ordered from the cheapest.
    """
    # Case-insensitive city comparison so "warszawa" and "Warszawa" find the same rows.
    city_matches = df["city"].str.casefold() == city.casefold()
    affordable = df["price_total_zl"] <= max_price
    subset = df[city_matches & affordable].sort_values(by="price_total_zl")
    # A negative limit would make head() drop rows from the end; treat it as "no rows".
    return subset.head(max(limit, 0)).to_dict(orient="records")

tools = [search_listings]



In [None]:
from langchain_openai import ChatOpenAI
from langchain.agents import create_agent
from langchain.agents.middleware import wrap_model_call, ModelRequest, ModelResponse


# Two OpenAI chat models: "gpt-4o-mini" is bound to basic_model, "gpt-4o" to advanced_model.
basic_model, advanced_model = (
    ChatOpenAI(model=model_name) for model_name in ("gpt-4o-mini", "gpt-4o")
)

In [None]:

@wrap_model_call
def dynamic_model_selection(request: ModelRequest, handler) -> ModelResponse:
    """Choose the chat model for this call based on how long the conversation is.

    Args:
        request: The pending model request; ``request.state["messages"]`` is read
            to count the messages exchanged so far.
        handler: Callable that executes the (possibly modified) request.

    Returns:
        Whatever ``handler`` returns for the request.
    """
    message_count = len(request.state["messages"])

    # Conversations longer than 10 messages go to the advanced model, the rest to the basic one.
    model = advanced_model if message_count > 10 else basic_model

    # Hand the handler a copy of the request with the model replaced, rather than
    # assigning to request.model in place.
    return handler(request.override(model=model))




In [None]:

# Assemble the agent from the default model, the tool list and the model-selection middleware.
agent = create_agent(
    middleware=[dynamic_model_selection],
    tools=tools,
    model=basic_model,  # Default model
)

# Single user request sent to the agent.
user_message = {
    "role": "user",
    "content": "Znajdź 3 ogłoszenia poniżej 600 000 zł dla Warszawy i policz średnią dla Warszawy dla danych",
}
result = agent.invoke(
    {"messages": [user_message]},
    context={"user_role": "expert"},
)

# Print the content of the last message in the returned state.
final_message = result['messages'][-1]
print(final_message.content)

In [None]:
# Same request as a streamed run.
stream_input = {
    "messages": [{"role": "user", "content": "Znajdź 3 ogłoszenia poniżej 600 000 zł dla Warszawy i policz średnią dla Warszawy dla danych"}]
}

for chunk in agent.stream(stream_input, stream_mode="values"):
    # Each chunk is the agent's full state at that moment; inspect its newest message.
    latest_message = chunk["messages"][-1]

    # Text content takes precedence; tool calls are reported only when there is no text.
    if latest_message.content:
        print(f"Agent: {latest_message.content}")
        continue
    if latest_message.tool_calls:
        print(f"Calling tools: {[tc['name'] for tc in latest_message.tool_calls]}")

