In [1]:
import dotenv
dotenv.load_dotenv()

True

In [6]:
from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults

tavily_tool = TavilySearchResults(api_wrapper=TavilySearchAPIWrapper())


from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())



available_tools = [tavily_tool, wikipedia]

In [20]:
dataset_path = "datasets/london.txt"

In [21]:
doc_file = open(dataset_path, 'r').read()

In [163]:

from langchain_text_splitters import RecursiveCharacterTextSplitter,CharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2500,
    chunk_overlap=200,
    separators=[
        "\n\n",
        "\n",
        ".",
        ",",
        "\u200b",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ]
)

In [164]:
texts = text_splitter.create_documents([doc_file])
texts = [x.page_content for x in texts]
texts

["London ( LUN-dÉ™n) is the capital and largest city of both England and the United Kingdom, with a population of 8,866,180 in 2022. The wider metropolitan area is the largest in Western Europe, with a population of 14.9 million. London stands on the River Thames in southeast England, at the head of a 50-mile (80 km) estuary down to the North Sea, and has been a major settlement for nearly 2,000 years. Its ancient core and financial centre, the City of London, was founded by the Romans as Londinium and has retained its medieval boundaries. The City of Westminster, to the west of the City of London, has been the centuries-long host of the national government and parliament. London grew rapidly in the 19th century, becoming the world's largest city at the time as it expanded and absorbed the surrounding county of Middlesex alongside parts of Surrey and Kent. In 1965, it was combined with parts of Essex and Hertfordshire to create the administrative area of Greater London, which is govern

In [172]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)



In [196]:
from typing import List
from langchain_core.pydantic_v1 import BaseModel, Field


from langchain.output_parsers import PydanticOutputParser


class QuestionAnswer(BaseModel):
    question:str = Field(description="Question regarding context")
    answer: str = Field(description="Answer to the question")

class QuestionsList(BaseModel):
    questions: List[QuestionAnswer] = Field(description="Question and answers mapping")

parser = PydanticOutputParser(pydantic_object=QuestionsList)

In [238]:
from langchain_core.prompts import PromptTemplate


template = (
    "```plain_text\n{content}\n```\n"
    "The above is a chunk of data about london scraped from wikipedia. "
    "You're a real estate consultancy in london. "
    "Based on the the above content you're provided, "
    "Give five questions and answers regarding real estate prospects "
    "london and mainly its boroughs or a specific borough or an area or areas for housing and offices rental information."
    "Make sure the questions aren't basic data query, but more of complex and consise reasoning and prospects study that our clients might expect. "
    "might want and so be answers. "
    "And make sure the answers are consise."
    "And most importantly, the questions and answers must be factual and on point. "
    "\n{format_instructions}"
    "Let's start. \n"
)
prompt = PromptTemplate.from_template(template)
chain = prompt | llm | parser


def gen_questions_for(text_cont):
    return chain.invoke(dict(content=text_cont, format_instructions=parser.get_format_instructions())).questions



In [240]:
gen_questions_for(texts[0])

[QuestionAnswer(question='What are the current trends in housing rental prices in the City of London compared to the wider metropolitan area?', answer='Housing rental prices in the City of London have remained relatively stable due to the limited availability of housing stock, while the wider metropolitan area has seen a slight increase in rental prices due to high demand and limited supply.'),
 QuestionAnswer(question='How has the recent post-Brexit exodus of stock listings from the London Stock Exchange impacted the demand for office spaces in the City of Westminster?', answer='The post-Brexit exodus has led to a decrease in demand for office spaces in the City of Westminster as some financial institutions have relocated their operations to other European cities, resulting in a slight oversupply of office spaces in the area.'),
 QuestionAnswer(question='What are the key factors driving the demand for housing in the Greater London Built-up Area and how is it impacting rental prices?',

In [241]:
qa_list_borough = []

for tx in texts:
    qa_map = gen_questions_for(tx)
    qa_list_borough += qa_map
    print(qa_map)
    print()


[QuestionAnswer(question='What are the current trends in housing prices in the City of London compared to the wider metropolitan area?', answer='Housing prices in the City of London have remained relatively stable due to limited space for new developments, while the wider metropolitan area has seen a steady increase in prices due to high demand and new construction projects.'), QuestionAnswer(question='How has the recent post-Brexit exodus of stock listings from the London Stock Exchange impacted the demand for office spaces in the City of Westminster?', answer='The post-Brexit exodus has led to a decrease in demand for office spaces in the City of Westminster as some financial institutions have relocated, but the area remains attractive for businesses due to its historical significance and proximity to government institutions.'), QuestionAnswer(question='What are the key factors driving the demand for housing in the borough of Kensington and Chelsea?', answer='The borough of Kensingto

In [242]:

print(len(qa_list_borough))

265


In [243]:
import json

# qal = [dict(question=x.question, answer=x.answer) for x in qa_list]
qalb = [dict(question=x.question, answer=x.answer) for x in qa_list_borough]

# open("qa_list.json", 'w').write(json.dumps(qal))
open("qa_list_specified.json", 'w').write(json.dumps(qalb))

122073

In [244]:
import pandas as pd


dat = json.load(open("qa_list_specified.json"))

In [247]:
dat[157]

{'question': 'Considering the high proportion of international passengers at London Southend Airport, which borough or area would be a potential hotspot for real estate development catering to international visitors?',
 'answer': "Essex, specifically the vicinity of London Southend Airport, would be a potential hotspot for real estate development catering to international visitors given the airport's high proportion of international passengers."}

In [232]:
llm.invoke("With over 300 languages spoken in London, how does language diversity impact the commercial real estate market in the city? give me one line answer")

AIMessage(content='Language diversity in London can create opportunities for businesses to cater to a wider range of clientele, potentially increasing demand for commercial real estate in areas with diverse populations.', response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 35, 'total_tokens': 66}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-b9eb8f27-745c-4b08-8ba2-66a3d1ad7783-0', usage_metadata={'input_tokens': 35, 'output_tokens': 31, 'total_tokens': 66})

In [258]:
open("questions_export.md","w").write("\n\n".join([f"({str(i).zfill(3)}) Q: **{x.get('question')}**\nA: {x.get('answer')}" for i,x in enumerate(dat)]))

118626