In [52]:
# Data Ingestion
from langchain_community.document_loaders import TextLoader
loader = TextLoader("speech.txt")
text_documents = loader.load()
text_documents

[Document(page_content="SARTHAK KHANDUJA\nWindsor, Ontario\nsarthakkhanduja98@gmail.com | +1 (519) 992 7207\n\nMay 3rd, 2024\n\nHiring Manager\nConstant Contact\n\nRe: New Grad Java Developer - Contacts\n\nDear Hiring Manager,\n\nI am writing to express my interest in the New Grad Java Developer - Contacts position at Constant Contact. With a diverse background in software engineering, I am excited about the opportunity to contribute to your innovative team that makes communication technology accessible and simple at Constant Contact.\n\nThe reason I have applied for this position is because I relate very closely to the idea of catering to small business owners and entrepreneurs. Having independently created various solutions for small business owners as a freelancer, I believe problem-solving as a skill could be maximized when working with a non-established company. Moreover, the types of problems are never repetitive, which is why itâ€™s always exciting to try and propose a new solut

In [53]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [54]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('example_research_paper.pdf')
docs = loader.load()
docs

[Document(page_content='Luxembourg SUMO Trafﬁc (LuST) Scenario:\n24 Hours of Mobility\nfor Vehicular Networking Research\nLara CODECA, Raphael FRANK, Thomas ENGEL\nInterdisciplinary Centre for Security, Reliability and Trust\nUniversity of Luxembourg, 2721, Luxembourg\nEmails: lara.codeca@uni.lu raphael.frank@uni.lu thomas.engel@uni.lu\nAbstract —Different research communities varying from\ntelecommunication to trafﬁc engineering are working on prob-\nlems related to vehicular trafﬁc congestion, intelligent transporta-\ntion systems, and mobility patterns using information collected\nfrom a variety of sensors. To test the solutions, the ﬁrst step is\nto use a vehicular trafﬁc simulator with an appropriate scenario\nin order to reproduce realistic mobility patterns. Many mobility\nsimulators are available, and the choice is usually done based on\nthe size and type of simulation required, but a common problem\nis to ﬁnd a realistic trafﬁc scenario. In order to evaluate and\ncompare new c

In [55]:
# Converting document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(docs)

documents

[Document(page_content='Luxembourg SUMO Trafﬁc (LuST) Scenario:\n24 Hours of Mobility\nfor Vehicular Networking Research\nLara CODECA, Raphael FRANK, Thomas ENGEL\nInterdisciplinary Centre for Security, Reliability and Trust\nUniversity of Luxembourg, 2721, Luxembourg\nEmails: lara.codeca@uni.lu raphael.frank@uni.lu thomas.engel@uni.lu\nAbstract —Different research communities varying from\ntelecommunication to trafﬁc engineering are working on prob-\nlems related to vehicular trafﬁc congestion, intelligent transporta-\ntion systems, and mobility patterns using information collected\nfrom a variety of sensors. To test the solutions, the ﬁrst step is\nto use a vehicular trafﬁc simulator with an appropriate scenario\nin order to reproduce realistic mobility patterns. Many mobility\nsimulators are available, and the choice is usually done based on\nthe size and type of simulation required, but a common problem\nis to ﬁnd a realistic trafﬁc scenario. In order to evaluate and', metadata={'s

In [56]:
# Converting the chunks into Vectors
# Vector Embeddings and Vector Store

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
# Chroma is a Vector DB

db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [57]:
query = "Who are the authors of 'LUsT Scenario' research paper?"

# result = db.similarity_search(query)
result = db.similarity_search_with_relevance_scores(query)
result

[(Document(page_content='Luxembourg SUMO Trafﬁc (LuST) Scenario:\n24 Hours of Mobility\nfor Vehicular Networking Research\nLara CODECA, Raphael FRANK, Thomas ENGEL\nInterdisciplinary Centre for Security, Reliability and Trust\nUniversity of Luxembourg, 2721, Luxembourg\nEmails: lara.codeca@uni.lu raphael.frank@uni.lu thomas.engel@uni.lu\nAbstract —Different research communities varying from\ntelecommunication to trafﬁc engineering are working on prob-\nlems related to vehicular trafﬁc congestion, intelligent transporta-\ntion systems, and mobility patterns using information collected\nfrom a variety of sensors. To test the solutions, the ﬁrst step is\nto use a vehicular trafﬁc simulator with an appropriate scenario\nin order to reproduce realistic mobility patterns. Many mobility\nsimulators are available, and the choice is usually done based on\nthe size and type of simulation required, but a common problem\nis to ﬁnd a realistic trafﬁc scenario. In order to evaluate and', metadata={'

In [58]:
query1= "Is this simulation done using OMNET++ ?"

# result = db.similarity_search(query)
result1 = db.similarity_search_with_relevance_scores(query1)
result1

[(Document(page_content='34.\n[23] A. Varga and R. Hornig, “An overview of the OMNeT++ simulation\nenvironment,” in Proceedings of the 1st international conference on\nsimulation tools and techniques for communications, networks and\nsystems & workshops . ICST (Institute for Computer Sciences, Social-\nInformatics and Telecommunications Engineering), 2008, p. 60.\n[24] C. Sommer, R. German, and F. Dressler, “Bidirectionally coupled\nnetwork and road trafﬁc simulation for improved IVC analysis,” Mobile\nComputing, IEEE Transactions on , vol. 10, no. 1, pp. 3–15, 2011.\n[25] J. G. Wardrop, “ROAD PAPER. SOME THEORETICAL ASPECTS\nOF ROAD TRAFFIC RESEARCH.” in ICE Proceedings: Engineering\nDivisions , vol. 1, no. 3. Thomas Telford, 1952, pp. 325–362.\n[26] L. Codeca, R. Frank, and T. Engel, “Improving trafﬁc in urban environ-\nments applying the Wardrop equilibrium,” in Network Protocols (ICNP),\n2013 21st IEEE International Conference on . IEEE, 2013, pp. 1–6.', metadata={'page': 7, 'sourc

In [59]:
query2= "Which city's traffic scenario is being used here?"

# result = db.similarity_search(query)
result2 = db.similarity_search_with_relevance_scores(query2)
result2

[(Document(page_content='city, demographic information about the population, schools,\nworkplaces and residential areas. All of these are retrieved\nfrom OSM and STATEC.\nMobility\nThe trafﬁc model is based on a mobility study that describes\ntrafﬁc characteristics over recent years [18]. We decided to\ntune the trafﬁc demand between 200,000 and 300,000 vehicles\nper day, depending on which trafﬁc demand we want to\nsimulate (details are in Section Trafﬁc Demand ). The public\ntransport database was used to retrieve the information about\nbus routes [19]. A total of 563 bus stops were added to the\nscenario. As shown in Table III, we added 38 bus routes inside\nthe city for a total of 2,336 buses over the 24 hours period. The\nlocation of the bus stops in the LuST Scenario is not the same\nas the one in the OSM ﬁle (as mentioned earlier), however we\ntried to keep them as close as possible. For this reason, we had\nto rebuild the bus routes to match the new bus stop locations.', metada

In [60]:
## Till now, we have only done a similarity search, this means the embeddings have been just grouped together to see which chunks are similar.

## Now, we're going to use a Generative model, to which we will give these similar chunks in the form of 
## embeddings to read through, and generate a coherent response based on the context it gathers from these embeddings


Retreiver and Chains, with Langchain

In [61]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("example_research_paper.pdf")
docs = loader.load()
docs

[Document(page_content='Luxembourg SUMO Trafﬁc (LuST) Scenario:\n24 Hours of Mobility\nfor Vehicular Networking Research\nLara CODECA, Raphael FRANK, Thomas ENGEL\nInterdisciplinary Centre for Security, Reliability and Trust\nUniversity of Luxembourg, 2721, Luxembourg\nEmails: lara.codeca@uni.lu raphael.frank@uni.lu thomas.engel@uni.lu\nAbstract —Different research communities varying from\ntelecommunication to trafﬁc engineering are working on prob-\nlems related to vehicular trafﬁc congestion, intelligent transporta-\ntion systems, and mobility patterns using information collected\nfrom a variety of sensors. To test the solutions, the ﬁrst step is\nto use a vehicular trafﬁc simulator with an appropriate scenario\nin order to reproduce realistic mobility patterns. Many mobility\nsimulators are available, and the choice is usually done based on\nthe size and type of simulation required, but a common problem\nis to ﬁnd a realistic trafﬁc scenario. In order to evaluate and\ncompare new c

In [62]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(docs)
documents

[Document(page_content='Luxembourg SUMO Trafﬁc (LuST) Scenario:\n24 Hours of Mobility\nfor Vehicular Networking Research\nLara CODECA, Raphael FRANK, Thomas ENGEL\nInterdisciplinary Centre for Security, Reliability and Trust\nUniversity of Luxembourg, 2721, Luxembourg\nEmails: lara.codeca@uni.lu raphael.frank@uni.lu thomas.engel@uni.lu\nAbstract —Different research communities varying from\ntelecommunication to trafﬁc engineering are working on prob-\nlems related to vehicular trafﬁc congestion, intelligent transporta-\ntion systems, and mobility patterns using information collected\nfrom a variety of sensors. To test the solutions, the ﬁrst step is\nto use a vehicular trafﬁc simulator with an appropriate scenario\nin order to reproduce realistic mobility patterns. Many mobility\nsimulators are available, and the choice is usually done based on\nthe size and type of simulation required, but a common problem\nis to ﬁnd a realistic trafﬁc scenario. In order to evaluate and', metadata={'s

In [63]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(documents, OpenAIEmbeddings())

In [64]:
query = "Typical Traffic option in Google Maps"
result = db.similarity_search(query)
result[0].page_content

'behaviours.\nTrafﬁc patterns plausibility\nIn order to show that the LuST Scenario behaves in a\nrealistic way, we decided to compare it with the Typical Trafﬁc\noption in Google Maps [21]. Figure 6a shows a snapshot of\nthe simulation at 8:00 o’clock in the morning, and Figure 6b\nshows the typical trafﬁc on a Monday morning at the same\ntime, provided by Google Maps. In this context, the relative'

In [77]:
from langchain_community.llms import OpenAI
# from langchain_community.llms import Ollama

llm = OpenAI(model="gpt-3.5-turbo-instruct")
# llm = Ollama(model="llama2")
llm

OpenAI(client=<openai.resources.completions.Completions object at 0x000001AD52984070>, async_client=<openai.resources.completions.AsyncCompletions object at 0x000001AD52987970>, openai_api_key='sk-proj-zNTas8QYpPwWqyNvXDr3T3BlbkFJxg9fKyH7ryPcxmuk8ENz', openai_proxy='')

In [78]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
                                          Answer the following question based only on the provided context. 
                                          Think step by step before providing a detailed answer.
                                          I will tip you $1000 if the user finds the answer helpful.
                                          <context>
                                          {context}
                                          </context>
                                          Question: {input}""") 

In [79]:
from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain = create_stuff_documents_chain(llm, prompt)

In [80]:
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001AD50F29270>)

In [81]:
## Retreiver Chain
## User asks for an enquiry, this enquiry goes through a Retreiver (which is connected to a vector store through the backend), and 
## it passes the embeddings retreived to an LLM (which already has our prompt), and hence generates a response

from langchain.chains import create_retrieval_chain

retreival_chain = create_retrieval_chain(retriever, document_chain)

In [86]:
retreival_chain.invoke({"input": "What is this paper about?"})

{'input': 'What is this paper about?',
 'context': [Document(page_content='different experiments.\nWe decided to use the road network of a real city as basis\nfor our scenario in order to reproduce real trafﬁc demand and\nmobility patterns. We chose the City of Luxembourg because\nits topology is comparable to that of many of European cities\nand because of available trafﬁc statistics, which can be used\nto calibrate the trafﬁc demand. Another advantage is that its\nsize is reasonable in terms of complexity for a microscopic\nsimulator.\nThe rest of the paper is organized as follows. Section II\nreviews the different trafﬁc scenarios available to the vehicular\nnetworking community and explains their limitations. Section\nIII explains in details how the LuST Scenario has been built\ncovering aspects such as the road topology, the population and\nthe trafﬁc demand, and the different kind of vehicles involved.\nSection IV presents an overview of the possible use cases\nfor the scenario. 