# Retrieval Augmented Generation (RAG)

In [1]:
import os
import json
import tiktoken
from dotenv import load_dotenv
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import FAISS

# Load variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Fail fast with an actionable message. The previous self-assignment
# `os.environ[...] = os.getenv(...)` did nothing when the key was present and
# raised an opaque TypeError (str expected, not NoneType) when it was missing.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "before running this notebook."
    )

In [3]:
# Read the example documents from disk. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale encoding (which would
# garble non-ASCII characters on systems that do not default to UTF-8).
with open("dummy_data.json", "r", encoding="utf-8") as f:
    dummy_data = json.load(f)

# Show how many entries were loaded.
len(dummy_data)

100

In [4]:
# Count the gpt-3.5-turbo tokens of the corpus when all entries of dummy_data
# are concatenated without a separator.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
corpus_text = "".join(dummy_data)
num_tokens = len(encoding.encode(corpus_text))
num_tokens

7823

In [5]:
# A single hand-written event description, wrapped in a one-element list of
# strings. The adjacent literals below are concatenated into one string.
hacky_hour = [
    "Event Title: HackyHour: LargeLanguageModels for Developers\n\n"
    "Place: Degginger Regensburg\n\n"
    "Date: November 14, 2023\n\n"
    "Time: 18:30 o'clock \n\n"
    "Description: Dr. Johann Schenkl from trinnovative will demonstrate "
    "technical integrations such as embedding ChatGPT into personal "
    "applications, adding custom functions, and incorporating individual "
    "data into language models."
]

In [6]:
# Build one new list: every entry of dummy_data followed by the extra event text.
combine_data = [*dummy_data, *hacky_hour]

In [7]:
# Embed every text with OpenAI embeddings and index the vectors in FAISS.
vectorstore = FAISS.from_texts(combine_data, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Merge retrieved documents into one plain-text context string.

    Without this step the retriever's list of Document objects is stringified
    into the prompt as a Python repr (escaped newlines, metadata noise), which
    wastes tokens and gives the model a less readable context.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string, as
            returned by the retriever.

    Returns:
        The documents' texts joined by a visible ``---`` separator line.
    """
    # The texts themselves may contain blank lines, so a blank line alone
    # would not mark document boundaries unambiguously.
    return "\n\n---\n\n".join(doc.page_content for doc in docs)


template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# temperature=0 keeps the answers as repeatable as the API allows, so the
# recorded cell outputs stay meaningful after a restart-and-run-all.
model = ChatOpenAI(temperature=0)

# The user question is passed through unchanged and is also used as the
# retrieval query; the retrieved texts fill {context} in the prompt.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [8]:
# German for "Where does the Hacky Hour take place?"
location_question = "Wo findet die Hacky Hour statt?"
chain.invoke(location_question)

'Die Hacky Hour findet im Degginger Regensburg statt.'

In [9]:
# German for "At what time does the Hacky Hour start?"
start_time_question = "Um wie viel Uhr Startet die Hacky Hour?"
chain.invoke(start_time_question)

'Die Hacky Hour beginnt um 18:30 Uhr.'