## Colab Imports

In [None]:
# Colab-only helpers: `drive` mounts Google Drive and `userdata` is used
# further down to read the OpenAI key.
from google.colab import drive
from google.colab import userdata
# Mount Google Drive at /content/drive; the index paths below live under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Installation

In [None]:
!pip install -qU langchain-community faiss-cpu langchain-openai

## Env Setting

In [None]:
import os

# Export the OpenAI key through the environment instead of hardcoding it.
# The value is fetched via Colab's `userdata` under the name 'openAI'.
# NOTE(review): presumably read by the langchain_openai clients created below — confirm.
os.environ['OPENAI_API_KEY'] = userdata.get('openAI')

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding client handed to FAISS.load_local in the "Index Loading" section.
# NOTE(review): presumably must be the same model the saved index was built with — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
# Index paths: directory tree on Google Drive that holds the saved index.
DRIVE_PATH = '/content/drive/MyDrive/RAG_JSON_EMBEDDINGS_INDEX'
INDEX_DIR_PATH = os.path.join(DRIVE_PATH, "INDEX")
HA_INDEX_PATH = os.path.join(INDEX_DIR_PATH, "HA_INDEX")

# exist_ok=True keeps this cell idempotent on re-run without a separate
# check-then-create step.
os.makedirs(INDEX_DIR_PATH, exist_ok=True)

# Path passed to FAISS.load_local in the "Index Loading" section.
faiss_index_path = os.path.join(HA_INDEX_PATH, "combined_faiss_index")

print(faiss_index_path)

/content/drive/MyDrive/RAG_JSON_EMBEDDINGS_INDEX/INDEX/HA_INDEX/combined_faiss_index


## Index Loading

In [None]:
# Load the saved FAISS vector store from Drive.
# NOTE(review): `faiss` and `InMemoryDocstore` are not referenced in the cells
# visible here — confirm whether these imports are still needed.
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# SECURITY: allow_dangerous_deserialization=True opts in to loading serialized
# data (pickle-based per the LangChain docs — confirm), which can execute code
# on load. Only point faiss_index_path at index files you created yourself.
loaded_vector_store = FAISS.load_local(
    faiss_index_path, embeddings, allow_dangerous_deserialization=True
    )

## Query Analyzer
Build a chain from the components below to get better retrieval results:
- Query Schema
- Prompt
- Structured LLM

### Prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# System prompt for the query analyzer: it describes how rows of the location
# table are laid out so the model can interpret the user's question.
# NOTE(review): the last sentence asks for "a list of database queries", while
# the structured LLM this prompt is chained with returns a single Search
# object — confirm the intended wording before changing it.
system = """
    You are an expert planning a date for a loved one, family and friends.
    Your task is retrieving relevant data to generate a date plan.
    You have access to a database of locations for dating in Seoul.

    Each row in the table represents a location and its features.
    Features are separated by [SEP].
    If a row has 'None' in a feature, it means that the row doesn't have that feature.
    Every row is in Korean while column names are in English.

    Given a question, return a list of database queries that are relevant to the question.
"""
# Two-message chat template: the fixed system prompt followed by the user's
# question, filled in through the {question} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

### Structured LLM

#### Query Schema for structured LLM

In [None]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field

class Search(BaseModel):
  # Schema the structured LLM fills in for each question. Every field is an
  # optional string that defaults to None when not supplied.
  # Documented with comments rather than a docstring, since pydantic would copy
  # a docstring into the model's schema description.
  datingTitle: Optional[str] = Field(default=None, description="Main title or event name")
  datingTime: Optional[str] = Field(default=None, description="Event date and time")
  datingLoc: Optional[str] = Field(default=None, description="Event location")
  timeTotal: Optional[str] = Field(default=None, description="Total duration of the event")
  datingTheme: Optional[str] = Field(default=None, description="Event theme or type")

#### Structured LLM init

In [None]:
# Chat model used for query analysis.
# NOTE(review): temperature=0 is presumably chosen to keep the extracted fields repeatable — confirm.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# Wrap the model so its replies come back as Search instances (see the test output below).
structured_llm = llm.with_structured_output(Search)

### Query Analyzer Chain Init

In [None]:
# Pipeline: raw question string -> {"question": ...} -> prompt -> structured LLM (Search).
query_analyzer = {"question": RunnablePassthrough()} | prompt | structured_llm

### Query Analyzer Test

In [None]:
# Smoke test with a Korean query (roughly "Seongbuk-gu food tour!").
query_analyzer.invoke("성북구 식도락 여행!")

Search(datingTitle=None, datingTime=None, datingLoc='성북구', timeTotal=None, datingTheme=None)

### Retrieval Chain

In [None]:
# Reference example (not executed): metadata-filtered search on the loaded index.
# results = loaded_vector_store.similarity_search(query="thud", k=1, filter={"bar": "baz"})
# for doc in results:
#     print(f"* {doc.page_content} [{doc.metadata}]")

from typing import List
from langchain_core.documents import Document
def retrieval(search: Search) -> List[Document]:
  """Run a similarity search built from the populated fields of ``search``.

  Only fields that are actually set are used. Interpolating unset fields would
  put the literal text "None" into the query, and "None" is also the
  placeholder for missing features in the indexed rows, so such a query can be
  biased toward rows with many missing features.

  Args:
    search: Structured query produced by the query analyzer.

  Returns:
    The documents returned by ``loaded_vector_store.similarity_search``, or an
    empty list when no field of ``search`` is populated.
  """
  field_values = (
    search.datingTitle,
    search.datingTime,
    search.datingLoc,
    search.timeTotal,
    search.datingTheme,
  )
  # Keep only populated, non-blank fields; this also drops the indentation
  # that the previous multi-line template added to the query text.
  terms = [value.strip() for value in field_values if value and value.strip()]
  if not terms:
    # Nothing to search for; avoid sending an empty query to the embedder.
    return []
  query = "\n".join(terms)
  return loaded_vector_store.similarity_search(query)

NameError: name 'vector_store' is not defined

In [None]:
# Full pipeline: question -> Search (query_analyzer) -> documents (retrieval).
# NOTE(review): "retreival" is a misspelling of "retrieval"; the name is kept
# because a later cell invokes the chain under this name.
retreival_chain = query_analyzer | retrieval

### Test: Retrieval with Query Analysis

In [None]:
# Run the end-to-end chain on the same query used in the analyzer test.
result = retreival_chain.invoke("성북구 식도락 여행!")

In [None]:
# Print the raw text of each retrieved document, one per line.
for row in result:
  print(row.page_content)

성북구 [SEP] 성북동 [SEP] 서울 성북구 성북로23길 81 [SEP] 북정마을팔각정 [SEP] None [SEP] None [SEP] https://naver.me/5hgoVGNH [SEP] 좋아요 [SEP] None
성북구 [SEP] 삼선동 [SEP] 서울 성북구 삼선교로 22 1층 [SEP] 삼선 [SEP] None [SEP] None [SEP] https://naver.me/F9p5GpfU [SEP] None [SEP] None
성북구 [SEP] 삼선동 [SEP] 서울 성북구 삼선교로2길 11 [SEP] 새공간 [SEP] None [SEP] None [SEP] https://naver.me/xyUDWA9W [SEP] None [SEP] None
성북구 [SEP] 동선동 [SEP] 서울 성북구 흥천사길 49-23 [SEP] 적조사 [SEP] None [SEP] None [SEP] https://naver.me/F7CPNKxa [SEP] None [SEP] None


In [None]:
# Check the type of the returned items (raises IndexError if result is empty).
print(type(result[0]))

<class 'langchain_core.documents.base.Document'>
