## Colab Imports

In [None]:
from google.colab import drive
from google.colab import userdata
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount("/content/drive")

Mounted at /content/drive


## Installation

In [None]:
!pip install -qU langchain-community faiss-cpu langchain-openai langchain langchainhub

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m835.3 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m396.2/396.2 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.7/150.7 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Environment Setup

In [None]:
import os
from langchain_openai import OpenAIEmbeddings


# Read the OpenAI API key from Colab's secret store (secret name: 'openAI')
# and expose it through the environment variable the OpenAI client reads.
os.environ['OPENAI_API_KEY'] = userdata.get('openAI')

# Embedding model used to embed queries against the stored index.
# NOTE(review): presumably this must be the same model the saved index was
# built with -- confirm against the indexing notebook.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Index paths (directory on Google Drive where the initialized index is saved).
DRIVE_PATH = '/content/drive/MyDrive/RAG_JSON_EMBEDDINGS_INDEX'
INDEX_DIR_PATH = os.path.join(DRIVE_PATH, "INDEX")
HA_INDEX_PATH = os.path.join(INDEX_DIR_PATH, "HA_INDEX")

# exist_ok=True makes this idempotent and avoids the check-then-create race of
# a separate os.path.exists() test.
os.makedirs(INDEX_DIR_PATH, exist_ok=True)

faiss_index_path = os.path.join(HA_INDEX_PATH, "combined_faiss_index")

print(faiss_index_path)

/content/drive/MyDrive/RAG_JSON_EMBEDDINGS_INDEX/INDEX/HA_INDEX/combined_faiss_index


## Index Loading

In [None]:
# Faiss index set
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Load the saved FAISS vector store from Drive, using `embeddings` for queries.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing
# the stored index files; only load an index from a location you trust.
loaded_vector_store = FAISS.load_local(
    faiss_index_path,
    embeddings,
    allow_dangerous_deserialization=True,
)

## Query Analyzer + Retrieval Chain

In [None]:
from langchain.prompts import ChatPromptTemplate

# Multi-query prompt: asks the model to rewrite the user's question into five
# alternative versions, one per line, so that retrieval can be run once per
# version. The only template variable is {question}.
# NOTE(review): the prompt text below contains typos ("featrues", "doens't",
# "If a row have"); they are part of the runtime string sent to the model and
# are left untouched here -- fix deliberately if desired.
template = """
    You are an expert planning a date for loved one, family and friends.
    Your task is retrieving relevant data to generate a date plan.
    You have access to a database of locations for dating in Seoul.
    Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector
    database.
    By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.


    Each row in the table represents a location and its featrues.
    Features are separated by [SEP].
    If a row have 'None' in the feature, it means that the row doens't have that feature.
    Every row is in Korean while column names are in English.
    Provide these alternative questions separated by newlines.
    Original question: {question}"""
# Build the chat prompt object from the template string above.
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

def _split_queries(text: str) -> list[str]:
    """Split the model's newline-separated answer into individual queries.

    Blank and whitespace-only lines are dropped and surrounding whitespace is
    stripped, so that no empty query is passed on to the retriever.

    Args:
        text: Raw text produced by the query-generation model.

    Returns:
        The non-empty lines of ``text``, in their original order.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


# Chain: prompt -> chat model (temperature 0) -> plain string -> list of queries.
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | _split_queries
)

In [None]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the unique union of several retrieval results.

    Each document is serialized with ``dumps`` so it can be compared for
    equality, de-duplicated, and then restored with ``loads``.

    De-duplication keeps the first occurrence of every document and preserves
    the order in which documents were encountered. A plain ``set`` would
    discard the retrieval order and produce a different ordering on every run.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list of the distinct documents, in first-seen order.
    """
    # Flatten the list of lists and serialize each document to a string.
    serialized_docs = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return [loads(serialized) for serialized in dict.fromkeys(serialized_docs)]

# Retriever over the loaded FAISS store, using the "mmr" search type with
# k=5 and fetch_k=10.
retriever = loaded_vector_store.as_retriever(
    search_type="mmr", search_kwargs=dict(k=5, fetch_k=10)
)

# Retrieval chain: generate the query variants, run the retriever on each
# variant, then merge the per-query results into one de-duplicated list.
retrieval_chain = (
    generate_queries
    | retriever.map()
    | get_unique_union
)

### Query Analyzer + Retrieval Chain Test


In [None]:
# Smoke test: run the retrieval chain on a sample question and print each
# retrieved document.
question = "성북구에서 할 수 있는 식도락 데이트!"
docs = retrieval_chain.invoke({"question": question})

for doc in docs:
    print(doc)

page_content='성북구 [SEP] 성북동 [SEP] 서울 성북구 동소문동4가 [SEP] 돈암장 [SEP] None [SEP] None [SEP] https://naver.me/G4wR3c1I [SEP] 좋아요 친절해요 굿 맛있어요 좋아요 좋아요 굿 친절해요 굿 굳 [SEP] 대표 키워드 일제시대한옥'
page_content='성북구 [SEP] 돈암1동 [SEP] 서울 성북구 북악산로 913 [SEP] 청춘식당 묵은지김치찜&알탕 [SEP] None [SEP] None [SEP] https://naver.me/GB0H2IAZ [SEP]  맛있어요 ^^ 맛있어요 번창하세요 굿 매콤하니 한끼 먹기좋아요 맛은그닥 배달로 먹었어요! 짭조름하고, 살짝 맵네요:)  굿굿 집에서 맛있는걸 먹을수 있어 조아여 [SEP] None'
page_content='중구 [SEP] 필동 [SEP] 서울 중구 동호로 287 앰배서더 서울 풀만 호텔 2층 [SEP] 호빈 [SEP] 1스타 - 요리가 훌륭한 레스토랑 미쉐린 가이드 서울 2024 [SEP] None [SEP] https://naver.me/GoBwg6yi [SEP] 남편 생일이라 베이징덕 먹으러 방문했어요. 분위기 좋고 음식도 맛있어요 💕  후덕죽 상무님 울 할부지 계신 곳. 말해모해 참말로. 미디어에 나오는 셰프들 주방에 드갑시다. 대한민국 식음료 업계 진짜.ㅂㄷ⁷ 인생 유린기와 짬뽕이었습니다! 너무 친절하고, 홀 이어도 프라이빗하였습니다! 매일매일 먹고 싶은 맛입니다!! 불도장은 정말 최고예요 플라시보 효과일지 모르지만 먹고나면 몸이 개운합니다 바닷가재 마늘찜은 전 별로였어요...랍스타는 먹고싶다면 랍스타집에 가든지 삼성동 인터컨티넨탈 파르나스로 가든지... 아무튼 불도장은 원조쉐프가 계셔서 그런지 여기가 최고입니다 [SEP] None'
page_content='성북구 [SEP] 성북동 [SEP] 서울 성북구 동소문로7길 5 이가주방 [SEP] 이가주방 [SEP] None [SEP] None [SEP] https:/

## Generation

In [None]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field

# Structured-output schema describing a single place; every field is optional
# and defaults to None.
# NOTE(review): these field names (dating*) differ from the keys requested in
# the RAG prompt (activity*), and the only visible use of this class is in a
# commented-out line -- confirm which naming is intended.
class Search(BaseModel):
    datingTitle: Optional[str] = Field(
        default=None, description="Get name of the place"
    )
    datingLoc: Optional[str] = Field(
        default=None, description="Get location of the place"
    )
    timeTotal: Optional[str] = Field(
        default=None, description="Generate expected time"
    )
    datingDescription: Optional[str] = Field(
        default=None, description="Generate description of the place"
    )
    datingImage: Optional[str] = Field(
        default=None, description="Get url of the location"
    )

In [None]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
# Sample user question used for the generation run below.
question = "성북구에서 할 수 있는 식도락 데이트!"
# RAG answer prompt. Template variables: {context} (the retrieved documents)
# and {question}. The doubled braces {{ }} are escaped so they appear as
# literal braces in the rendered prompt rather than as template variables.
# NOTE(review): the prompt also tells the model to say it doesn't know when
# the context is insufficient, so the reply is not guaranteed to be a JSON list.
template = """
- You are a helpful assistant that answers questions about the context below.
- You do not make up answers to questions that cannot be found in the context.
- If you don't know the answer to a question, just say that you don't know. Don't try to make up an answer.
- You will generate a list of activities and please follow the format:
[
  {{
    "activityTitle": Get the name of the place,
    "activityLoc": Get the address of the place,
    "timeTotal": Generate your expected time about the place or just put 1 hour,
    "activityDescription": Generate a description of the place based on your understanding,
    "activityImage": Get url of the place
  }},
  ...
]
- Make sure the list contains at least 5 activities
- You have to answer in Korean.

Answer the following question based on this context:

{context}

Question: {question}
"""

# Build the chat prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(template)

# Chat model used for answer generation (temperature 0). Binding the `Search`
# schema through with_structured_output(Search) was tried as an alternative
# and is currently not enabled.
llm = ChatOpenAI(temperature=0)

# RAG chain: the input dict is fanned out into the retrieved context and the
# original question, rendered into the prompt, sent to the model, and the
# reply is reduced to a plain string.
final_rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Run the full pipeline on the sample question.
generated_result = final_rag_chain.invoke({"question": question})

In [None]:
import json

def _parse_activities(raw_text: str) -> list:
    """Extract and parse the JSON array of activities from the model's reply.

    The model may wrap the array in a Markdown code fence or surround it with
    prose, and the prompt allows it to answer that it does not know. Only the
    span from the first '[' to the last ']' is parsed, so such wrappers are
    tolerated.

    Args:
        raw_text: The raw string returned by the RAG chain.

    Returns:
        The parsed list of activities.

    Raises:
        ValueError: If no JSON array is present or the array is not valid
            JSON. ``json.JSONDecodeError`` is itself a ``ValueError``, so
            callers catching either keep working.
    """
    start = raw_text.find("[")
    end = raw_text.rfind("]")
    if start == -1 or end < start:
        raise ValueError(
            f"No JSON array found in model output: {raw_text[:200]!r}"
        )
    try:
        return json.loads(raw_text[start:end + 1])
    except json.JSONDecodeError as err:
        raise ValueError(
            f"Model output is not valid JSON ({err}): {raw_text[:200]!r}"
        ) from err


activities_list = _parse_activities(generated_result)
for activity in activities_list:
    print(activity)

{'activityTitle': '불타는소금구이', 'activityLoc': '서울 성북구 보문로30길 43-3', 'timeTotal': '1시간', 'activityDescription': '고기와 된장찌개가 맛있는 불타는소금구이. 북적북적한 분위기와 푸짐한 안주로 유명한 맛집입니다.', 'activityImage': 'https://naver.me/x9JcBZ9g'}
{'activityTitle': '열린마당', 'activityLoc': '서울 성북구 동소문로2길 27', 'timeTotal': '1시간', 'activityDescription': '한잔하기 좋은 분위기와 가성비 좋은 안주로 소문난 열린마당. 맛있는 안주와 친절한 서비스가 자랑입니다.', 'activityImage': 'https://naver.me/IxD0x0dR'}
{'activityTitle': '다바타식당', 'activityLoc': '서울 성북구 삼선교로16길 54 2층', 'timeTotal': '1시간', 'activityDescription': '한식과 일식이 어우러진 맛있는 음식을 즐길 수 있는 다바타식당. 아늑한 분위기와 친절한 사장님으로 소문난 곳입니다.', 'activityImage': 'https://naver.me/FWJ6y4fy'}
{'activityTitle': '달달커피', 'activityLoc': '서울 성북구 동소문로 22-1', 'timeTotal': '1시간', 'activityDescription': '미각을 만족시키는 커피와 디저트를 즐길 수 있는 달달커피. 아늑한 분위기와 다양한 디저트로 소문난 카페입니다.', 'activityImage': 'https://naver.me/5cTd22tI'}
{'activityTitle': '퐁닭퐁닭 본점', 'activityLoc': '서울 성북구 성북로4길 52 한진한신아파트상가', 'timeTotal': '1시간', 'activityDescription': '매콤하고 고소한 맛으로 유명한 퐁닭퐁닭 본