# 1. start with small idea of RAG - search by keywords matching

In [1]:
# imports

import os
import glob
from dotenv import load_dotenv
import gradio as gr
from openai import OpenAI
import functools
from concurrent.futures import ThreadPoolExecutor
import time

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
# Load environment variables in a file called .env

load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
openai = OpenAI()

# Configuration

MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [3]:
# Performance improvement: Use caching for context loading
@functools.lru_cache(maxsize=None)
def load_context():
    """Load and cache context data to avoid repeated file I/O"""
    context = {}
    
    # Load employees with threading for faster I/O
    def load_employee_files():
        employee_context = {}
        employees = glob.glob("knowledge-base/employees/*")
        
        def load_single_employee(employee):
            name = employee.split(' ')[-1][:-3]
            try:
                with open(employee, "r", encoding="utf-8") as f:
                    return name, f.read()
            except Exception as e:
                print(f"Error loading {employee}: {e}")
                return name, ""
        
        # Use ThreadPoolExecutor for concurrent file loading
        with ThreadPoolExecutor(max_workers=4) as executor:
            results = executor.map(load_single_employee, employees)
            employee_context.update(dict(results))
        
        return employee_context
    
    # Load schools with threading
    def load_school_files():
        school_context = {}
        schools = glob.glob("knowledge-base/schools/*")
        
        def load_single_school(school):
            name = school.split(os.sep)[-1][:-3]
            try:
                with open(school, "r", encoding="utf-8") as f:
                    return name, f.read()
            except Exception as e:
                print(f"Error loading {school}: {e}")
                return name, ""
        
        with ThreadPoolExecutor(max_workers=4) as executor:
            results = executor.map(load_single_school, schools)
            school_context.update(dict(results))
        
        return school_context
    
    # Load both types concurrently
    with ThreadPoolExecutor(max_workers=2) as executor:
        employee_future = executor.submit(load_employee_files)
        school_future = executor.submit(load_school_files)
        
        context.update(employee_future.result())
        context.update(school_future.result())
    
    return context

In [4]:
# Load context once at startup
context = load_context()
print(f"Loaded context with {len(context)} documents: {list(context.keys())}")

Loaded context with 11 documents: ['Kim', 'Lee', 'Lan', 'Jae', 'Park', 'Ewha Womans University', 'KAIST', 'Korea University', 'POSTECH', 'Seoul National University', 'Yonsei University']


In [5]:
context["Lan"]

'# B√† Nguy·ªÖn Th·ªã Lan - Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh\n\n## Th√¥ng Tin C√° Nh√¢n\n\n**H·ªç v√† t√™n**: Nguy·ªÖn Th·ªã Lan\n**Ch·ª©c v·ª•**: Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh (CEO)  \n**Tu·ªïi**: 42  \n**Qu·ªëc t·ªãch**: Vi·ªát Nam  \n**Ng√¥n ng·ªØ**: Ti·∫øng Vi·ªát (b·∫£n ng·ªØ), Ti·∫øng H√†n (Th√†nh th·∫°o - TOPIK 6), Ti·∫øng Anh (Th√†nh th·∫°o - IELTS 8.0)  \n\n## Li√™n H·ªá\n\nüìû **ƒêi·ªán tho·∫°i**: 0901-111-222  \nüìß **Email**: lan.nguyen@koreastudyvn.com  \nüìß **Email c√° nh√¢n**: ceo@koreastudyvn.com  \nüè¢ **VƒÉn ph√≤ng**: T·∫ßng 12, Lotte Center, H√† N·ªôi  \n\n## H·ªçc V·∫•n\n\n### B·∫±ng C·∫•p Ch√≠nh\n- **2010**: Th·∫°c sƒ© Qu·∫£n tr·ªã Kinh doanh (MBA) - ƒê·∫°i h·ªçc Yonsei, Seoul, H√†n Qu·ªëc\n- **2005**: C·ª≠ nh√¢n Quan h·ªá Qu·ªëc t·∫ø - ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng, H√† N·ªôi, Vi·ªát Nam\n\n### Ch·ª©ng Ch·ªâ B·ªï Sung\n- **2018**: Ch·ª©ng ch·ªâ T∆∞ v·∫•n Gi√°o d·ª•c Qu·ªëc t·∫ø - ICEF (International Consultants for Education and Fairs)\n- **2016**: Ch·ª©ng ch·ªâ Qu·∫£

In [6]:
system_message = (
    "B·∫°n l√† m·ªôt chuy√™n gia t∆∞ v·∫•n du h·ªçc H√†n Qu·ªëc t·∫°i trung t√¢m Korea Study. "
    "Nhi·ªám v·ª• c·ªßa b·∫°n l√† tr·∫£ l·ªùi c√°c c√¢u h·ªèi li√™n quan ƒë·∫øn trung t√¢m, nh√¢n vi√™n, tr∆∞·ªùng h·ªçc, v√† th√¥ng tin visa m·ªôt c√°ch ng·∫Øn g·ªçn v√† ch√≠nh x√°c. "
    "N·∫øu b·∫°n kh√¥ng bi·∫øt c√¢u tr·∫£ l·ªùi, h√£y n√≥i r√µ r·∫±ng b·∫°n kh√¥ng bi·∫øt. "
    "Tuy·ªát ƒë·ªëi kh√¥ng b·ªãa ra th√¥ng tin n·∫øu kh√¥ng c√≥ ng·ªØ c·∫£nh li√™n quan ƒë∆∞·ª£c cung c·∫•p."
)

In [7]:
def get_relevant_context(message):
    relevant_context = []
    for context_title, context_details in context.items():
        if context_title.lower() in message.lower():
            relevant_context.append(context_details)
    return relevant_context  

In [8]:
get_relevant_context("Who is Timi?")

[]

In [9]:
get_relevant_context("who is Nguy·ªÖn Th·ªã Lan")

['# B√† Nguy·ªÖn Th·ªã Lan - Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh\n\n## Th√¥ng Tin C√° Nh√¢n\n\n**H·ªç v√† t√™n**: Nguy·ªÖn Th·ªã Lan\n**Ch·ª©c v·ª•**: Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh (CEO)  \n**Tu·ªïi**: 42  \n**Qu·ªëc t·ªãch**: Vi·ªát Nam  \n**Ng√¥n ng·ªØ**: Ti·∫øng Vi·ªát (b·∫£n ng·ªØ), Ti·∫øng H√†n (Th√†nh th·∫°o - TOPIK 6), Ti·∫øng Anh (Th√†nh th·∫°o - IELTS 8.0)  \n\n## Li√™n H·ªá\n\nüìû **ƒêi·ªán tho·∫°i**: 0901-111-222  \nüìß **Email**: lan.nguyen@koreastudyvn.com  \nüìß **Email c√° nh√¢n**: ceo@koreastudyvn.com  \nüè¢ **VƒÉn ph√≤ng**: T·∫ßng 12, Lotte Center, H√† N·ªôi  \n\n## H·ªçc V·∫•n\n\n### B·∫±ng C·∫•p Ch√≠nh\n- **2010**: Th·∫°c sƒ© Qu·∫£n tr·ªã Kinh doanh (MBA) - ƒê·∫°i h·ªçc Yonsei, Seoul, H√†n Qu·ªëc\n- **2005**: C·ª≠ nh√¢n Quan h·ªá Qu·ªëc t·∫ø - ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng, H√† N·ªôi, Vi·ªát Nam\n\n### Ch·ª©ng Ch·ªâ B·ªï Sung\n- **2018**: Ch·ª©ng ch·ªâ T∆∞ v·∫•n Gi√°o d·ª•c Qu·ªëc t·∫ø - ICEF (International Consultants for Education and Fairs)\n- **2016**: Ch·ª©ng ch·ªâ Qu·∫

In [10]:
get_relevant_context("who is Nguy·ªÖn Th·ªã Lan and introduce about Yonsei University")

['# B√† Nguy·ªÖn Th·ªã Lan - Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh\n\n## Th√¥ng Tin C√° Nh√¢n\n\n**H·ªç v√† t√™n**: Nguy·ªÖn Th·ªã Lan\n**Ch·ª©c v·ª•**: Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh (CEO)  \n**Tu·ªïi**: 42  \n**Qu·ªëc t·ªãch**: Vi·ªát Nam  \n**Ng√¥n ng·ªØ**: Ti·∫øng Vi·ªát (b·∫£n ng·ªØ), Ti·∫øng H√†n (Th√†nh th·∫°o - TOPIK 6), Ti·∫øng Anh (Th√†nh th·∫°o - IELTS 8.0)  \n\n## Li√™n H·ªá\n\nüìû **ƒêi·ªán tho·∫°i**: 0901-111-222  \nüìß **Email**: lan.nguyen@koreastudyvn.com  \nüìß **Email c√° nh√¢n**: ceo@koreastudyvn.com  \nüè¢ **VƒÉn ph√≤ng**: T·∫ßng 12, Lotte Center, H√† N·ªôi  \n\n## H·ªçc V·∫•n\n\n### B·∫±ng C·∫•p Ch√≠nh\n- **2010**: Th·∫°c sƒ© Qu·∫£n tr·ªã Kinh doanh (MBA) - ƒê·∫°i h·ªçc Yonsei, Seoul, H√†n Qu·ªëc\n- **2005**: C·ª≠ nh√¢n Quan h·ªá Qu·ªëc t·∫ø - ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng, H√† N·ªôi, Vi·ªát Nam\n\n### Ch·ª©ng Ch·ªâ B·ªï Sung\n- **2018**: Ch·ª©ng ch·ªâ T∆∞ v·∫•n Gi√°o d·ª•c Qu·ªëc t·∫ø - ICEF (International Consultants for Education and Fairs)\n- **2016**: Ch·ª©ng ch·ªâ Qu·∫

In [11]:
get_relevant_context("who is nguyen thi la and introduce about yonsei")

[]

In [12]:
def add_context(message):
    """Add relevant context to message"""
    relevant_context = get_relevant_context(message)
    if relevant_context:
        message += "\n\nNh·ªØng th√¥ng tin sau c√≥ th·ªÉ h·ªØu √≠ch cho vi·ªác tr·∫£ l·ªùi c√¢u h·ªèi n√†y:\n\n"
        for relevant in relevant_context:
            message += relevant + "\n\n"
    return message

In [13]:
def chat(message, history):
    """Optimized chat function with better error handling"""
    try:
        messages = [{"role": "system", "content": system_message}] + history
        message = add_context(message)
        messages.append({"role": "user", "content": message})

        stream = openai.chat.completions.create(
            model=MODEL, 
            messages=messages, 
            stream=True,
            max_tokens=1000,  # Limit response length for faster generation
            temperature=0.7
        )

        response = ""
        for chunk in stream:
            if chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"Xin l·ªói, ƒë√£ c√≥ l·ªói x·∫£y ra: {str(e)}"

In [14]:
# Launch first version
print("Launching keyword-based RAG chatbot...")
view = gr.ChatInterface(chat, type="messages").launch()

Launching keyword-based RAG chatbot...
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


# 2. RAG bigger idea with vector search - optimized version

In [15]:
# imports for langchain

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [16]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

folders = glob.glob("knowledge-base/*")

#text_loader_kwargs = {'encoding': 'utf-8'}
# N·∫øu d√≤ng tr√™n kh√¥ng ho·∫°t ƒë·ªông, ng∆∞·ªùi d√πng Windows c√≥ th·ªÉ d√πng d√≤ng d∆∞·ªõi thay th·∫ø
text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

print("Total documents loaded:", len(documents))


Total documents loaded: 17


In [17]:
documents[0]

Document(metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}, page_content='# V·ªÅ C√¥ng Ty - Korea Study Consultant Center\n\n## L·ªãch S·ª≠ Th√†nh L·∫≠p\n\nKorea Study Consultant Center ƒë∆∞·ª£c th√†nh l·∫≠p v√†o nƒÉm 2018 v·ªõi s·ª© m·ªánh k·∫øt n·ªëi c√°c b·∫°n tr·∫ª Vi·ªát Nam v·ªõi h·ªá th·ªëng gi√°o d·ª•c ch·∫•t l∆∞·ª£ng cao c·ªßa H√†n Qu·ªëc. ƒê∆∞·ª£c s√°ng l·∫≠p b·ªüi c√°c chuy√™n gia gi√°o d·ª•c c√≥ nhi·ªÅu nƒÉm kinh nghi·ªám t·∫°i H√†n Qu·ªëc, ch√∫ng t√¥i ƒë√£ h·ªó tr·ª£ h∆°n 2,000 h·ªçc sinh Vi·ªát Nam th·ª±c hi·ªán ∆∞·ªõc m∆° du h·ªçc t·∫°i x·ª© s·ªü kim chi.\n\n## T·∫ßm Nh√¨n & S·ª© M·ªánh\n\n### T·∫ßm Nh√¨n\nTr·ªü th√†nh trung t√¢m t∆∞ v·∫•n du h·ªçc H√†n Qu·ªëc h√†ng ƒë·∫ßu t·∫°i Vi·ªát Nam, mang ƒë·∫øn c∆° h·ªôi gi√°o d·ª•c t·ªët nh·∫•t cho th·∫ø h·ªá tr·∫ª Vi·ªát Nam.\n\n### S·ª© M·ªánh\n- Cung c·∫•p d·ªãch v·ª• t∆∞ v·∫•n du h·ªçc chuy√™n nghi·ªáp v√† uy t√≠n\n- H·ªó tr·ª£ to√†n di·ªán t·ª´ kh√¢u chu·∫©n b·ªã h·ªì s∆° ƒë·∫øn khi ƒë·ªãnh c∆∞

In [19]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,  # Slightly smaller chunks for better retrieval
    chunk_overlap=100,  # Reduced overlap for performance
    separators=["\n\n", "\n", ". ", " ", ""]  # Better separation
)

chunks = text_splitter.split_documents(documents)
print(f"Created {len(chunks)} chunks")

Created 155 chunks


In [38]:
chunks[9]

Document(metadata={'source': 'knowledge-base\\company\\overview.md', 'doc_type': 'company'}, page_content='## Tr∆∞·ªùng ƒê·ªëi T√°c Ch√≠nh\n\n### Top Universities\n- Seoul National University (ÏÑúÏö∏ÎåÄÌïôÍµê)\n- Yonsei University (Ïó∞ÏÑ∏ÎåÄÌïôÍµê)  \n- Korea University (Í≥†Î†§ÎåÄÌïôÍµê)\n- KAIST (ÌïúÍµ≠Í≥ºÌïôÍ∏∞Ïà†Ïõê)\n- POSTECH (Ìè¨Ìï≠Í≥µÍ≥ºÎåÄÌïôÍµê)\n\n### Private Universities\n- Hanyang University (ÌïúÏñëÎåÄÌïôÍµê)\n- Kyung Hee University (Í≤ΩÌù¨ÎåÄÌïôÍµê)\n- Ewha Womans University (Ïù¥ÌôîÏó¨ÏûêÎåÄÌïôÍµê)\n- Sogang University (ÏÑúÍ∞ïÎåÄÌïôÍµê)\n- Sungkyunkwan University (ÏÑ±Í∑†Í¥ÄÎåÄÌïôÍµê)\n\n### Language Institutes\n- Yonsei Korean Language Institute\n- Seoul National University LEI\n- Sogang Korean Language Program\n- Ewha Language Center\n\n## Khu V·ª±c Ph·ª•c V·ª•\n\n### Vi·ªát Nam\n- **Mi·ªÅn B·∫Øc**: H√† N·ªôi, H·∫£i Ph√≤ng, Nam ƒê·ªãnh, Th√°i B√¨nh\n- **Mi·ªÅn Trung**: ƒê√† N·∫µng, Hu·∫ø, Vinh, Quy Nhon\n- **Mi·ªÅn Nam**: TP.HCM, C·∫ßn Th∆°, V≈©ng T√†u, ƒê·ªìng Nai')

In [21]:
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"C√°c lo·∫°i t√†i li·ªáu ƒë√£ t√¨m th·∫•y: {', '.join(doc_types)}")

C√°c lo·∫°i t√†i li·ªáu ƒë√£ t√¨m th·∫•y: company, visas, schools, employees


In [22]:
for chunk in chunks:
    if 'Nguy·ªÖn Th·ªã Lan' in chunk.page_content:
        print(chunk)
        print("_________")

page_content='### 4. Hi·ªáu Qu·∫£
- R√∫t ng·∫Øn th·ªùi gian x·ª≠ l√Ω h·ªì s∆°
- Chi ph√≠ h·ª£p l√Ω, minh b·∫°ch
- Cam k·∫øt ho√†n ti·ªÅn n·∫øu kh√¥ng ƒë·∫≠u visa

## Th√†nh T·ª±u N·ªïi B·∫≠t

### S·ªë Li·ªáu Th·ªëng K√™
- **2,000+** h·ªçc sinh ƒë∆∞·ª£c h·ªó tr·ª£ th√†nh c√¥ng
- **98.5%** t·ª∑ l·ªá ƒë·∫≠u visa du h·ªçc
- **50+** tr∆∞·ªùng ƒë·∫°i h·ªçc ƒë·ªëi t√°c
- **95%** h·ªçc sinh h√†i l√≤ng v·ªõi d·ªãch v·ª•

### Gi·∫£i Th∆∞·ªüng
- **2022**: Trung t√¢m t∆∞ v·∫•n du h·ªçc uy t√≠n nh·∫•t Vi·ªát Nam
- **2021**: ƒê·∫°i l√Ω ch√≠nh th·ª©c xu·∫•t s·∫Øc c·ªßa ƒê·∫°i h·ªçc Yonsei
- **2020**: Top 3 c√¥ng ty t∆∞ v·∫•n du h·ªçc H√†n Qu·ªëc t·∫°i Vi·ªát Nam

## ƒê·ªôi Ng≈© L√£nh ƒê·∫°o

### Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh
**B√† Nguy·ªÖn Th·ªã Lan** - 15 nƒÉm kinh nghi·ªám trong lƒ©nh v·ª±c gi√°o d·ª•c qu·ªëc t·∫ø, t·ª´ng h·ªçc t·∫≠p v√† l√†m vi·ªác t·∫°i H√†n Qu·ªëc 8 nƒÉm.' metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}
_________
page_content='### Th√†nh Vi√™n C·ªßa


In [23]:
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go

In [24]:
# ƒê∆∞a c√°c ƒëo·∫°n vƒÉn b·∫£n (chunks) v√†o Vector Store, li√™n k·∫øt m·ªói ƒëo·∫°n v·ªõi m·ªôt vector embedding

embeddings = OpenAIEmbeddings()

# N·∫øu b·∫°n mu·ªën s·ª≠ d·ª•ng embeddings mi·ªÖn ph√≠ t·ª´ HuggingFace (thay v√¨ OpenAI),
# h√£y thay d√≤ng embeddings = OpenAIEmbeddings()
# b·∫±ng:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [25]:
# ta s·∫Ω s·ª≠ d·ª•ng m√¥ h√¨nh chi ph√≠ th·∫•p
MODEL = "gpt-4o-mini"

# ƒê·∫∑t t√™n cho database vector (c√≥ th·ªÉ t√πy ch·ªçn)
db_name = "vector_db"

# Ki·ªÉm tra n·∫øu database Chroma ƒë√£ t·ªìn t·∫°i, th√¨ x√≥a collection ƒë·ªÉ kh·ªüi ƒë·ªông l·∫°i t·ª´ ƒë·∫ßu
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

In [26]:
# T·∫°o vector store b·∫±ng Chroma
vectorstore = Chroma.from_documents(
    documents=chunks,              # Danh s√°ch c√°c ƒëo·∫°n vƒÉn b·∫£n ƒë√£ chia nh·ªè
    embedding=embeddings,          # H√†m embedding (v√≠ d·ª•: OpenAI ho·∫∑c HuggingFace)
    persist_directory=db_name      # Th∆∞ m·ª•c l∆∞u tr·ªØ database
)
# Ki·ªÉm tra s·ªë l∆∞·ª£ng document ƒë√£ ƒë∆∞·ª£c l∆∞u v√†o vector store
print(f"Vectorstore created with {vectorstore._collection.count()} documents")


Vectorstore created with 155 documents


In [27]:
# L·∫•y ra b·ªô s∆∞u t·∫≠p vector t·ª´ vectorstore
collection = vectorstore._collection

# L·∫•y 1 embedding t·ª´ database
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]

# Ki·ªÉm tra s·ªë chi·ªÅu (s·ªë ph·∫ßn t·ª≠ trong vector)
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

The vectors have 1,536 dimensions


In [28]:
sample_embedding

array([-0.00046667,  0.00158895,  0.01983302, ..., -0.00275115,
       -0.01760473,  0.00132589])

In [29]:
# L·∫•y to√†n b·ªô vector, t√†i li·ªáu v√† metadata t·ª´ collection
result = collection.get(include=['embeddings', 'documents', 'metadatas'])

# ƒê∆∞a embedding v√†o m·∫£ng numpy
vectors = np.array(result['embeddings'])

# L∆∞u l·∫°i vƒÉn b·∫£n
documents = result['documents']

# Tr√≠ch lo·∫°i t√†i li·ªáu t·ª´ metadata (gi·∫£ s·ª≠ c√≥ 'doc_type')
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]

# G√°n m√†u s·∫Øc t√πy theo lo·∫°i t√†i li·ªáu
colors = [['blue', 'green', 'red', 'orange'][['company', 'employees', 'visas', 'schools'].index(t)] for t in doc_types]


In [30]:
# 2D dimension!
# Gi·∫£m s·ªë chi·ªÅu c·ªßa vector xu·ªëng 2D b·∫±ng t-SNE
# (T-distributed Stochastic Neighbor Embedding)

tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# T·∫°o bi·ªÉu ƒë·ªì scatter 2D
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Lo·∫°i: {t}<br>VƒÉn b·∫£n: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='Bi·ªÉu ƒë·ªì 2D Chroma Vector Store',
    scene=dict(xaxis_title='x', yaxis_title='y'),
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show(renderer="browser")

In [31]:
from langchain.memory import ConversationBufferWindowMemory  
from langchain.chains import ConversationalRetrievalChain

In [32]:
# T·∫°o m√¥ h√¨nh Chat v·ªõi OpenAI
llm = ChatOpenAI(
    temperature=0.7, 
    model_name=MODEL,
)

# Thi·∫øt l·∫≠p b·ªô nh·ªõ h·ªôi tho·∫°i
memory = ConversationBufferWindowMemory(memory_key='chat_history', return_messages=True)

# T·∫°o retriever t·ª´ vector store (Chroma)
retriever = vectorstore.as_retriever()

# K·∫øt n·ªëi t·∫•t c·∫£ th√†nh m·ªôt chu·ªói h·ªôi tho·∫°i c√≥ kh·∫£ nƒÉng truy xu·∫•t (RAG pipeline)
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)



Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/



In [33]:
# Test query with performance monitoring
def test_query_performance():
    """Test query with timing"""
    query = "B·∫°n c√≥ th·ªÉ m√¥ t·∫£ ng·∫Øn g·ªçn v·ªÅ Korea study center kh√¥ng?"
    start_time = time.time()
    result = conversation_chain.invoke({"question": query})
    end_time = time.time()
    
    print(f"Query processed in {end_time - start_time:.2f} seconds")
    print("Answer:", result["answer"])
    if "source_documents" in result:
        print(f"Used {len(result['source_documents'])} source documents")

In [34]:
test_query_performance()

Query processed in 7.58 seconds
Answer: Korea Study Consultant Center (KSCC) l√† m·ªôt c√¥ng ty t∆∞ v·∫•n gi√°o d·ª•c qu·ªëc t·∫ø ƒë∆∞·ª£c th√†nh l·∫≠p v√†o nƒÉm 2018, v·ªõi s·ª© m·ªánh k·∫øt n·ªëi h·ªçc sinh Vi·ªát Nam v·ªõi h·ªá th·ªëng gi√°o d·ª•c ch·∫•t l∆∞·ª£ng cao c·ªßa H√†n Qu·ªëc. C√¥ng ty ƒë√£ h·ªó tr·ª£ h∆°n 2,000 h·ªçc sinh du h·ªçc t·∫°i H√†n Qu·ªëc, chuy√™n t∆∞ v·∫•n c√°c ch∆∞∆°ng tr√¨nh th·∫°c sƒ© v√† ti·∫øn sƒ©, ƒë·∫∑c bi·ªát trong c√°c ng√†nh STEM. KSCC c√≥ m·ªëi quan h·ªá ƒë·ªëi t√°c v·ªõi 68 tr∆∞·ªùng ƒë·∫°i h·ªçc v√† cam k·∫øt ƒë·∫£m b·∫£o ch·∫•t l∆∞·ª£ng t∆∞ v·∫•n ƒë·∫°t ti√™u chu·∫©n qu·ªëc t·∫ø. Tr·ª• s·ªü ch√≠nh ƒë·∫∑t t·∫°i H√† N·ªôi v√† c√≥ chi nh√°nh t·∫°i TP.HCM.


In [35]:
# set up a new conversation memory for the chat
memory = ConversationBufferWindowMemory(memory_key='chat_history', return_messages=True)

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [36]:
# Wrapping in a function - note that history isn't used, as the memory is in the conversation_chain

def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [37]:
# And in Gradio:

view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


In [39]:
# H√£y c√πng t√¨m hi·ªÉu xem ƒëi·ªÅu g√¨ ƒë∆∞·ª£c g·ª≠i ph√≠a sau h·∫≠u tr∆∞·ªùng

from langchain_core.callbacks import StdOutCallbackHandler

llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

memory = ConversationBufferWindowMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    retriever=retriever, 
    memory=memory, 
    callbacks=[StdOutCallbackHandler()]
)

query = "Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
**Q: Job prospects sau t·ªët nghi·ªáp nh∆∞ th·∫ø n√†o?**
A: R·∫•t t·ªët, ƒë·∫∑c bi·ªát cho R&D positions. Many graduates become leaders trong tech industry.

### Khoa Y h·ªçc
- B√°c sƒ© ƒëa khoa (6 nƒÉm)
- Nha khoa
- Y h·ªçc d·ª± ph√≤ng

### Khoa Kinh doanh
- Qu·∫£n tr·ªã kinh doanh
- Kinh t·∫ø h·ªçc
- T√†i ch√≠nh ng√¢n h√†ng

### Khoa Nh√¢n vƒÉn
- Ng√¥n ng·ªØ v√† vƒÉn h·ªçc H√†n Qu·ªëc
- L·ªãch s·ª≠ H√†n Qu·ªëc
- Tri·∫øt h·ªçc ph∆∞∆°ng ƒê√¥ng

## Y√™u c·∫ßu tuy·ªÉn sinh

### Sinh vi√™n qu·ªëc t·∫ø (Undergraduate)
- **H·ªçc l·ª±c:** T·ªët nghi·ªáp THPT v·ªõi ƒëi·ªÉm trung b√¨nh t·ªëi thi·ªÉu 3.5/4.0
- **Ti·∫øn

In [63]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferWindowMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG; k is how many chunks to use
retriever = vectorstore.as_retriever(search_kwargs={"k": 30})

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    retriever=retriever, 
    memory=memory, 
    callbacks=[StdOutCallbackHandler()]
)

query = "Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
**Q: Job prospects sau t·ªët nghi·ªáp nh∆∞ th·∫ø n√†o?**
A: R·∫•t t·ªët, ƒë·∫∑c bi·ªát cho R&D positions. Many graduates become leaders trong tech industry.

### Khoa Y h·ªçc
- B√°c sƒ© ƒëa khoa (6 nƒÉm)
- Nha khoa
- Y h·ªçc d·ª± ph√≤ng

### Khoa Kinh doanh
- Qu·∫£n tr·ªã kinh doanh
- Kinh t·∫ø h·ªçc
- T√†i ch√≠nh ng√¢n h√†ng

### Khoa Nh√¢n vƒÉn
- Ng√¥n ng·ªØ v√† vƒÉn h·ªçc H√†n Qu·ªëc
- L·ªãch s·ª≠ H√†n Qu·ªëc
- Tri·∫øt h·ªçc ph∆∞∆°ng ƒê√¥ng

## Y√™u c·∫ßu tuy·ªÉn sinh

### Sinh vi√™n qu·ªëc t·∫ø (Undergraduate)
- **H·ªçc l·ª±c:** T·ªët nghi·ªáp THPT v·ªõi ƒëi·ªÉm trung b√¨nh t·ªëi thi·ªÉu 3.5/4.0
- **Ti·∫øn

In [None]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]


In [None]:

view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?
Assistant: Trong c√¥ng ty, Sarah Park (Nguy·ªÖn Sarah) ƒë√£ t·ªët nghi·ªáp t·ª´ ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng H√† N·ªôi.
Follow Up Input: Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
**Q: Job pr

# 3 Improve RAG Ensemble Hybrid Retrieval.

In [66]:
from langchain.schema import BaseRetriever, Document
from langchain.retrievers import EnsembleRetriever
from typing import List, Dict

# 1. Fix keyword retriever to search content, not just title
class KeywordRetriever(BaseRetriever):
    context_dict: Dict[str, str]

    def get_relevant_documents(self, query: str) -> List[Document]:
        relevant_docs = []
        for title, content in self.context_dict.items():
            if any(kw in content.lower() for kw in query.lower().split()):
                relevant_docs.append(Document(page_content=content, metadata={"source": title}))
        return relevant_docs

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        return self.get_relevant_documents(query)

# 2. Use keyword + properly configured vector retriever
keyword_retriever = KeywordRetriever(context_dict=context)
vector_retriever = vectorstore.as_retriever(
    search_kwargs={"k": 30}
)

# 3. Ensemble
hybrid_retriever = EnsembleRetriever(
    retrievers=[keyword_retriever, vector_retriever],
    weights=[0.5, 0.5]
)

# 4. Conversation chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=hybrid_retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()]
)



Retrievers must implement abstract `_get_relevant_documents` method instead of `get_relevant_documents`


Retrievers must implement abstract `_aget_relevant_documents` method instead of `aget_relevant_documents`



In [88]:
query = "Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: nh√¢n vi√™n n√†o t·ªët nghi·ªáp ƒë·∫°i h·ªçc ngo·∫°i th∆∞∆°ng
Assistant: Trong th√¥ng tin ƒë√£ cung c·∫•p, c√≥ m·ªôt nh√¢n vi√™n t·ªët nghi·ªáp t·ª´ ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng H√† N·ªôi, ƒë√≥ l√† Park Sarah (Nguy·ªÖn Sarah), v·ªõi b·∫±ng c·ª≠ nh√¢n Quan h·ªá Qu·ªëc t·∫ø.
Human: Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng?
Assistant: C√≥ th√¥ng tin v·ªÅ B√† Nguy·ªÖn Th·ªã Lan, Gi√°m ƒê·ªëc ƒêi·ªÅu H√†nh (CEO) c·ªßa Korea Study Consultant Center, ƒë√£ t·ªët nghi·ªáp C·ª≠ nh√¢n Quan h·ªá Qu·ªëc t·∫ø t·ª´ ƒê·∫°i h·ªçc Ngo·∫°i th∆∞∆°ng, H√† N·ªôi, Vi·ªát Nam.
Follow Up Input: Nh√¢n vi√™n n√†o trong c√¥ng ty ƒë√£ t·ªët nghi·ªáp ƒê·∫°i h·ªçc Ngo

In [40]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


In [None]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)