In [7]:
from langchain_community.document_loaders import (
    PyPDFLoader,
    Docx2txtLoader,
    CSVLoader,
    TextLoader
)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from typing import List
import os

def load_docs(folder_path: str) -> List[Document]:
    documents = []

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        if filename.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif filename.endswith(".csv"):
            loader = CSVLoader(file_path)
        elif filename.endswith(".docx"):
            loader = Docx2txtLoader(file_path)
        elif filename.endswith(".txt"):
            loader = TextLoader(file_path)
        else:
            print(f"[SKIP] Unsupported file type: {filename}")
            continue

        try:
            docs = loader.load()
            documents.extend(docs)
        except Exception as e:
            print(f"[ERROR] Failed to load {filename}: {e}")

    return documents


text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap= 200,
    length_function = len
)
#load documents from a folder
folder_path ="./data"
documents = load_docs(folder_path)
print(f"Loaded {len(documents)} documents from the folder")
splits = text_splitter.split_documents(documents)
print(f" Split the documents into {len(splits)} chunks.")

Loaded 694 documents from the folder
 Split the documents into 2533 chunks.


In [8]:


splits[1000]
documents[400]

Document(metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-09-03T21:24:38+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-09-03T21:24:38+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': './data\\PDF3_Regional_Travel_Guides.pdf', 'total_pages': 40, 'page': 0, 'page_label': '1'}, page_content='Ethiopia’s Regional Travel Guides\nComprehensive Guides to Oromia, Amhara, Tigray, SNNPR, Afar, Somali, and Other\nRegions')

In [9]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings( model_name = "all-MiniLM-L6-v2")
document_embeddings = embedding_function.embed_documents([split.page_content for split in splits])
document_embeddings[0]

  embedding_function = SentenceTransformerEmbeddings( model_name = "all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


[0.07520188391208649,
 0.06491214781999588,
 -0.007394856307655573,
 0.053554847836494446,
 -0.09985208511352539,
 -0.025790296494960785,
 -0.02081574872136116,
 -0.007259671576321125,
 -0.028843946754932404,
 0.052356455475091934,
 -0.02050691284239292,
 -0.10955535620450974,
 -0.06424570828676224,
 -0.06840629130601883,
 -0.006619225721806288,
 -0.021808423101902008,
 0.04156022518873215,
 -0.022118091583251953,
 0.01570218987762928,
 -0.13649344444274902,
 0.02337038703262806,
 -0.04741043597459793,
 0.0369960181415081,
 -0.07387041300535202,
 -0.04397568479180336,
 0.03561662510037422,
 -0.034159593284130096,
 -0.04017413780093193,
 0.02786516584455967,
 -0.03370138630270958,
 0.031002232804894447,
 0.05550430342555046,
 0.026123346760869026,
 0.02056647464632988,
 -0.009914271533489227,
 0.10292069613933563,
 0.00021525243937503546,
 -0.09381600469350815,
 0.05829606205224991,
 0.021001646295189857,
 0.10970146209001541,
 0.04543835669755936,
 0.045199014246463776,
 -0.11325206607

In [10]:
from langchain_community.vectorstores import Chroma

embedding_function =  SentenceTransformerEmbeddings()
collection_name ="my_collection"
vectorestore =Chroma.from_documents(collection_name = collection_name, documents = splits, embedding = embedding_function, persist_directory="./chroma_db")

print("Vector store created and persisted to './chroma_db' ")

  embedding_function =  SentenceTransformerEmbeddings()


Vector store created and persisted to './chroma_db' 


In [11]:
query = "list 2 turism attraction places in ethiopia"
search_results = vectorestore.similarity_search(query, k=3)


print(f"\nTop 2 most relevant chunks for the query: '{query} \n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content: {result.page_content}")
    print()


Top 2 most relevant chunks for the query: 'list 2 turism attraction places in ethiopia 

Result 1:
Source: ./data\Ethiopia_100plus_Places_To_Visit.pdf
Content: 100+ Places to Visit in Ethiopia
A Comprehensive Guide with Locations, History, and Descriptions

Result 2:
Source: ./data\Ethiopia_100plus_Places_To_Visit.pdf
Content: 100+ Places to Visit in Ethiopia
A Comprehensive Guide with Locations, History, and Descriptions

Result 3:
Source: ./data\Ethiopia_100plus_Tourism_Destinations.pdf
Content: Field
Value
Top 3 Nearby Attractions
Gondar Fasil Ghebbi, Gorgora (North Lake Tana), Narga Selassie Monastery (Tana)
Estimated Hotel Price Range
1200–6000  ($9.6–$48.0)
Transportation Options
Bus (Selam/Sky Bus), Flight (Ethiopian Airlines where applicable), Car rental with driver
Estimated Transportation Cost from Addis
Bus return ≈ 2028; Flight one-way ≈ 3500; Car/day ≈ 8000  |  Bus return ≈ $16.22; Flight one-way ≈ $28.0; Car/day ≈ $64.0
Entry Fees or Permits
200 ETB
Recommended Duration 

In [12]:
retriever = vectorestore.as_retriever(search_kwargs ={"k": 3})
retriever.invoke("tell me ethiopia's tourism and give me a Trip plan in ethiopia for 1 day?")

[Document(metadata={'creationdate': '2025-09-03T21:24:38+00:00', 'author': '(anonymous)', 'source': './data\\PDF3_Regional_Travel_Guides.pdf', 'trapped': '/False', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'keywords': '', 'total_pages': 40, 'page_label': '40', 'moddate': '2025-09-03T21:24:38+00:00', 'creator': '(unspecified)', 'page': 39, 'title': '(anonymous)', 'subject': '(unspecified)'}, page_content='travel guides highlight the breadth of Ethiopia’s experiences, allowing visitors to tailor\njourneys across diverse landscapes and traditions... This section expands with in-depth\ntravel recommendations, cultural insights, transportation advice, and practical safety tips\nfor travelers. This section expands with in-depth travel recommendations, cultural insights,\ntransportation advice, and practical safety tips for travelers. This section expands with\nin-depth travel recommendations, cultural insights, transportation advice, and practical\nsafety tips for travelers. T

In [13]:
from langchain_core.prompts import ChatPromptTemplate
template = """Answer the question based on the following context: 
{context}
Answer: """
prompt = ChatPromptTemplate.from_template(template)

In [14]:
def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [15]:
import os
os.environ["HF_HUB_DISABLE_XET"] = "1"

In [19]:
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama",
    model_name="qwen:0.5b"  # or 
)

In [27]:
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain =(
    {"context": retriever | docs2str, "question": RunnablePassthrough()} | prompt | llm | StrOutputParser()
)
question = "can you tell me about semin mountains in ethiopia ?"
response = rag_chain.invoke(question)
print(response)


The term "ethiopian highlands" refers to a region of high altitude in Africa that is distinct from other regions of high altitude in Africa.

The term "ethiopian highlands" is used to refer to a specific area or region of high altitude in Africa. For example, the term "ethiopian highlands" is used to refer to the African High Altitude Region (AHAAR) located at an elevation of over 10,000 feet (32,800 miles)).


In [28]:
from langchain_core.messages import HumanMessage, AIMessage

chat_history = []
chat_history.extend([
    HumanMessage(content = question),
    AIMessage(content = response)
])

In [42]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# 🧠 System prompt
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. DO NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# 🧠 Prompt template
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}")
])

# 🔗 Chain
contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()



question = "List 3 hotels in Ethiopia?"

response = contextualize_chain.invoke({
    "input": question,
    "chat_history": chat_history
})

print(response)

1. The Al-Asalam Hotel
2. The Mekawem Hotel
3. The Kassimwa Hotel


In [None]:
from langchain.chains import create_history_aware_retriever

# Use the LLM directly, not piped
history_aware_retriever = create_history_aware_retriever(
    llm,                     # Your Ollama-powered LLM
    retriever,                # Your Chroma retriever
    contextualize_q_prompt    # The prompt that reformulates questions
)

# Run a query with chat history
response = history_aware_retriever.invoke({
    "input": "List two places to visit?",
    "chat_history": chat_history
})

print(response)

[Document(metadata={'keywords': '', 'page': 0, 'source': './data\\Ethiopia_100plus_Places_To_Visit.pdf', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'subject': '(unspecified)', 'author': '(anonymous)', 'trapped': '/False', 'creator': '(unspecified)', 'creationdate': '2025-09-03T21:47:24+00:00', 'page_label': '1', 'total_pages': 20, 'moddate': '2025-09-03T21:47:24+00:00', 'title': '(anonymous)'}, page_content='100+ Places to Visit in Ethiopia\nA Comprehensive Guide with Locations, History, and Descriptions'), Document(metadata={'page': 0, 'creationdate': '2025-09-03T21:47:24+00:00', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'source': './data\\Ethiopia_100plus_Places_To_Visit.pdf', 'title': '(anonymous)', 'author': '(anonymous)', 'moddate': '2025-09-03T21:47:24+00:00', 'page_label': '1', 'creator': '(unspecified)', 'subject': '(unspecified)', 'trapped': '/False', 'total_pages': 20, 'keywords': ''}, page_content='100+ Places to Visit in Ethiopia\nA Comprehensiv

In [46]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "you are a helpful AI assistant. Use the following context to answer the user's question. "),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain) 

rag_chain.invoke({"input": "list 4 places to vist", "chat_history": chat_history})

{'input': 'list 4 places to vist',
 'chat_history': [{'role': 'user',
   'content': "I'm planning a trip to Ethiopia."},
  {'role': 'assistant',
   'content': 'Great! Ethiopia has amazing destinations like Lalibela and the Simien Mountains.'}],
 'context': [Document(metadata={'page_label': '12', 'moddate': '2025-09-03T21:22:59+00:00', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'source': './data\\PDF1_Ethiopia_Natural_Wonders.pdf', 'subject': '(unspecified)', 'title': '(anonymous)', 'creationdate': '2025-09-03T21:22:59+00:00', 'total_pages': 49, 'author': '(anonymous)', 'trapped': '/False', 'page': 11, 'keywords': ''}, page_content='ecological importance, and visitor perspectives. This section elaborates in detail on the\nthemes introduced, weaving together historical background, scientific explanation,\necological importance, and visitor perspectives. This section elaborates in detail on the\nthemes introduced, weaving together historical backg

In [62]:
import sqlite3
from datetime import datetime

DB_NAME = "rag_app.db"

def get_db_connection():
    conn = sqlite3.connect(DB_NAME)
    conn.row_factory = sqlite3.Row
    return conn

def create_application_logs(): 
    conn = get_db_connection()
    conn.execute(''' 
        CREATE TABLE IF NOT EXISTS application_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT,
            user_query TEXT,
            gpt_response TEXT,
            model TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')    
    conn.commit()
    conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model): 
    conn = get_db_connection()
    conn.execute(
        'INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
        (session_id, user_query, gpt_response, model)
    )
    conn.commit()
    conn.close()

def get_chat_history(session_id):
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute(
        'SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at',
        (session_id,)
    )
    messages = []
    for row in cursor.fetchall():
        messages.extend([
            {"role": "human", "content": row["user_query"]},
            {"role": "ai", "content": row["gpt_response"]}
        ])
    conn.close()
    return messages

# Initialize the table
create_application_logs()



In [65]:
import uuid
session_id = str(uuid.uuid4())
chat_history = get_chat_history(session_id)
print(chat_history)
question1 = "suggest 2 places near addis ababa?"  
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question1, answer1, "qwen:0.5b")

print(f"Human: {question1}")
print(f"AI:  {answer1}\n {session_id}")

[]
Human: suggest 2 places near addis ababa?
AI:  Option 1: Addis Ababa City Park and Museum.
Option 2: Addis Ababa Selva Gorge National Park and Museum.
 0ffe4ba8-394c-49b4-839b-ea646e85f2f3


In [67]:
import uuid
session_id = "0ffe4ba8-394c-49b4-839b-ea646e85f2f3"
chat_history = get_chat_history(session_id)
print(chat_history)
question1 = "is there any hotel list three hotels and their estimated price?"  
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question1, answer1, "qwen:0.5b")

print(f"Human: {question1}")
print(f"AI:  {answer1}\n")

[{'role': 'human', 'content': 'suggest 2 places near addis ababa?'}, {'role': 'ai', 'content': 'Option 1: Addis Ababa City Park and Museum.\nOption 2: Addis Ababa Selva Gorge National Park and Museum.'}, {'role': 'human', 'content': 'is there any hotel list three hotels?'}, {'role': 'ai', 'content': 'Yes, here is a list of hotels in Ethiopia:\n\n- Central Hotel Addis Ababa (CHA)\n- Addis Ababa City Park and Museum\n- The Grand Hotel Addis Ababa\n\nPlease note that this list may not be up-to-date with the latest information about Ethiopian hotels.'}]
Human: is there any hotel list three hotels and their estimated price?
AI:  Yes, here is a list of hotels in Ethiopia: 

- Central Hotel Addis Ababa  (CHA) 
- Addis Ababa City Park and Museum 
- The Grand Hotel Addis Ababa 


Please note that this list may not be up- to-Date with the latest information about Ethiopian hotels.



In [68]:
import uuid
session_id = "0ffe4ba8-394c-49b4-839b-ea646e85f2f3"
chat_history = get_chat_history(session_id)
print(chat_history)
question1 = "how much for one night?"  
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question1, answer1, "qwen:0.5b")

print(f"Human: {question1}")
print(f"AI:  {answer1}\n")

[{'role': 'human', 'content': 'suggest 2 places near addis ababa?'}, {'role': 'ai', 'content': 'Option 1: Addis Ababa City Park and Museum.\nOption 2: Addis Ababa Selva Gorge National Park and Museum.'}, {'role': 'human', 'content': 'is there any hotel list three hotels?'}, {'role': 'ai', 'content': 'Yes, here is a list of hotels in Ethiopia:\n\n- Central Hotel Addis Ababa (CHA)\n- Addis Ababa City Park and Museum\n- The Grand Hotel Addis Ababa\n\nPlease note that this list may not be up-to-date with the latest information about Ethiopian hotels.'}, {'role': 'human', 'content': 'is there any hotel list three hotels and their estimated price?'}, {'role': 'ai', 'content': 'Yes, here is a list of hotels in Ethiopia: \n\n- Central Hotel Addis Ababa  (CHA) \n- Addis Ababa City Park and Museum \n- The Grand Hotel Addis Ababa \n\n\nPlease note that this list may not be up- to-Date with the latest information about Ethiopian hotels.'}]
Human: how much for one night?
AI:  The estimated price pe