# Imports used in Project

In [None]:
# Install/upgrade the model, fine-tuning, database and tunnelling libraries (shell commands).
!pip install -U transformers accelerate bitsandbytes peft psycopg2 datasets pyngrok
# Web-serving stack used by the FastAPI section at the end of the notebook.
!pip install fastapi nest-asyncio uvicorn
import requests, torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

Collecting transformers
  Downloading transformers-4.50.1-py3-none-any.whl.metadata (39 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting peft
  Downloading peft-0.15.0-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12

# Hugging Face Login


Sign in to your **Hugging Face** account (or create one), open the token settings page, and create or copy an access token. Paste it into the prompt that appears when you run the cell below.

(Answer `n` when asked whether to add the token as a git credential.)



In [None]:
# Interactive login: prompts for the Hugging Face token (input is hidden) and saves it locally.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `LlamaFineTuning` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Ll

# NGROK configuration


Sign in to your **ngrok** account (or create one), open the dashboard, and copy your authtoken. Supply it as the last argument of the command below.

This lets you create a public URL, which is used later so that requests from the app can reach this Colab notebook.


In [None]:
!ngrok config add-authtoken 2ujpzry78BaXmcT8CABSODlM1Rz_2abMWhNo2X22bZVmJf9pH

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


# Local Database Connection
This step is a bit involved: the database must be configured locally, its TCP connection must be fronted by an HTTP API (such as PostgREST), and that HTTP endpoint must then be exposed with the ngrok command-line tool.

This is the database the RAG step retrieves the activities from.



In [None]:
# Public ngrok URL of the database's HTTP endpoint. Replace it with the URL
# printed by your own ngrok session before running the cells below.
pub_ip="https://f502-185-134-176-158.ngrok-free.app" # obtained from the local ngrok command-line tool
# Base URL that the retrieval functions prefix to their `/activities` queries.
NGROK_POSTGREST_URL = pub_ip

# Retrieve Activities Functions
We have two functions: one returns the activities as a string, and the other returns them as JSON cards that we can use in the UI integration.

In [None]:
def retrieve_activities(category_id, location, timeout=10):
    """Fetch the activities of a category/location as one display-ready string.

    Sends a GET request to `<NGROK_POSTGREST_URL>/activities`, filtering on
    `category_id` and `location` with `eq.` operators.

    Args:
        category_id: Value matched against the `category_id` column.
        location: Value matched against the `location` column.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        str: One formatted line per activity, joined with newlines; a
        "no activities" message when the result is empty; or
        "Error retrieving data." when the request fails or the status code
        is not 200.
    """
    # Pass the filters through `params` so requests URL-encodes the values.
    # Interpolating them into the URL breaks the query for locations that
    # contain spaces, "&" or "#".
    filters = {
        "category_id": f"eq.{category_id}",
        "location": f"eq.{location}",
    }

    try:
        # Without a timeout, a stalled tunnel would block the caller forever.
        response = requests.get(
            f"{NGROK_POSTGREST_URL}/activities", params=filters, timeout=timeout
        )
    except requests.RequestException:
        # Connection problems are reported the same way as a non-200 status.
        return "Error retrieving data."

    if response.status_code != 200:
        return "Error retrieving data."

    data = response.json()
    if not data:
        return "No available activities found in Adventura's database."

    return "\n".join(
        f"🔹 {item['name']} - {item['description']} (💰 Price: ${item['price']}, ⏳ Duration: {item['duration']} mins, 🏆 Seats: {item['nb_seats']}, ✅ Available: {'Yes' if item['availability_status'] else 'No'})"
        for item in data
    )

In [None]:
def retrieve_activities_two(category_id, location, timeout=10):
    """Fetch the available activities of a category/location as card dicts.

    Sends a GET request to `<NGROK_POSTGREST_URL>/activities`, filtering on
    `category_id`, `location` and `availability_status=is.true`.

    Args:
        category_id: Value matched against the `category_id` column.
        location: Value matched against the `location` column.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        list[dict]: One dict per activity with the keys `name`,
        `description`, `price` (float), `duration` (int), `seats` (int) and
        `location`. A numeric field whose source value is missing/None is
        returned as None. The list is empty when nothing matches, the request
        fails, or the status code is not 200.
    """
    # Pass the filters through `params` so requests URL-encodes the values.
    # Interpolating them into the URL breaks the query for locations that
    # contain spaces, "&" or "#".
    filters = {
        "category_id": f"eq.{category_id}",
        "location": f"eq.{location}",
        "availability_status": "is.true",
    }

    try:
        # Without a timeout, a stalled tunnel would block the caller forever.
        response = requests.get(
            f"{NGROK_POSTGREST_URL}/activities", params=filters, timeout=timeout
        )
    except requests.RequestException:
        # Connection problems are reported the same way as a non-200 status.
        return []

    if response.status_code != 200:
        return []

    def _cast(value, caster):
        # float(None)/int(None) raise TypeError; keep a missing value as None
        # so one incomplete row does not fail the whole lookup.
        return None if value is None else caster(value)

    return [
        {
            "name": item.get("name"),
            "description": item.get("description"),
            "price": _cast(item.get("price"), float),
            "duration": _cast(item.get("duration"), int),
            "seats": _cast(item.get("nb_seats"), int),
            "location": item.get("location"),
        }
        for item in (response.json() or [])
    ]


In [None]:
# Smoke test: fetch the category-2 activities located in "Ehden" as a formatted string.
print(retrieve_activities(2, "Ehden"))
# Same lookup through the card variant, which returns a list of dicts.
print(retrieve_activities_two(2, "Ehden"))


🔹 Picnic Adventure 4 - Enjoy an exciting picnic experience with professional guides and breathtaking views. (💰 Price: $375.01, ⏳ Duration: 480 mins, 🏆 Seats: 8, ✅ Available: Yes)
🔹 Picnic Adventure 10 - Enjoy an exciting picnic experience with professional guides and breathtaking views. (💰 Price: $483.92, ⏳ Duration: 180 mins, 🏆 Seats: 29, ✅ Available: Yes)
[{'name': 'Picnic Adventure 4', 'description': 'Enjoy an exciting picnic experience with professional guides and breathtaking views.', 'price': 375.01, 'duration': 480, 'seats': 8, 'location': 'Ehden'}, {'name': 'Picnic Adventure 10', 'description': 'Enjoy an exciting picnic experience with professional guides and breathtaking views.', 'price': 483.92, 'duration': 180, 'seats': 29, 'location': 'Ehden'}]


# Llama Model Access
The first time you run this cell, it may fail with an error because the model is hosted in a gated repository.

Accept the model's policy on the Hugging Face website (follow the link in the error message), then run the cell again.

In [None]:
# Hugging Face model id to load. Despite the variable name, the id currently
# selected is a Mistral instruct model, not a LLaMA checkpoint.
llama_model_name = "mistralai/Mistral-7B-Instruct-v0.2" # alternative id: "meta-llama/Llama-2-7b-chat-hf"

# Load the tokenizer that belongs to the selected model id.
tokenizer = AutoTokenizer.from_pretrained(llama_model_name)

# Load the model weights onto the GPU.
model = AutoModelForCausalLM.from_pretrained(
    llama_model_name,
    device_map="cuda",
    torch_dtype=torch.float16  # half precision (FP16)
)

# Text-generation pipeline; generate_rag_response_two() calls it through the global name `pipe`.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# NOTE(review): the message says "LLaMA", but the model id loaded above is Mistral.
print("✅ LLaMA model loaded successfully on GPU!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda


✅ LLaMA model loaded successfully on GPU!


# Generate RAG Response Function
This is the core of the project. The function retrieves the cards, builds a custom prompt that tells the model how to respond, handles the case where no activities are available, and finally runs the prompt through the pipeline with the defined generation hyperparameters, which we can tune later.

It then keeps only the chatbot's reply, which is the text that follows the `[/INST]` tag.

In [None]:
def generate_rag_response_two(user_query, category, location, max_activities=3):
    """Build a retrieval-augmented chatbot reply plus the activity cards behind it.

    Retrieves cards with `retrieve_activities_two`, lists the first
    `max_activities` of them in a chat prompt, runs the prompt through the
    global text-generation pipeline `pipe`, and returns the model's reply.

    Args:
        user_query: The user's message, inserted verbatim into the prompt.
        category: Category id forwarded to `retrieve_activities_two`.
        location: Location forwarded to `retrieve_activities_two`.
        max_activities: How many retrieved cards are listed in the prompt;
            the same number is quoted to the model as the recommendation
            limit so the two cannot disagree.

    Returns:
        dict: `{"chatbot_reply": <str>, "cards": <list of card dicts>}`.
        `cards` is the full list returned by the retrieval step, not only
        the cards shown to the model.
    """
    # Retrieve JSON cards
    cards = retrieve_activities_two(category, location)

    if not cards:
        retrieved_data = "No available activities were found in Adventura's database."
    else:
        # Numbered activity list for the prompt
        retrieved_data = "".join(
            f"\n{idx}. {card['name']} ({card['location']}): {card['description']}, Price: ${card['price']}, Duration: {card['duration']} mins, Seats: {card['seats']}"
            for idx, card in enumerate(cards[:max_activities], start=1)
        )

    # Reuse the tokenizer attached to the pipeline. Reloading it from the Hub
    # on every request adds latency, and its chat template could drift from
    # the model that is actually loaded.
    tokenizer = pipe.tokenizer

    # The stated limit follows `max_activities`; asking for more activities
    # than the prompt lists invites the model to invent some.
    messages = [
        {
            "role": "user",
            "content": (
                "You are Adventura's helpful assistant. Recommend activities based on the user query and database info below.\n\n"
                f"**User Query:**\n{user_query}\n\n"
                f"**Available Activities:**\n{retrieved_data}\n\n"
                "Please reply in a fun short way and make it exciting. "
                f"Greet the user, and naturally recommend up to {max_activities} activities."
            ),
        }
    ]

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)

    generated_text = pipe(
        prompt,
        max_new_tokens=700,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad token avoids the "Setting `pad_token_id`" warning on every call.
        pad_token_id=tokenizer.eos_token_id,
        # Return only the newly generated text instead of prompt + reply.
        return_full_text=False,
    )[0]['generated_text']

    # Defensive fallback: if the prompt is still present, keep what follows the
    # LAST "[/INST]" so a tag typed inside the user's query cannot be mistaken
    # for the end of the prompt.
    chatbot_reply = generated_text.rpartition("[/INST]")[2].strip()

    # Final payload
    return {
        "chatbot_reply": chatbot_reply,
        "cards": cards  # Already a list of dicts (clean JSON)
    }


In [None]:
# Testing RAG generation.
# Requires the cells that define retrieve_activities_two, pipe and
# generate_rag_response_two to have been run first in this kernel session.
query = "i wanna make a picnic in ehden can you help me with it ?"
location = "Ehden"
cat_id = 2
response = generate_rag_response_two(query, cat_id, location)
# Bare last expression so the notebook displays the returned dict.
response

NameError: name 'retrieve_activities_two' is not defined

# Context Detector Function
This code detects whether the user's prompt is out of context and, if so, replies politely. If the prompt is in context, it is passed on to the RAG function to generate recommendations.

In [None]:
class ContextDetector:
    """Keyword-based gate that decides whether a query is on-topic for Adventura."""

    def __init__(self):
        # Lower-case keywords and phrases that mark a query as travel-related.
        self.travel_keywords = {
            # Core Adventura concepts
            'activity', 'activities', 'event', 'events', 'booking', 'trip', 'plan', 'schedule',
            # Adventure types
            'picnic', 'hike', 'hiking', 'tour', 'sunset', 'sea trip', 'boat', 'jetski', 'festival', 'car event',
            'paragliding', 'camping', 'getaway', 'scenery', 'nature', 'mountain', 'beach', 'lake', 'snow', 'skiing',
            # Experience-related
            'recommend', 'suggest', 'idea', 'things', 'explore', 'discover', 'experience', 'fun', 'outdoor',
            # Locations or vibe
            'ehden', 'tripoli', 'batroun', 'lebanon', 'coast', 'forest', 'adventure', 'culture', 'local',
        }

        # Optional: off-topic queries collected by log_unrelated_query() for analysis
        self.non_travel_logs = []

    def is_travel_related(self, query):
        """Return True if any travel keyword occurs in the query as a substring.

        The check is case-insensitive and deliberately loose. Because it is a
        substring test, a keyword also matches inside a longer word (for
        example 'plan' inside 'explanation').
        """
        query_lower = query.lower()
        return any(keyword in query_lower for keyword in self.travel_keywords)

    def get_confidence_score(self, query):
        """Return the fraction of distinct query words that hit a travel keyword.

        Words are extracted case-insensitively with punctuation stripped, so
        "picnic?" counts as "picnic". Multi-word keywords such as 'sea trip'
        are matched as consecutive words, and every word of a matched phrase
        counts as a hit.

        Returns:
            float: Value between 0.0 and 1.0; 0.0 for a query without words.
        """
        import re  # local import keeps this cell independent of the import cell

        words = re.findall(r"\w+", query.lower())
        if not words:
            return 0.0

        unique_words = set(words)
        matched = unique_words & self.travel_keywords

        # Space-pad the normalised text so phrases only match on word boundaries.
        padded_text = f" {' '.join(words)} "
        for keyword in self.travel_keywords:
            if " " in keyword and f" {keyword} " in padded_text:
                matched.update(keyword.split())

        return len(matched) / len(unique_words)

    def get_non_context_response(self):
        """Return the friendly fallback message shown for off-topic queries."""
        return (
            "Hi there! 😊 I'm EVA Adventura's chatbot, built to help you plan your next adventure.\n\n"
            "Here's what I can help you with:\n"
            "🌍 Destination ideas all in Lebanon\n🗺️ Itineraries\n🎟️ Booking support\n🎒 Recommendations for where to go\n\n"
            "Just ask me anything travel-related — let's plan something amazing together! 🌟"
        )

    def log_unrelated_query(self, query):
        """Append an off-topic query to `non_travel_logs`."""
        self.non_travel_logs.append(query)


# Single module-level detector instance; handle_user_query() uses it through this global name.
context_detector = ContextDetector()


def handle_user_query(user_query, category=None, location=None):
    """Route an incoming query to the RAG system or to the off-topic fallback.

    Args:
        user_query: Raw text sent by the user.
        category: Forwarded unchanged to `generate_rag_response_two`.
        location: Forwarded unchanged to `generate_rag_response_two`.

    Returns:
        dict: The RAG payload when the global `context_detector` considers
        the query travel-related; otherwise the fallback message with an
        empty `cards` list (the query is logged on the detector first).
    """
    # On-topic: hand over to the RAG pipeline straight away.
    # (A stricter gate could additionally compare
    # context_detector.get_confidence_score(user_query) with a threshold
    # such as 0.2 and fall back below it.)
    if context_detector.is_travel_related(user_query):
        return generate_rag_response_two(user_query, category, location)

    # Off-topic: remember the query, then answer with the friendly fallback.
    context_detector.log_unrelated_query(user_query)
    fallback_text = context_detector.get_non_context_response()
    return {"chatbot_reply": fallback_text, "cards": []}


In [None]:
# On-topic query: expected to pass the keyword gate and reach the RAG function.
# NOTE(review): the query mentions Batroun while location="Ehden" is passed — confirm this is intended.
print("TRAVEL TEST:")
print(handle_user_query("ay picnic hayla b batroun?", category=2, location="Ehden"))

# Off-topic query: expected to produce the fallback message with no cards.
print("\nNON-TRAVEL TEST:")
print(handle_user_query("What is quantum entanglement?"))


TRAVEL TEST:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'chatbot_reply': "Hello there, Hayla from Batroun! I'm Adventura's helpful assistant and I'd be more than happy to help you plan an unforgettable adventure. Based on your query, it sounds like you're looking for a picnic experience. Well, you're in luck! We have not one, not two, but four fantastic picnic adventures for you to choose from.\n\nFirst up, let me introduce you to Picnic Adventure 4 in Ehden. Imagine spreading out a cozy blanket under the shade of towering trees, while professional guides take care of the rest. And let's not forget the breathtaking views that come with it! This picnic adventure is priced at $375.01 for a group of eight friends. It lasts for 8 hours, giving you plenty of time to relax and soak in the beauty of nature.\n\nNext, we have Picnic Adventure 10, also in Ehden. This picnic adventure is perfect for larger groups, accommodating up to 29 people. With professional guides, delicious food, and breathtaking views, this picnic is sure to be an adventure to

# FASTAPI/NGROK Configuration
We use this to receive the prompt sent by the user from the application, process it, and send back the generated reply.

In [None]:
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import nest_asyncio
from pyngrok import ngrok
import uvicorn

app = FastAPI()

# Open CORS policy for development: any origin, method and header may call the API.
# Credentials stay disabled because FastAPI's CORS documentation states that
# allow_origins/allow_methods/allow_headers cannot be ["*"] when
# allow_credentials=True. If the client ever needs cookies or auth headers,
# list the allowed origins explicitly and re-enable credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For dev only
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Patch asyncio so the server can later be started from inside the notebook.
nest_asyncio.apply()

# Open an ngrok tunnel to local port 8000, the port uvicorn is started on.
public_url = ngrok.connect(8000)
print(f"🔗 Public URL: {public_url}")



🔗 Public URL: NgrokTunnel: "https://22fa-34-125-90-189.ngrok-free.app" -> "http://localhost:8000"


In [None]:
@app.post("/chat")
async def chat_endpoint(request: Request):
    """Handle POST /chat: read the JSON body and return the chatbot payload.

    Expected body: a JSON object with `query` (default ""), and optionally
    `category` (default 1) and `location` (default "Tripoli"). Clients that
    send only `query` get the same category/location as before.

    Returns:
        JSONResponse: The dict produced by `handle_user_query`, or a 400
        response with an `error` message when the body is not a JSON object.
    """
    try:
        body = await request.json()
    except ValueError:
        # Malformed JSON: answer with a client error instead of an unhandled exception.
        return JSONResponse(status_code=400, content={"error": "Request body must be valid JSON."})

    if not isinstance(body, dict):
        return JSONResponse(status_code=400, content={"error": "Request body must be a JSON object."})

    user_query = body.get("query", "")
    # Take the filters from the request instead of hard-coding them; the
    # defaults keep the previous fixed values.
    category = body.get("category", 1)
    location = body.get("location", "Tripoli")

    # Use the context-aware handler instead of calling the RAG function directly
    response = handle_user_query(user_query, category, location)
    return JSONResponse(content=response)

# Launch FastAPI from inside the notebook on port 8000, the port the ngrok tunnel points to.
# The cell keeps running while the server handles requests; its request log is printed below.
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [305]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     94.187.0.97:0 - "OPTIONS /chat HTTP/1.1" 200 OK
INFO:     94.187.0.97:0 - "POST /chat HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


INFO:     94.187.0.97:0 - "POST /chat HTTP/1.1" 200 OK


In [None]:
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# NOTE(review): this cell repeats the app/tunnel setup of the previous FastAPI
# cell; running both calls ngrok.connect(8000) twice. Keep only one of them.
app = FastAPI()

# Open CORS policy for development: any origin, method and header may call the API.
# Credentials stay disabled because FastAPI's CORS documentation states that
# allow_origins/allow_methods/allow_headers cannot be ["*"] when
# allow_credentials=True. If the client ever needs cookies or auth headers,
# list the allowed origins explicitly and re-enable credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Patch asyncio so the server can later be started from inside the notebook.
nest_asyncio.apply()

# Open an ngrok tunnel to local port 8000, the port uvicorn is started on.
public_url = ngrok.connect(8000)
print(f"🔗 Public URL: {public_url}")

@app.post("/chat")
async def chat_endpoint(request: Request):
    """Handle POST /chat: read the JSON body and return the chatbot payload.

    Expected body: a JSON object with `query` (default ""), and optionally
    `category` (default 1) and `location` (default "Tripoli").

    Returns:
        JSONResponse: The dict produced by `handle_user_query`, or a 400
        response with an `error` message when the body is not a JSON object.
    """
    try:
        body = await request.json()
    except ValueError:
        # Malformed JSON: answer with a client error instead of an unhandled exception.
        return JSONResponse(status_code=400, content={"error": "Request body must be valid JSON."})

    if not isinstance(body, dict):
        return JSONResponse(status_code=400, content={"error": "Request body must be a JSON object."})

    user_query = body.get("query", "")
    category = body.get("category", 1)
    location = body.get("location", "Tripoli")

    print(f"📨 Query received: {user_query}")
    response = handle_user_query(user_query, category, location)
    return JSONResponse(content=response)

# Start the server on port 8000, the port the ngrok tunnel opened above points to.
uvicorn.run(app, host="0.0.0.0", port=8000)
