In [None]:
# Check if GPU is available, if not you may need to change a runtime with GPU
!nvidia-smi

import sys
print(sys.executable)


Wed Nov 26 10:33:35 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.80                 Driver Version: 581.80         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2070      WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   40C    P0             55W /  175W |     821MiB /   8192MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
# Imports
from huggingface_hub import login
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from datasets import load_dataset
import pandas as pd
import tqdm
from sentence_transformers import SentenceTransformer
import faiss
import gradio as gr
import time
import statistics

print(torch.cuda.is_available()) 
print(torch.__version__)     
print(torch.version.cuda)     
print(torch.backends.cudnn.version())  
print(torch.cuda.is_available())       
print(torch.cuda.get_device_name(0))   


True
2.5.1+cu121
12.1
90100
True
NVIDIA GeForce RTX 2070


In [4]:
# Load the model

model_name = "mistralai/Mistral-7B-Instruct-v0.2"  
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"   # GPU
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
model
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512, #512
    temperature=0.5
)

Device set to use cuda:0


In [6]:
# Dataset preprocessing and conversion to RAG

df = pd.read_csv("Travel details dataset.csv")
df['Traveler age'] = df['Traveler age'].fillna(0).astype(int)
df['Duration (days)'] = df['Duration (days)'].fillna(0).astype(int)
df['Accommodation cost'] = df['Accommodation cost'].astype(str)
df['Transportation cost'] = df['Transportation cost'].astype(str)
df['Traveler gender'] = df['Traveler gender'].str.lower()


def row_to_sentence(row):
    return (
        f"({row['Traveler gender']}, {row['Traveler age']} years old, " #{row['Traveler name']} removed
        f"{row['Traveler nationality']}) went to {row['Destination']} from {row['Start date']} to {row['End date']} "
        f"({row['Duration (days)']} days). They stayed at a {row['Accommodation type']} costing ${row['Accommodation cost']}, "
        f"and used {row['Transportation type']} costing ${row['Transportation cost']}."
    )

df['RowString'] = df.apply(row_to_sentence, axis=1)
df_mistral = df['RowString']

print(df_mistral)



# Generate embeddings + FAISS index
embedder = SentenceTransformer("all-MiniLM-L6-v2")

texts = df['RowString'].tolist()
embeddings = embedder.encode(texts, convert_to_numpy=True, show_progress_bar=True).astype("float32")

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension) 
index.add(embeddings)
print(f"Indexed {index.ntotal} rows")

0      (male, 35 years old, American) went to London,...
1      (female, 28 years old, Canadian) went to Phuke...
2      (male, 45 years old, Korean) went to Bali, Ind...
3      (female, 29 years old, British) went to New Yo...
4      (female, 26 years old, Vietnamese) went to Tok...
                             ...                        
134    (male, 37 years old, Brazilian) went to Rio de...
135    (female, 29 years old, Canadian) went to Vanco...
136    (male, 34 years old, Chinese) went to Bangkok,...
137    (female, 25 years old, Spanish) went to Barcel...
138    (male, 39 years old, New Zealander) went to Au...
Name: RowString, Length: 139, dtype: object


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Indexed 139 rows


In [7]:
# Retrieval function and similarity comparison

def retrieve(query, top_k, threshold):
    query_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
    distances, indices = index.search(query_emb, top_k)

    # Convert L2 to pseudo-similarity
    similarities = 1 / (1 + distances[0])

    # Compute max similarity
    max_similarity = float(similarities.max())

    # Only include rows above threshold
    relevant_texts = [texts[i] for i, sim in zip(indices[0], similarities) if sim >= threshold]

    # Return only if threshold satisfied
    return relevant_texts, max_similarity

# Generation based on RAG similarity

def generate_answer(hist, query, top_k=5, threshold=0.43):
    context_rows, max_similarity = retrieve(query, top_k, threshold)

    if context_rows and max_similarity >= threshold:
        print(f"Using RAG (similarity={max_similarity:.2f} ≥ {threshold})")
        #  RAG prompt
        context = "\n".join(context_rows)
        prompt = f"""
        Instructions:
        You are a friendly and knowledgeable travel assistant that helps users plan short trips and vacations. 
        The "Context" section below includes a dataset of real travelers, their preferences, destinations, and trip details. 
        Use this data as a general guide to infer travel patterns and make recommendations.
        Do NOT make bullet points.
        When suggesting locations or activities:
        - Prefer destinations that are geographically close to the traveler’s starting location, unless they explicitly request otherwise.
        - Keep your responses brief (1–3 sentences maximum).
        - Speak conversationally — no lists or bullet points.
        - Use confident, friendly language.
        - If the dataset doesn’t provide relevant information, use your general travel knowledge to give useful advice.
        - Only mention gender or age of the user if they have already been mentioned.
        - Try to ask for prices/cost of the user and match it with the Context.
        - Do NOT mention the user previous travels and/or preferences.

        Always maintain privacy:
        - Never mention “the dataset” explicitly in your reply.
        - You may generalize patterns (e.g., “many travelers from Germany enjoy city breaks in Italy”), but keep it natural.

        If the user’s query is ambiguous, ask one short clarifying question instead of making assumptions.

        Context:
        {context}

        Chat History: {hist}

        User: {query}
        Assistant: """
        
    else:
        print(f"Skipping RAG (similarity={max_similarity:.2f} < {threshold})")
        prompt = f"""
        Instructions:
        You are a friendly travel assistant. Only provide travel suggestions if the user asks a question about a trip. 
        If the user just says 'Hi', 'Hello', or a greeting, respond with a friendly greeting and a short clarifying question about their travel preferences. 
        If the user does not specify his location and travel needs, ask for further clarification.
        Do NOT invent travel suggestions for greetings.
        
        Chat History: {hist}

        User: {query}
        Assistant: """

    output = llm(prompt)[0]["generated_text"]
    print(hist)
    out = output.split("Assistant:") # Split on assistant to keep what we need
    return out[-1]




In [None]:
# Simple Geo Tests 
hist = ""

queries = [
    "Hi!",
    "I'm a 41 year old female traveler living in Great Britain. I want to go for about a week that's not far away, what would fit?",
    "I'm a 41 year old female traveler living in Germany. I want to go for about a week that's not far away, what would fit?",
    "Im 18 and want to have fun someplace warm!, im looking for someplace cheap what can you give me",
    "I'm a 41 year old female traveler living in China. I want to go for about a week that's not far away, what would fit?",
    "I'm a 41 year old female traveler living in Moscow. I want to go for about a week that's not far away, what would fit?"
]

for q in queries:
    print("Q:", q)
    answer = generate_answer(hist, q)
    hist = ""
    print("A:", answer)
    print("------")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Q: Hi!
Skipping RAG (similarity=0.36 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



A:   Hello! I'd be happy to help you plan your next trip. Could you please tell me where you'd like to go and what kind of travel experience you're looking for? (Adventure, relaxation, culture, etc.)
------
Q: I'm a 41 year old female traveler living in Great Britain. I want to go for about a week that's not far away, what would fit?
Using RAG (similarity=0.48 ≥ 0.43)


In [None]:
# Chat test
def chatWindow(query, history):
    
    hist = ""
    for x in history:
        hist = hist + x['role'].title() + ": " + x['content'] + "\n"
        

    answer = generate_answer(hist, query)
    #hist = hist + "\nUser: " + query + "\nAssistant: " + answer

    return answer

demo = gr.ChatInterface(fn=chatWindow, type="messages", title="Travel Bot")
demo.launch()



* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Skipping RAG (similarity=0.36 < 0.43)



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Using RAG (similarity=0.44 ≥ 0.43)
User: Hi!
Assistant: Hello! Where would you like to go for your next trip and what type of travel are you looking for? (Adventure, Relaxation, Cultural exploration, etc.)



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Using RAG (similarity=0.62 ≥ 0.43)
User: Hi!
Assistant: Hello! Where would you like to go for your next trip and what type of travel are you looking for? (Adventure, Relaxation, Cultural exploration, etc.)
User: I want to go somewhere warm for a beach vacation, I'm from Shanghai
Assistant: Great choice! Based on your preference for a warm and relaxing beach vacation, I'd recommend considering destinations like Bali, Phuket in Thailand, or the Maldives. These places are popular among travelers from Shanghai for their beautiful beaches and tropical weather. Let me know if you'd like more information or if you have a specific budget in mind.



In [None]:
# Tests Quantitative
example_texts = ["Hi!", "I'm a 41 year old female traveler living in China. I want to go for about a week that's not far away, what would fit?", "I want to go somewhere warm in indonesia, what would fit?",
                "What are some interesting cities in Great Britain/UK?", "Is Barcalona a good place for a young person to visit, what can I do there?", "Hi!", "I'm a 41 year old female traveler living in China. I want to go for about a week that's not far away, what would fit?", "I want to go somewhere warm in indonesia, what would fit?",
                "What are some interesting cities in Great Britain/UK?", "Is Barcalona a good place for a young person to visit, what can I do there?", "Im Theodor!", "Whats close countries to visit, im in Japan",
                "How is indonesia for beach holiday", "what are some good cities in eastern europe", "I live in Moscow, where can i go for culture travel thats close?"]
results_rag = []
results_mistral = []
hist = ""
# RAG
for example in example_texts:
    start_time = time.time()
    context_rows, max_similarity = retrieve("Hi!", 5, 0.42)
    end_time = time.time()
    results_rag.append(end_time - start_time)

print(results_rag)
print("Average of RAG calls: " + str(sum(results_rag)/len(results_rag)))
print("Range:", max(results_rag) - min(results_rag))
print("Variance:", statistics.variance(results_rag))
print("Standard Deviation:", statistics.stdev(results_rag))

# Mistal/LLM
for example in example_texts:
    start_time = time.time()
    answer = generate_answer(hist, example)
    hist = ""
    end_time = time.time()
    results_mistral.append(end_time - start_time)

print(results_mistral)
print("Average of Mistral calls: " + str(sum(res

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[0.010002374649047852, 0.013995885848999023, 0.012000560760498047, 0.007998943328857422, 0.008999347686767578, 0.008001327514648438, 0.009000539779663086, 0.007999420166015625, 0.0069997310638427734, 0.0069997310638427734, 0.008001089096069336, 0.008000373840332031, 0.007997512817382812, 0.009002208709716797, 0.00699925422668457]
Average of RAG calls: 0.008799886703491211
Range: 0.006996631622314453
Variance: 3.7410095241544434e-06
Standard Deviation: 0.0019341689492271463
Skipping RAG (similarity=0.36 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.46 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.48 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.42 < 0.43)


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.44 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.36 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.46 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.48 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.42 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.44 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.37 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.47 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Using RAG (similarity=0.53 ≥ 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.38 < 0.43)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Skipping RAG (similarity=0.43 < 0.43)

[3.9015331268310547, 18.405828714370728, 6.992430925369263, 9.93808650970459, 22.44293761253357, 3.9812943935394287, 11.601935625076294, 49.63768124580383, 17.784074783325195, 14.804814338684082, 4.355045557022095, 12.22755765914917, 15.194730520248413, 52.05298829078674, 6.76103949546814]
Average of Mistral calls: 16.67213191986084
Range: 48.15145516395569
Variance: 224.30981282266026
Standard Deviation: 14.976976090742092


In [None]:
import sys
import os
print("Notebook Python:", sys.executable)
print("Environment:", os.environ.get("CONDA_PREFIX") or os.environ.get("VIRTUAL_ENV"))

Notebook Python: c:\Users\theod\miniconda3\python.exe
Environment: C:\Users\theod\miniconda3


In [None]:
ults_mistral)/len(results_mistral)))
print("Range:", max(results_mistral) - min(results_mistral))
print("Variance:", statistics.variance(results_mistral))
print("Standard Deviation:", statistics.stdev(results_mistral))
