In [23]:
import os
import json
import csv
from dotenv import load_dotenv
import openai
from langchain.chains import SequentialChain
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.vectorstores import Qdrant
import tiktoken

# Replace these placeholders with your actual Azure OpenAI credentials
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")  # API key
DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")  # The deployment name
AZURE_OPENAI_ENDPOINT = "https://096290-oai.openai.azure.com"
API_VERSION = "2023-05-15"

# Set the OpenAI API key and endpoint
openai.api_key = AZURE_OPENAI_API_KEY
openai.api_base = AZURE_OPENAI_ENDPOINT
openai.api_type = "azure"
openai.api_version = "2023-05-15"

# Initialize the Azure OpenAI Chat Model
llm = AzureChatOpenAI(
    azure_deployment=DEPLOYMENT_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_version=API_VERSION,
    openai_api_type="azure",  # Specify the API type as 'azure'
    temperature=0.7  # Adjust temperature as per your use case
)

# Define a chat prompt template
prompt_template = ChatPromptTemplate.from_template("{input}")




In [24]:
%reload_ext autoreload
%autoreload 2
from travelwise_clf import *

In [25]:
users_inputs = ["""Hi, we’re planning a family vacation and need some help putting it together. 
                  There will be four of us—two adults and two kids, aged 8 and 12. We’re looking for something relaxing but fun 
                  for the kids, too. Ideally, we’d want a mix of outdoor activities like hiking or water sports and some downtime to 
                  just enjoy the scenery or maybe a nice pool. We’re thinking of traveling sometime in the summer, probably for a week. 
                  If there’s a place with family-friendly attractions or something educational for the kids, that would be great""",    

    """My partner and I are looking to plan a romantic getaway for just the two of us. We’d like something quiet and intimate, 
    maybe with beautiful views, good food, and opportunities to unwind—like spa treatments. We’d also love to 
    explore a bit, maybe take a cooking class or do some wine tasting, but nothing too strenuous. We’re thinking of taking this trip 
    in the spring for about 4 or 5 days.""",

    """Hi there! My friends and I are planning an adventure trip, and we’d love your help organizing it. There are five of us, and 
    we’re all pretty active—we’re into things like kayaking, rock climbing, and maybe even some zip-lining. We’re looking for 
    something that gets us outdoors and keeps us moving but also gives us a chance to explore local culture and try some great food.
      We’re thinking about a 10-day trip in the fall""",
]

### Mini Agent - input parser

In [26]:
def parse_user_input(user_input):
    """
    Parse user input to extract key features like budget, number of people,
    wanted time, activities, and former trips.
    """
    prompt = PromptTemplate(
        input_variables=["user_input"],
        template="""
        Extract the following information from the user input and return it as a JSON object, if the informtion is not present write "not specified":
        - Budget
        - Number of people
        - Desired duration
        - Preferred activities
        - Previous trips and their feedback

        User input: {user_input}
        """
    )
    formatted_prompt = prompt.format(user_input=user_input)
    messages = [HumanMessage(content=formatted_prompt)]
    response = llm(messages=messages)
    return response



In [27]:
response = parse_user_input(users_inputs[1])
write_token_usage_to_csv(response)

print(response)

content='```json\n{\n  "Budget": "not specified",\n  "Number of people": 2,\n  "Desired duration": "4 or 5 days",\n  "Preferred activities": [\n    "quiet and intimate experiences",\n    "beautiful views",\n    "good food",\n    "spa treatments",\n    "exploring",\n    "cooking class",\n    "wine tasting"\n  ],\n  "Previous trips and their feedback": "not specified"\n}\n```' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 163, 'total_tokens': 256, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_f3927aa00d', 'finish_reason': 'stop', 'logprobs': None} id='run-db91d95a-904b-488d-b248-a5d3ee44d0f2-0'


# Storage Tool - API online data

In [28]:
import os
import random
import uuid
import numpy as np
from qdrant_client.http.models import PointStruct, VectorParams, Distance
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

load_dotenv()
# Qdrant connection parameters
# Replace with your actual Qdrant cluster details and API key if applicable
VECTOR_DB_URL = "https://dbcfa4fa-2d01-443f-a85d-41ab85d9f3ba.europe-west3-0.gcp.cloud.qdrant.io:6333"
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # if required

print(VECTOR_DB_URL)
print(QDRANT_API_KEY)
# Initialize Qdrant client.
qdrant_client = QdrantClient(url=VECTOR_DB_URL, api_key=QDRANT_API_KEY,)
embedding_model = SentenceTransformer('all-MiniLM-L6-v2') #('multi-qa-mpnet-base-dot-v1') - stronget model  
example_text = "Hello, how are you doing today?"
example_embedding = embedding_model.encode(example_text)
print(example_embedding.size)


https://dbcfa4fa-2d01-443f-a85d-41ab85d9f3ba.europe-west3-0.gcp.cloud.qdrant.io:6333
eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIiwiZXhwIjoxNzQ2NjA3MjQ4fQ.FySJ6BMF5YC5rPfL-1xPP5rHocFrbpvdOPf8KvbjiJA
384


In [29]:
from qdrant_client import QdrantClient
from env_variables import get_qdrant_credentials

qdrant_url, qdrant_api_key = get_qdrant_credentials()
qdrant_client = QdrantClient(
    url=qdrant_url, #"https://dbcfa4fa-2d01-443f-a85d-41ab85d9f3ba.europe-west3-0.gcp.cloud.qdrant.io:6333", 
    api_key= qdrant_api_key
)

print(qdrant_client.get_collections())

collections=[CollectionDescription(name='hotels'), CollectionDescription(name='activities'), CollectionDescription(name='flights')]


In [30]:
collection_name = "local_flights"
vector_dim = 5
qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_dim, distance=Distance.COSINE),
    )
print(f"Collection '{collection_name}' created.")

Collection 'local_flights' created.


In [31]:
def clean_collection(collection_name, vector_dim=384):
    """
    Clean the specified Qdrant collection by deleting all its vectors.
    """
    qdrant_client.delete_collection(collection_name=collection_name)
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_dim, distance=Distance.COSINE),
    )
    print(f"Collection '{collection_name}' has been cleaned and recreated.")

def delete_collection(collection_name):
    """
    Delete the specified Qdrant collection.
    """
    qdrant_client.delete_collection(collection_name=collection_name)
    print(f"Collection '{collection_name}' has been deleted.")


def delete_all_collections():
    """
    Delete all Qdrant collections.
    """
    collections = qdrant_client.get_collections()
    for collection in collections.collections:
        collection_name = collection.name
        qdrant_client.delete_collection(collection_name=collection.name)
        print(f"Collection '{collection_name}' has been deleted.")



## Create and store the data


In [None]:
# create the collections and the syntetic data
def create_collection_if_not_exists(collection_name: str, vector_dim: int = 5):
    """
    Create a collection in Qdrant if it does not already exist.
    """
    try:
        # Try to retrieve the collection information.
        collection_info = qdrant_client.get_collection(collection_name=collection_name)
        if collection_info:
            print(f"Collection '{collection_name}' already exists.")
            return
    except Exception as e:
        # If an exception occurs, assume the collection does not exist.
        print(f"Collection '{collection_name}' not found. Proceeding to create it.")

    # Create the collection.
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_dim, distance=Distance.COSINE),
    )
    print(f"Collection '{collection_name}' created.")
# Create collections for flights, hotels, and activities.
VECTOR_DIM = example_embedding.size  # dimension of dummy vector embeddings
create_collection_if_not_exists("flights", VECTOR_DIM)
create_collection_if_not_exists("hotels", VECTOR_DIM)
create_collection_if_not_exists("activities", VECTOR_DIM)


# Generate synthetic flight data.
def generate_flight_data(num_records: int):
    air_ports = ["JFK", "LHR", "CDG", "HND", "SYD", "TLV", "DXB", "SIN", "BKK", "LAX"]
    cities = ["New York", "London", "Paris", "Tokyo", "Sydney", "Tel Aviv", "Dubai", "Singapore", "Bangkok", "Los Angeles"]
    flights = []
    for _ in range(num_records):
        flight = {
            "id": str(uuid.uuid4()),
            "price($)": round(random.uniform(100, 2000), 2),
            "from city": random.choice(cities),
            "from airport": random.choice(air_ports),
            "to airport": random.choice(air_ports),
            "to city": random.choice(cities),
            "company_name": random.choice(["AirwaysX", "SkyHigh", "FlyFast", "JetSet"]),
            "company_rate": round(random.uniform(3.0, 5.0), 1),
            "seat_info": random.choice(["Economy", "Business", "First Class"]),
            "agent_commision": round(random.uniform(10, 200), 2),
        }
        flight_str = f"""{flight['from city']}({flight['from airport']}) to {flight['to city']}({flight['to airport']}) 
            flight by {flight['company_name']} - company_rate: {flight['company_rate']}, seat: {flight['seat_info']}, 
            price: ${flight['price($)']}, agent_commision: ${flight['agent_commision']}"""
        flight_vector = embedding_model.encode(flight_str)
        flight["vector"] = flight_vector
        flights.append(flight)
    return flights

# Generate synthetic hotel data.
def generate_hotel_data(num_records: int):
    hotels = []
    for _ in range(num_records):
        hotel = {
            "id": str(uuid.uuid4()),
            "price($)": round(random.uniform(50, 500), 2),
            "city": random.choice(["New York", "London", "Paris", "Tokyo", "Sydney"]),
            "hotel_name": random.choice(["HotelComfort", "StayEasy", "LuxuryLodge", "BudgetInn"]),
            "hotel_rate": round(random.uniform(3.0, 5.0), 1),
            "room_info": random.choice(["Single", "Double", "Suite"]),
            "room_size (m^2)": round(random.uniform(20, 200), 2),
            "commission": round(random.uniform(5, 100), 2),
        }
        hotel_str = f"{hotel['hotel_name']} in {hotel['city']} - room: {hotel['room_info']}, room_size: {hotel['room_size (m^2)']}m^2, price: ${hotel['price($)']}, commission: ${hotel['commission']}"
        hotel["vector"] = embedding_model.encode(hotel_str)
        hotels.append(hotel)
    return hotels

# Generate synthetic activity data.
def generate_activity_data(num_records: int):
    activities = []
    for _ in range(num_records):
        activity = {
            "id": str(uuid.uuid4()),
            "price($)": round(random.uniform(20, 300), 2),
            "location": random.choice(["New York", "London", "Paris", "Tokyo", "Sydney"]),
            "company_name": random.choice(["FunTimes", "AdventureX", "CityTours", "ExperienceIt"]),
            "company_rate": round(random.uniform(1.0, 5.0), 1),
            "activity_info": random.choice(["Museum visit", "City tour", "Concert ticket", "Theme park entry"]),
            "commission": round(random.uniform(5, 50), 2),
        }
        activity_str = f"{activity['activity_info']} in {activity['location']} by {activity['company_name']} - company_rate: {activity['company_rate']}, price: ${activity['price($)']}, commission: ${activity['commission']}"
        activity["vector"] = embedding_model.encode(activity_str)
        activities.append(activity)
    return activities

# Generate a fixed number of records for demonstration.
flights = generate_flight_data(10)
hotels = generate_hotel_data(10)
activities = generate_activity_data(10)

# Function to insert data into a specified Qdrant collection.
def insert_data_into_qdrant(collection_name: str, records: list):
    points = []
    for record in records:
        # Create a point where the vector and payload are set.
        point = PointStruct(
            id=record["id"],
            vector=record["vector"],
            payload={key: value for key, value in record.items() if key not in ["id", "vector"]}
        )
        points.append(point)
    # Upsert the points into the collection.
    try:
        response = qdrant_client.upload_points(collection_name=collection_name, points=points)
        print(f"✅ Inserted {len(points)} points into '{collection_name}' collection.")
    except Exception as e:
        print(f"🚨 ERROR: Failed to upload points - {e}")

# Insert the synthetic data into their respective Qdrant collections.
insert_data_into_qdrant("flights", flights)
insert_data_into_qdrant("hotels", hotels)
insert_data_into_qdrant("activities", activities)


Collection 'flights' not found. Proceeding to create it.


  qdrant_client.recreate_collection(


Collection 'flights' created.
Collection 'hotels' not found. Proceeding to create it.
Collection 'hotels' created.
Collection 'activities' not found. Proceeding to create it.
Collection 'activities' created.
point vector: [0.0848265141248703, -0.035609662532806396, -0.011766413226723671, 0.04116498678922653, -0.04114357382059097, 0.0037880134768784046, 0.028239095583558083, -0.030634624883532524, -0.04876832291483879, -0.002938648220151663, 0.018894556909799576, -0.06392373144626617, -0.026224590837955475, -0.014085163362324238, -0.03231102228164673, 0.04074921831488609, 0.04894621670246124, -0.07151076197624207, -0.01909833960235119, -0.010263597592711449, 0.07139193266630173, -0.04275971278548241, -0.017233464866876602, -0.019430141896009445, 0.07321050763130188, -0.023275118321180344, 0.05086842551827431, 6.357019447023049e-05, 0.003934634383767843, -0.03036872297525406, -0.000601458246819675, 0.09554342925548553, -0.06288205087184906, 0.014390612952411175, 0.02677217498421669, 0.05

In [None]:
# delete_all_collections()

Collection 'hotels' has been deleted.
Collection 'activities' has been deleted.
Collection 'flights' has been deleted.


In [38]:
# shows the collections
collections = qdrant_client.get_collections()
print(f"Available collections: {collections.collections}")
# Fetch and print an example from each collection
for collection in collections.collections:
    points = qdrant_client.scroll(collection_name=collection.name, limit=1)
    print(f"Example from collection '{collection.name}': {points}")

Available collections: [CollectionDescription(name='hotels'), CollectionDescription(name='activities'), CollectionDescription(name='flights')]
Example from collection 'hotels': ([Record(id='0340a863-8aa4-4687-a8ed-3a3ba90809bd', payload={'price($)': 142.85, 'city': 'Tokyo', 'hotel_name': 'StayEasy', 'hotel_rate': 4.7, 'room_info': 'Suite', 'room_size (m^2)': 100.31, 'commission': 80.04}, vector=None, shard_key=None, order_value=None)], '03535b31-9df6-46d9-b96e-b04ec310b160')
Example from collection 'activities': ([Record(id='00e5132a-ce38-46aa-a82f-a0e58964b527', payload={'price($)': 190.07, 'location': 'Paris', 'company_name': 'FunTimes', 'company_rate': 1.3, 'activity_info': 'Museum visit', 'commission': 16.08}, vector=None, shard_key=None, order_value=None)], '12deb1f8-8201-4c50-af66-a995b28ec172')
Example from collection 'flights': ([Record(id='116e2461-8789-4a36-a590-b29d348a489a', payload={'price($)': 947.46, 'from city': 'Tel Aviv', 'from airport': 'CDG', 'to airport': 'SIN', 't

In [39]:
collection_info = qdrant_client.get_collection(collection_name="flights")
print(collection_info)


status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=0 points_count=10 segments_count=2 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None,

In [51]:
# Check the vector database
query_text = "luxury hotel in Sydney"
query_vector = embedding_model.encode(query_text)

results = qdrant_client.search(
    collection_name="flights",  # target collection
    query_vector=query_vector, # your query vector
    limit=5                  # number of nearest neighbors to return
)

# Process and print the results
for point in results:
    print(f"ID: {point.id}")
    print(f"Payload: {point.payload}")
    print(f"Simillarity Score: {point.score}")  # if Qdrant returns the distance score
    print("-----")

  results = qdrant_client.search(


ID: 69ce7ecd-d5e8-458e-a109-654e620f75bc
Payload: {'price($)': 545.51, 'from city': 'Sydney', 'from airport': 'LHR', 'to airport': 'LHR', 'to city': 'Tokyo', 'company_name': 'SkyHigh', 'company_rate': 3.0, 'seat_info': 'Economy', 'agent_commision': 156.13}
Simillarity Score: 0.41626132
-----
ID: f14e018c-737e-43c1-b6c3-826ee202284a
Payload: {'price($)': 973.36, 'from city': 'Paris', 'from airport': 'LHR', 'to airport': 'BKK', 'to city': 'Sydney', 'company_name': 'JetSet', 'company_rate': 4.5, 'seat_info': 'Economy', 'agent_commision': 51.22}
Simillarity Score: 0.38546693
-----
ID: 3ff734a9-8042-4e87-baf2-0caf550b756c
Payload: {'price($)': 1756.9, 'from city': 'New York', 'from airport': 'DXB', 'to airport': 'LHR', 'to city': 'Dubai', 'company_name': 'FlyFast', 'company_rate': 4.5, 'seat_info': 'Business', 'agent_commision': 63.97}
Simillarity Score: 0.24956343
-----
ID: c7d48429-5759-424c-a5e5-349523c96c8f
Payload: {'price($)': 477.17, 'from city': 'Tel Aviv', 'from airport': 'SIN', 't

### Mini Agent - Match making trip 

In [52]:
# Define a function to query Qdrant collections
def query_qdrant(collection_name, customer_profile):
    query = f"Find options in {collection_name} matching this profile: {customer_profile}"
    vec = embedding_model.encode(query)
    results = qdrant_client.search(
        collection_name=collection_name,
        query_vector=vec,
        limit=5
    )
    return [result.payload for result in results]

def get_trip_options(user_input):
    """
    Get trip options based on user input by querying Qdrant collections and using LLM to assemble full trip options.

    Args:
        user_input (str): The input prompt from the user describing their travel preferences.

    Returns:
        str: The assembled trip options from the LLM.
    """
    # Parse user input to extract key features
    customer_profile = parse_user_input(user_input)

    # Query Qdrant collections for flights, hotels, and activities
    flights_options = query_qdrant("flights", customer_profile)
    hotels_options = query_qdrant("hotels", customer_profile)
    activities_options = query_qdrant("activities", customer_profile)

    # Combine all options into a single list
    all_options = {
        "flights": flights_options,
        "hotels": hotels_options,
        "activities": activities_options
    }

    # Use LLM to assemble full trip options
    system_prompt = SystemMessage(content="You are a professional travel agent. Based on the user's trip request and the available flights, hotels, and activities, build the client 3 options for a trip.")
    formatted_prompt = prompt_template.format(input=f"customer_profile: {customer_profile}, options: {all_options}",)
    messages = [system_prompt, HumanMessage(content=formatted_prompt)]
    
    response = llm(messages=messages)
    write_token_usage_to_csv(response)

    return response.content


In [53]:
# Example usage
user_input = users_inputs[1]

trip_options = get_trip_options(user_input)

print("Trip Options:", trip_options)

  results = qdrant_client.search(


Trip Options: Based on your preferences for quiet and intimate experiences, beautiful views, good food, spa treatments, cooking classes, wine tasting, and exploration, I’ve curated three trip options for your 4-5 day getaway. All options prioritize relaxation, scenic locations, and immersive activities. Let's explore these options:

---

### **Option 1: Serene Escape in Sydney**
**Overview:** A chic retreat in Sydney, blending luxury, exploration, and relaxation.

- **Flights:**
  - From Paris (LHR) to Sydney (BKK) with JetSet
  - Price: $973.36 per person (Economy, 4.5-star airline rating)

- **Hotel:**
  - Stay at **HotelComfort**, Sydney (4.5 stars)
  - Room: Suite, 149.82 m²
  - Price: $229.81 per night

- **Activities:**
  1. **City Tour** by CityTours (3.7 stars)  
     Price: $206.22 per person  
     Explore iconic landmarks while enjoying Sydney’s vibrant cityscape.
  2. **Wine Tasting & Vineyard Tour** (custom activity recommendation)  
     Price: Approx. $150 per person  
 

# Idan Example usage


In [None]:
# Example text completion function
def generate_completion(user_input):
    """
    Generates a completion using the Azure OpenAI Chat API via LangChain.

    Args:
        user_input (str): The input prompt from the user.

    Returns:
        str: The completion response from the model.
    """
    # Render the prompt
    formatted_prompt = prompt_template.format(input=user_input)
    messages = [HumanMessage(content=formatted_prompt)]

    # Generate response
    response = llm(messages=messages)
    return response.content

if __name__ == "__main__":
    # Example usage
    user_prompt = "Write a short poem about the sea."
    completion = generate_completion(user_prompt)
    print("AI Response:", completion)

  response = chat(messages=messages)


AI Response: Beneath the sky so vast and free,  
Whispers the soul of the endless sea.  
Its waves, they dance, a timeless tune,  
Under the gaze of a silver moon.  

It carries secrets, old and deep,  
Dreams it cradles, memories it keeps.  
A canvas of blue, where stories flow,  
Eternal tides come, eternal tides go.  
