In [None]:
!pip install sentence-transformers faiss-gpu

Load the data, generate sentence embeddings, and build the FAISS index.

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# --- Inputs -----------------------------------------------------------------
DATA_PATH = 'data.csv'
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

# --- Load the corpus and the sentence-embedding model -----------------------
data = pd.read_csv(DATA_PATH)
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# --- Embed every row's 'text_for_embedding' value ---------------------------
corpus_texts = data['text_for_embedding'].tolist()
embeddings = model.encode(corpus_texts, convert_to_tensor=False)

# Keep each row's vector next to the row so it can be read back after retrieval.
data['embeddings'] = list(embeddings)

# --- Build the FAISS index --------------------------------------------------
# The index is fed a float32 copy of the embedding matrix.
faiss_embeddings = np.array(embeddings, dtype='float32')

embedding_dim = faiss_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(faiss_embeddings)  # vectors are added in the same order as the rows of `data`


In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
Successfully installed openai-0.28.0


In [None]:
import openai

Retrieving documents with a FAISS nearest-neighbour search and a cosine-similarity filter

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def retrieve_itinerary_documents(query, index, model, data, k=10, threshold=0.3):
    """Return the rows of ``data`` that are relevant to ``query``.

    The query is embedded with ``model``, up to ``k`` nearest neighbours are
    looked up in ``index``, and each hit is kept only if the cosine similarity
    between its stored embedding and the query embedding is strictly greater
    than ``threshold``.

    Parameters
    ----------
    query : str
        Free-text search query.
    index : object
        Vector index exposing ``search(vectors, k) -> (distances, labels)``.
        The returned labels are used as positional row numbers into ``data``.
    model : object
        Embedding model exposing ``encode(list_of_str, convert_to_tensor=False)``.
    data : pandas.DataFrame
        Corpus table. Must contain an ``'embeddings'`` column holding one
        vector per row.
    k : int, optional
        Number of neighbours requested from the index (default 10).
    threshold : float, optional
        Minimum cosine similarity (exclusive) a hit needs to be returned
        (default 0.3).

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``data`` in the order the index returned them.
        Empty when the index returns no hits or no hit passes the threshold.

    Raises
    ------
    KeyError
        If ``data`` has no ``'embeddings'`` column.
    """
    if 'embeddings' not in data.columns:
        raise KeyError("The 'embeddings' column is missing from the DataFrame.")

    query_embedding = np.asarray(
        model.encode([query], convert_to_tensor=False), dtype='float32'
    )
    _, labels = index.search(query_embedding, k)

    # FAISS pads the label row with -1 when fewer than k neighbours exist.
    # Passing -1 to .iloc would silently select the LAST row of `data`, so the
    # padding has to be removed before the positional lookup.
    positions = np.asarray(labels)[0]
    positions = positions[positions >= 0]
    retrieved_docs = data.iloc[positions]

    # Nothing retrieved: return the empty frame rather than letting np.vstack
    # fail on an empty sequence.
    if retrieved_docs.empty:
        return retrieved_docs

    doc_embeddings = np.vstack(retrieved_docs['embeddings'].tolist())
    similarities = cosine_similarity(query_embedding.reshape(1, -1), doc_embeddings)

    return retrieved_docs[similarities.flatten() > threshold]


In [None]:
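# Reference only (not executed): the 'text_for_embedding' column appears to have been
# built as df['localName'] + ' ' + df['type'] before the data was saved — TODO confirm.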

Generating Itinerary

In [None]:
def generate_itinerary(retrieved_docs, api_key):
    """Ask the chat model for a 3-day Washington DC itinerary.

    Parameters
    ----------
    retrieved_docs : pandas.DataFrame
        Retrieved rows; their ``'text_for_embedding'`` values are joined with
        single spaces and inserted into the prompt as the list of locations.
    api_key : str
        Key passed through to ``openai.ChatCompletion.create``.

    Returns
    -------
    str
        The message content of the first choice in the completion, or the
        literal string ``"I don't know"`` when ``retrieved_docs`` is empty
        (no request is made in that case).
    """
    # Guard clause: without retrieved locations there is nothing to build on.
    if retrieved_docs.empty:
        return "I don't know"

    location_texts = retrieved_docs['text_for_embedding'].tolist()
    context = ' '.join(location_texts)
    full_prompt = f"Create a detailed 3-day itinerary for visiting Washington DC based on these locations: {context}"

    chat_messages = [
        {"role": "system", "content": "You are a travel assistant."},
        {"role": "user", "content": full_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        api_key=api_key,
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content']

In [None]:
import os
from getpass import getpass

# Never paste the key into the notebook: it would be saved with the file and
# leak when the notebook is shared. Read it from the environment, and fall
# back to an interactive prompt (input is not echoed) when it is not set.
api_key = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')

query = "Give me 3 day itenary for visiting DC"

# Retrieve up to 10 candidate rows for the query, then have the chat model
# turn them into an itinerary. generate_itinerary forwards api_key to OpenAI.
retrieved_docs = retrieve_itinerary_documents(query, index, model, data, k=10)
response = generate_itinerary(retrieved_docs, api_key)
print(response)

Similarities: [[0.43265986 0.41449198 0.39407778 0.3846485  0.37838867 0.37160242
  0.3679542  0.34996235 0.34931582 0.34903243]]
Day 1:
- Check in at The LINE DC HOTEL
- Visit the Smithsonian Institution Building
- Explore the National Mall and see iconic landmarks like the Lincoln Memorial, Washington Monument, and the U.S. Capitol
- Have lunch at a local food truck or restaurant near the National Mall
- Visit the United States Holocaust Memorial Museum
- Relax and enjoy dinner at a nearby restaurant
- Overnight stay at The LINE DC HOTEL

Day 2:
- Check out of The LINE DC HOTEL and check in at citizenM Washington DC Capitol HOTEL
- Take a guided tour of the Washington Monument
- Visit the White House and stroll through Lafayette Square
- Have lunch in the vibrant neighborhood of Adams Morgan
- Explore the vibrant streets and boutique shops of Georgetown
- Visit The Royal Sonesta Washington, DC Capitol Hill
- Dinner at a local restaurant in Capitol Hill
- Overnight stay at citizenM Wa