In [None]:
import pandas as pd
import os 
import requests
from dotenv import load_dotenv
# Load variables from a local .env file (if present) so that the
# os.getenv('GOOGLE_MAPS_API_KEY') lookup further down can find the API key.
load_dotenv()


def get_data(text, API_KEY, timeout=30):
    """Send a text-search request to the Google Places API and return the JSON body.

    Args:
        text: Free-text search query; sent as the ``textQuery`` field.
        API_KEY: Google API key; sent in the ``X-Goog-Api-Key`` header.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        ConnectionError: If the API responds with any status other than 200.
            The exception message is the raw response body.
    """
    payload = {"textQuery": text}

    # The field mask restricts the response to the place attributes listed here.
    field_mask = ",".join([
        "places.name",
        "places.id",
        "places.types",
        "places.formattedAddress",
        "places.location",
        "places.rating",
        "places.userRatingCount",
        "places.displayName",
        "places.reviews",
    ])
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": API_KEY,
        "X-Goog-FieldMask": field_mask,
    }

    response = requests.post(
        "https://places.googleapis.com/v1/places:searchText",
        json=payload,
        headers=headers,
        timeout=timeout,
    )

    # Fail loudly on anything but a plain 200 so callers never see partial data.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        raise ConnectionError(response.text)

    return response.json()



In [None]:
# Output folders: 'raw' receives the untouched API results,
# 'data' receives the transformed dataset.
paths = ['raw', 'data']

for path in paths:
    # exist_ok=True keeps this cell safe to re-run.
    os.makedirs(path, exist_ok=True)

In [None]:
# Free-text query sent to the Google Places text search.
# The same string is reused below as the CSV file name under ./raw and ./data
# and is stored in the 'query' column of the transformed dataset.
query = "Healthcare in Dubai"

In [None]:
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable
# (which load_dotenv() may have populated from a .env file); it is never
# hard-coded in the notebook.
API_KEY = os.getenv('GOOGLE_MAPS_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "GOOGLE_MAPS_API_KEY is not set; export it or add it to your .env file"
    )

data = get_data(query, API_KEY=API_KEY)

# A search without matches may come back without a 'places' key at all,
# so look it up defensively and stop with a readable message.
places = data.get('places', [])
if not places:
    raise ValueError(f"No places returned for query: {query!r}")

df = pd.DataFrame(places)
df.to_csv(f"./raw/{query}", index=False)


def _extract_review_texts(reviews):
    """Return the non-empty review texts of a single place.

    `reviews` is one cell of the raw 'reviews' column: a list of review dicts,
    or a non-list placeholder (NaN/None) when the place came back without any
    reviews. Reviews that are empty or carry no text are skipped.
    """
    if not isinstance(reviews, list):
        return []
    texts = []
    for review in reviews:
        if review and "text" in review:
            text = review["text"].get("text")
            if text:
                texts.append(text)
    return texts


# do some data transformation
# Flatten the nested location dict into two scalar columns.
df['latitude'] = df['location'].apply(lambda loc: loc['latitude'])
df['longitude'] = df['location'].apply(lambda loc: loc['longitude'])

# The column is absent altogether when none of the places has reviews.
if 'reviews' not in df.columns:
    df['reviews'] = None

# One entry per review text, still indexed by the row of its place.
# Places without usable reviews explode to NaN and are removed by dropna().
review_texts = df['reviews'].apply(_extract_review_texts).explode().dropna()

# Joining on the index repeats each place row once per review text; the inner
# join drops places that have no review text left.
df = (
    df.drop(
        columns=['location', 'reviews', 'name', 'formattedAddress', 'displayName'],
        errors='ignore',
    )
    .join(review_texts, how='inner')
    .reset_index(drop=True)
)
df['query'] = query

# load transformed dataset to use in webapp
df.to_csv(f"./data/{query}", index=False)

In [None]:
# Build one LangChain document per review row for the vector store:
# the 'reviews' text is the page content, the other selected columns travel
# along with each document.
from langchain_community.document_loaders import DataFrameLoader

document_columns = ['rating', 'userRatingCount', 'reviews', 'query']
review_frame = df[document_columns]
loader = DataFrameLoader(review_frame, page_content_column="reviews")
documents = loader.load()
documents

In [None]:
# Embedding model used to vectorise the review documents.
# NOTE(review): TogetherEmbeddings presumably needs a Together API key from the
# environment — confirm it is present in .env.
from langchain_together.embeddings import TogetherEmbeddings

embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")

# Replace the standard-library sqlite3 module with pysqlite3 BEFORE Chroma is
# imported: after the swap, every `import sqlite3` resolves to pysqlite3.
# The statement order below matters; do not move the Chroma import above it.
# NOTE(review): presumably needed because Chroma requires a newer SQLite than
# the one bundled with the system Python — confirm for your environment.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import sqlite3
from langchain_chroma import Chroma

# Open the vector store kept under ./chroma_db and add the review documents,
# embedding them with the model configured above.
# NOTE(review): re-running this cell likely inserts the same documents again —
# verify whether duplicates matter for the webapp.
db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
db.add_documents(documents=documents)