## Connect

In [1]:
# Import libraries
import os
from dotenv import load_dotenv
import json
import weaviate
from weaviate.classes.config import Configure, Property, DataType
import warnings

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings('ignore')

# Load the environment variables
load_dotenv()

# Azure OpenAI settings. Indexing (rather than .get) makes a missing variable
# fail immediately with a KeyError that names it.
env_vars = os.environ
aoai_key = env_vars['AZURE_OPENAI_API_KEY']
aoai_endpoint = env_vars['AZURE_OPENAI_ENDPOINT']
aoai_deployment = env_vars['AZURE_OPENAI_DEPLOYMENT']
aoai_embedding = env_vars['AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT']

### Utility Functions

In [2]:
# JSON print beautifier
def json_print(data):
    """Print ``data`` to stdout as JSON indented by two spaces.

    ``data`` must be serializable by ``json.dumps``; otherwise the
    ``TypeError`` raised by ``json.dumps`` propagates to the caller.
    """
    formatted = json.dumps(data, indent=2)
    print(formatted)

### Create a Weaviate Embedded DB

In [None]:
# Headers sent with the connection: they carry the Azure OpenAI endpoint and key
# read from the environment.
vectorizer_headers = {
    "X-OpenAI-BaseURL": aoai_endpoint,
    "X-Azure-Api-Key": aoai_key,
}

# Connect to an embedded Weaviate instance, requesting server version 1.26.1.
client = weaviate.connect_to_embedded(
    version="1.26.1",
    headers=vectorizer_headers,
)

# Print the readiness flag reported by the client.
json_print(client.is_ready())

In [None]:
# Fetch the metadata reported by the Weaviate instance and pretty-print it.
db_meta = client.get_meta()
json_print(db_meta)

### Create Collection (EU Destinations)

In [49]:
# Start from a clean slate: drop the collection when it is already present so
# the creation cell below can be re-run.
collection_already_exists = client.collections.exists("eudestinations")
if collection_already_exists:
    client.collections.delete("eudestinations")

In [None]:
# Raw class definition for the destinations collection: it names the
# collection and configures the text2vec-openai vectorizer module.
class_obj = {
    "class": "eudestinations",
    "vectorizer": "text2vec-openai",  # Use OpenAI as the vectorizer
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
            "baseURL": aoai_endpoint
        }
    }
}

# `client` comes from weaviate.connect_to_embedded, i.e. it is a v4 client.
# The v4 client has no `schema` attribute (that is the v3 API), so the raw
# class dictionary is registered through `collections.create_from_dict`.
# The returned collection handle is bound to a name so the cell does not echo
# its repr.
# NOTE(review): the connection sends an Azure API key header; confirm whether
# the module config also needs Azure-specific settings (resource name /
# deployment id) for the embedding deployment in use.
destinations_collection = client.collections.create_from_dict(class_obj)


### Read European Tourist Destinations

In [None]:
# Location of the destinations dataset, relative to the notebook's working directory.
file_path = '../data/eu_destinations_n.json'

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which can fail on or garble non-ASCII characters in the file.
with open(file_path, "r", encoding="utf-8") as file:
    data = file.read()

# Parse the raw text into Python objects and preview the first record.
# `ds` is indexed and iterated below, so the file is expected to hold a JSON array.
ds = json.loads(data)
json_print(ds[0])

### Create Embeddings

In [None]:
# Import every destination record into the "eudestinations" collection.
#
# The v4 client has no `batch.configure(...)` / `add_data_object(...)` (v3 API);
# the equivalent is a fixed-size batch context whose `add_object` queues one
# object at a time and sends them in groups of `batch_size`.
with client.batch.fixed_size(batch_size=50) as batch:
    for i, d in enumerate(ds):

        print(f"importing: {i+1}")

        # Map the dataset's human-readable keys onto the collection's property names.
        properties = {
            "destination": d["Destination"],
            "region": d["Region"],
            "country": d["Country"],
            "category": d["Category"],
            "annualtourists": d["Approximate Annual Tourists"],
            "foods": d["Famous Foods"],
            "language": d["Language"],
            "besttimevisit": d["Best Time to Visit"],
            "costliving": d["Cost of Living"],
            "cultural": d["Cultural Significance"],
            "description": d["Description"],
        }

        batch.add_object(
            properties=properties,
            collection="eudestinations",
        )

# Per-object batch errors are collected on the client instead of being raised,
# so surface them explicitly rather than letting a partial import go unnoticed.
failed_imports = client.batch.failed_objects
if failed_imports:
    print(f"failed imports: {len(failed_imports)}")
    print(f"first failure: {failed_imports[0]}")

In [None]:
# Count the stored objects. The v4 client has no `client.query` (v3 API);
# aggregation is done on the collection handle via `aggregate.over_all`.
destinations = client.collections.get("eudestinations")
count = destinations.aggregate.over_all(total_count=True).total_count

# Wrap the integer in a dict so the printed output stays self-describing JSON.
json_print({"total_count": count})