In [23]:
import weaviate
import os
import json
from dotenv import load_dotenv
import dateutil.parser

# Load environment variables from .env file
load_dotenv()

# Retrieve environment variables
cluster_url = os.getenv("WEAVIATE_CLUSTER_URL")
auth_api_key = os.getenv("WEAVIATE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize Weaviate client
client = weaviate.Client(
    url=cluster_url,  # Replace with your Weaviate endpoint
    auth_client_secret=weaviate.AuthApiKey(api_key=auth_api_key),  # Replace with your Weaviate instance API key
    additional_headers={
        "X-OpenAI-Api-Key": openai_api_key  # Replace with your inference API key
    }
)

# Define the schema for the "DetailedCountry22" class if it doesn't already exist
class_obj = {
    "class": "DetailedCountry22",
    "vectorizer": "text2vec-openai",  # If set to "none" you must always provide vectors yourself. Could be any other "text2vec-*" also.
    "moduleConfig": {
        "text2vec-openai": {},
        "generative-openai": {}  # Ensure the `generative-openai` module is used for generative queries
    },
    "properties": [
        {"name": "country", "dataType": ["string"]},
        {"name": "density", "dataType": ["string"]},
        {"name": "abbreviation", "dataType": ["string"]},
        {"name": "agricultural_land", "dataType": ["string"]},
        {"name": "land_area", "dataType": ["string"]},
        {"name": "armed_forces_size", "dataType": ["string"]},
        {"name": "birth_rate", "dataType": ["number"]},
        {"name": "calling_code", "dataType": ["number"]},
        {"name": "capital_major_city", "dataType": ["string"]},
        {"name": "co2_emissions", "dataType": ["string"]},
        {"name": "cpi", "dataType": ["number"]},
        {"name": "cpi_change", "dataType": ["string"]},
        {"name": "currency_code", "dataType": ["string"]},
        {"name": "fertility_rate", "dataType": ["number"]},
        {"name": "forested_area", "dataType": ["string"]},
        {"name": "gasoline_price", "dataType": ["string"]},
        {"name": "gdp", "dataType": ["string"]},
        {"name": "gross_primary_education_enrollment", "dataType": ["string"]},
        {"name": "gross_tertiary_education_enrollment", "dataType": ["string"]},
        {"name": "infant_mortality", "dataType": ["number"]},
        {"name": "largest_city", "dataType": ["string"]},
        {"name": "life_expectancy", "dataType": ["number"]},
        {"name": "maternal_mortality_ratio", "dataType": ["number"]},
        {"name": "minimum_wage", "dataType": ["string"]},
        {"name": "official_language", "dataType": ["string"]},
        {"name": "out_of_pocket_health_expenditure", "dataType": ["string"]},
        {"name": "physicians_per_thousand", "dataType": ["number"]},
        {"name": "population", "dataType": ["string"]},
        {"name": "labor_force_participation", "dataType": ["string"]},
        {"name": "tax_revenue", "dataType": ["string"]},
        {"name": "total_tax_rate", "dataType": ["string"]},
        {"name": "unemployment_rate", "dataType": ["string"]},
        {"name": "urban_population", "dataType": ["string"]},
        {"name": "latitude", "dataType": ["number"]},
        {"name": "longitude", "dataType": ["number"]},
        {"name": "wiki_title", "dataType": ["string"]},
        {"name": "wiki_summary", "dataType": ["string"]},
        {"name": "wiki_url", "dataType": ["string"]},
        {"name": "wiki_last_edited", "dataType": ["date"]},
        {"name": "wiki_detailed_content", "dataType": ["string"]}
    ]
}

# Add the class to the schema if it doesn't exist
if not client.schema.exists("DetailedCountry22"):
    client.schema.create_class(class_obj)

# Load JSON data from file
json_file_path = 'countries_with_wikipedia_sections_10.json'  # Replace with your JSON file path
with open(json_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)  # Load data from JSON file

# Configure batch
client.batch.configure(batch_size=1)

# Function to validate and insert data into Weaviate
def insert_country_data(country_data):
    # Validate and format the wiki_last_edited field
    wiki_last_edited = country_data.get("wiki_last_edited", "")
    if wiki_last_edited:
        try:
            # Check if the date is in RFC3339 format
            dateutil.parser.isoparse(wiki_last_edited)
        except ValueError:
            wiki_last_edited = None  # Set to None if not valid
    else:
        wiki_last_edited = None  # Set to None if empty

    # Convert fields to appropriate data types
    def parse_float(value, default=0.0):
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    birth_rate = parse_float(country_data.get("Birth Rate"))
    calling_code = parse_float(country_data.get("Calling Code"))
    cpi = parse_float(country_data.get("CPI"))
    fertility_rate = parse_float(country_data.get("Fertility Rate"))
    infant_mortality = parse_float(country_data.get("Infant mortality"))
    life_expectancy = parse_float(country_data.get("Life expectancy"))
    maternal_mortality_ratio = parse_float(country_data.get("Maternal mortality ratio"))
    physicians_per_thousand = parse_float(country_data.get("Physicians per thousand"))
    latitude = parse_float(country_data.get("Latitude"))
    longitude = parse_float(country_data.get("Longitude"))

    properties = {
        "country": country_data.get("Country", ""),
        "density": country_data.get(r"Density\n(P\/Km2)", ""),
        "abbreviation": country_data.get("Abbreviation", ""),
        "agricultural_land": country_data.get("Agricultural Land( %)", ""),
        "land_area": country_data.get("Land Area(Km2)", ""),
        "armed_forces_size": country_data.get("Armed Forces size", ""),
        "birth_rate": birth_rate,
        "calling_code": calling_code,
        "capital_major_city": country_data.get(r"Capital\/Major City", ""),
        "co2_emissions": country_data.get("Co2-Emissions", ""),
        "cpi": cpi,
        "cpi_change": country_data.get("CPI Change (%)", ""),
        "currency_code": country_data.get("Currency-Code", ""),
        "fertility_rate": fertility_rate,
        "forested_area": country_data.get("Forested Area (%)", ""),
        "gasoline_price": country_data.get("Gasoline Price", ""),
        "gdp": country_data.get("GDP", ""),
        "gross_primary_education_enrollment": country_data.get("Gross primary education enrollment (%)"),
        "gross_tertiary_education_enrollment": country_data.get("Gross tertiary education enrollment (%)"),
        "infant_mortality": infant_mortality,
        "largest_city": country_data.get("Largest city", ""),
        "life_expectancy": life_expectancy,
        "maternal_mortality_ratio": maternal_mortality_ratio,
        "minimum_wage": country_data.get("Minimum wage", ""),
        "official_language": country_data.get("Official language", ""),
        "out_of_pocket_health_expenditure": country_data.get("Out of pocket health expenditure", ""),
        "physicians_per_thousand": physicians_per_thousand,
        "population": country_data.get("Population", ""),
        "labor_force_participation": country_data.get("Population: Labor force participation (%)", ""),
        "tax_revenue": country_data.get("Tax revenue (%)", ""),
        "total_tax_rate": country_data.get("Total tax rate", ""),
        "unemployment_rate": country_data.get("Unemployment rate", ""),
        "urban_population": country_data.get("Urban_population", ""),
        "latitude": latitude,
        "longitude": longitude,
        "wiki_title": country_data.get("wiki_title", ""),
        "wiki_summary": country_data.get("wiki_summary", ""),
        "wiki_url": country_data.get("wiki_url", ""),
        "wiki_last_edited": wiki_last_edited,
        "wiki_detailed_content": country_data.get("wiki_detailed_content", "")
    }

    client.data_object.create(properties, "DetailedCountry22")

# Batch import data
with client.batch as batch:
    for i, d in enumerate(data):
        print(f"Importing country: {i + 1}")
        insert_country_data(d)

print("Data import completed successfully.")

# Example query to fetch countries similar to a concept
response = (
    client.query
    .get("DetailedCountry22", ["country", "wiki_summary"])  # Fetch specific fields
    .with_near_text({"concepts": ["Football"]})  # Example query concept
    .with_limit(2)  # Limit the results to 2
    .do()
)

# Print the response in a formatted JSON
print(json.dumps(response, indent=4))

Importing country: 1
Importing country: 2
Importing country: 3
Importing country: 4
Importing country: 5
Importing country: 6
Importing country: 7
Importing country: 8
Importing country: 9
Importing country: 10
Importing country: 11
Importing country: 12
Importing country: 13
Importing country: 14
Importing country: 15
Importing country: 16
Importing country: 17
Importing country: 18
Importing country: 19
Importing country: 20
Importing country: 21
Importing country: 22
Importing country: 23
Importing country: 24
Importing country: 25
Importing country: 26
Importing country: 27
Importing country: 28
Importing country: 29
Importing country: 30
Importing country: 31
Importing country: 32
Importing country: 33
Importing country: 34
Importing country: 35
Importing country: 36
Importing country: 37
Importing country: 38
Importing country: 39
Importing country: 40
Importing country: 41
Importing country: 42
Importing country: 43
Importing country: 44
Importing country: 45
Importing country: 

In [26]:
import weaviate
import os
import json
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve environment variables
cluster_url = os.getenv("WEAVIATE_CLUSTER_URL")
auth_api_key = os.getenv("WEAVIATE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize Weaviate client
client = weaviate.Client(
    url=cluster_url,  # Replace with your Weaviate endpoint
    auth_client_secret=weaviate.AuthApiKey(api_key=auth_api_key),  # Replace with your Weaviate instance API key
    additional_headers={
        "X-OpenAI-Api-Key": openai_api_key  # Replace with your inference API key
    }
)

# List of all fields to fetch
fields = [
    "country", "density", "abbreviation", "agricultural_land", "land_area", 
    "armed_forces_size", "birth_rate", "calling_code", "capital_major_city", 
    "co2_emissions", "cpi", "cpi_change", "currency_code", "fertility_rate", 
    "forested_area", "gasoline_price", "gdp", "gross_primary_education_enrollment", 
    "gross_tertiary_education_enrollment", "infant_mortality", "largest_city", 
    "life_expectancy", "maternal_mortality_ratio", "minimum_wage", "official_language", 
    "out_of_pocket_health_expenditure", "physicians_per_thousand", "population", 
    "labor_force_participation", "tax_revenue", "total_tax_rate", "unemployment_rate", 
    "urban_population", "latitude", "longitude", "wiki_title", "wiki_summary", 
    "wiki_url", "wiki_last_edited", "wiki_detailed_content"
]

# Query the "DetailedCountry20" class
response = (
    client.query
    .get("DetailedCountry22", fields)  # Fetch all fields
    .with_near_text({"concepts": ["Thames"]})  # Example query concept
    .with_limit(2)  # Limit the results to 2
    .do()
)

# Print the response in a formatted JSON
print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "DetailedCountry22": [
                {
                    "abbreviation": "TH",
                    "agricultural_land": "43.30%",
                    "armed_forces_size": "455,000",
                    "birth_rate": 10.34,
                    "calling_code": 66,
                    "capital_major_city": "",
                    "co2_emissions": "283,763",
                    "country": "Thailand",
                    "cpi": 113.27,
                    "cpi_change": "0.70%",
                    "currency_code": "THB",
                    "density": "",
                    "fertility_rate": 1.53,
                    "forested_area": "32.20%",
                    "gasoline_price": "$0.71 ",
                    "gdp": "$543,649,976,166 ",
                    "gross_primary_education_enrollment": "99.80%",
                    "gross_tertiary_education_enrollment": "49.30%",
                    "infant_mortality": 7.8,
                    "labo

In [18]:
response = (
    client.query
    .get("Question", ["question", "answer", "category"])
    .with_near_text({"concepts": ["gold"]})
    .with_where({
        "path": ["category"],
        "operator": "Equal",
        "valueText": "ANIMALS"
    })
    .with_limit(2)
    .do()
)

In [19]:
print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "the nose or snout",
                    "category": "ANIMALS",
                    "question": "The gavial looks very much like a crocodile except for this bodily feature"
                },
                {
                    "answer": "Elephant",
                    "category": "ANIMALS",
                    "question": "It's the only living mammal in the order Proboseidea"
                }
            ]
        }
    }
}


In [10]:
response = (
    client.query
    .get("Question", ["question", "answer", "category"])
    .with_near_text({"concepts": ["biology"]})
    .with_generate(single_prompt="Explain {answer} as you might to a five-year-old.")
    .with_limit(2)
    .do()
)

print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "generate": {
                            "error": null,
                            "singleResult": "DNA is like a recipe book that tells our bodies how to grow and work. It is made up of tiny instructions called genes that tell our bodies what color our hair and eyes will be, how tall we will grow, and lots of other things that make us who we are. Just like how a recipe book helps us make yummy food, DNA helps our bodies do all the amazing things they can do!"
                        }
                    },
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                },
                {
                    "_additional": {
                        "generate": {
                   