In [1]:
import pandas as pd
import weaviate
from weaviate.embedded import EmbeddedOptions
from sentence_transformers import SentenceTransformer
import openai
import json


  from tqdm.autonotebook import tqdm, trange


In [3]:
# Read the games dataset and blank out missing cells in the same expression,
# so later string handling never has to deal with NaN values.
df = pd.read_csv("../data/game-dataset.csv", sep=',').fillna('')

# Combine relevant fields into a single text field
def combine_fields(row):
    """Flatten the descriptive columns of one game into a single string.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) indexable by the
            column names listed below.

    Returns:
        The string forms of all truthy column values, in the fixed column
        order below, separated by single spaces. Falsy values (such as the
        empty string or 0) are left out.
    """
    ordered_columns = (
        'gameName', 'alternateNames', 'subcategory', 'level',
        'description', 'playersMax', 'ageRange', 'duration',
        'equipmentNeeded', 'objective', 'skillsDeveloped', 'setupTime',
        'place', 'physicalIntensityLevel', 'educationalBenefits', 'category',
    )

    pieces = []
    for column in ordered_columns:
        value = row[column]
        # Skip blanks so the joined text has no doubled separators.
        if value:
            pieces.append(str(value))
    return ' '.join(pieces)

# Build one free-text column per game by running combine_fields over every row
# (axis=1); this is the text that is later embedded and stored in Weaviate.
df['combined_text'] = df.apply(combine_fields, axis=1)


In [5]:
# Connect to the Weaviate instance with the v3 Python client.
# NOTE(review): the URL is hard-coded here, while the WEAVIATE_URL environment
# variable read in a later cell is never passed to the client — confirm which
# endpoint is intended.
client = weaviate.Client("http://localhost:8080")


Python client v3 `weaviate.Client(...)` connections and methods are deprecated and will
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

            If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client("http://localhost:8080")


In [6]:
import os

from dotenv import load_dotenv
# Populate the process environment from a .env file, when one is present
load_dotenv()

# Look up the service settings (None when a variable is not defined);
# the DATABASE_URL lookup remains disabled.
WEAVIATE_URL = os.environ.get('WEAVIATE_URL')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [7]:
from openai import OpenAI

# OpenAI API client used for the chat-completion step in query_games_instructor.
# NOTE(review): constructed without arguments, so presumably the API key is
# picked up from the OPENAI_API_KEY environment variable — confirm.
client1 = OpenAI()

In [8]:
# Schema of the "Game" class. Server-side vectorization is switched off
# ("vectorizer": "none") because the embeddings are supplied with each object.
game_class = {
    "class": "Game",
    "description": "A class representing a game",
    "vectorizer": "none",
    "properties": [
        {"name": property_name, "dataType": ["text"]}
        # Add other property names here as needed
        for property_name in ("gameName", "combined_text")
    ],
}

# Start from a clean slate: drop a pre-existing "Game" class (optional) ...
if client.schema.exists("Game"):
    client.schema.delete_class("Game")

# ... and then register the class definition above.
client.schema.create_class(game_class)


In [9]:
# Load the pre-trained sentence-transformers model for embeddings. The same
# model encodes the game texts at import time and the user query at search time.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to generate embeddings and import data
def import_data(df, weaviate_client=None, encoder=None):
    """Embed every row of ``df`` and upload it to the Weaviate "Game" class.

    Args:
        df: DataFrame with at least the columns ``gameName`` and
            ``combined_text``. Its index may be of any type; it is not used
            for the progress display.
        weaviate_client: Object exposing a ``batch`` context manager with an
            ``add_data_object`` method. Defaults to the notebook-level
            ``client``.
        encoder: Object whose ``encode(text)`` returns an array with an
            ``astype`` method. Defaults to the notebook-level
            ``embedding_model``.

    Returns:
        None. Objects are sent through the client's batch interface and a
        progress counter is printed on a single, continuously rewritten line.
    """
    # Fall back to the notebook globals so existing `import_data(df)` calls
    # keep working unchanged.
    if weaviate_client is None:
        weaviate_client = client
    if encoder is None:
        encoder = embedding_model

    total = len(df)
    with weaviate_client.batch as batch:
        batch.batch_size = 100
        # Count rows positionally: the DataFrame index label is not guaranteed
        # to be a 0-based integer (filtered frames, string indexes, ...).
        for position, (_, row) in enumerate(df.iterrows(), start=1):
            properties = {
                "gameName": row["gameName"],
                "combined_text": row["combined_text"],
                # Add other properties as needed
            }
            embedding = encoder.encode(row["combined_text"]).astype('float32')

            # Use the batch object bound by the `with` statement.
            batch.add_data_object(
                data_object=properties,
                class_name="Game",
                vector=embedding
            )
            # '\r' returns to the start of the line so the counter updates in place.
            print(f"Imported {position}/{total}", end='\r')

# Import the data: embed each row of df and upload it to the "Game" class
import_data(df)




Imported 590/590

In [13]:
def query_games_instructor(query, limit=3, distance=0.7, model='gpt-4o-mini'):
    """Answer a question about games using retrieval-augmented generation.

    The query is embedded with the notebook-level ``embedding_model``, the
    closest "Game" objects are fetched from Weaviate through ``client``, and
    their ``combined_text`` is passed as context to an OpenAI chat model
    through ``client1``.

    Args:
        query: The user's question, as a string.
        limit: Maximum number of games to retrieve as context.
        distance: Maximum vector distance for a game to count as a match;
            adjust the threshold as needed.
        model: Name of the OpenAI chat model used to write the answer.

    Returns:
        The model's answer with surrounding whitespace removed; an empty
        string if the model returned no text content.

    Raises:
        RuntimeError: If the Weaviate response carries an "errors" entry.
    """
    # Step 1: Generate embedding for the query
    query_embedding = embedding_model.encode([query])[0].astype('float32')

    # Step 2: Use Weaviate's vector search to find similar games
    search_response = (
        client.query.get(
            class_name="Game",
            properties=["gameName", "combined_text"]
        )
        .with_near_vector({
            "vector": query_embedding.tolist(),
            "distance": distance
        })
        .with_limit(limit)
        .do()
    )

    # Fail with the server's message instead of an opaque KeyError/TypeError
    # further down when the search itself did not succeed.
    if search_response.get("errors"):
        raise RuntimeError(f"Weaviate query failed: {search_response['errors']}")

    # Extract the retrieved documents; a null result set is treated as "no hits".
    results = search_response["data"]["Get"]["Game"] or []

    # Step 3: Prepare the context for the LLM (one blank line after each game)
    context = ''.join(f"{hit['combined_text']}\n\n" for hit in results)

    # Step 4: Construct the messages for the ChatCompletion API
    messages = [
        {"role": "system", "content": "You are a helpful Games Instructor who answers questions using only the provided context. Do not use any outside knowledge."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"}
    ]

    completion = client1.chat.completions.create(
        model=model,
        messages=messages
    )

    # The message content may be None (no text returned); normalise to ''.
    answer = (completion.choices[0].message.content or '').strip()

    return answer


In [19]:
if __name__ == "__main__":
    # Example question run through the retrieval + LLM pipeline.
    user_query = "What are some outdoor team sports suitable for teenagers that promote teamwork?"
    answer = query_games_instructor(user_query)
    # Break after every '. ' so each sentence lands on its own line.
    formatted_answer = '.\n'.join(answer.split('. '))
    # Header first, then the reformatted answer starting on the next line.
    print("Answer:", formatted_answer, sep='\n')
    
    

Answer:
Capture the Flag is an outdoor team sport suitable for teenagers that promotes teamwork.
It involves two teams trying to capture the opposing team's flag and return it to their own territory without being tagged.
This game enhances teamwork, strategic planning, and physical fitness.
Another option is Tag, where players must work together to avoid being tagged, promoting social interaction and agility.


In [20]:
if __name__ == "__main__":
    # Second example question, phrased more casually than the first.
    user_query = "I want a fun game that helps kids work together outside."
    answer = query_games_instructor(user_query)
    # Break after every '. ' so each sentence lands on its own line.
    formatted_answer = '.\n'.join(answer.split('. '))
    # Header first, then the reformatted answer starting on the next line.
    print("Answer:", formatted_answer, sep='\n')


Answer:
You can try the Parachute Games Group Activity.
In this game, children hold the edges of a large parachute and can create waves or bounce balls on it.
This activity promotes teamwork and coordination while being engaging and enjoyable for kids outdoors.
It's suitable for beginners and can be played in an open area.
