In [17]:
import json
import random

import pandas as pd
import numpy as np
from IPython.display import display

from jsonschema import validate
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

import os, dotenv, sys
from pathlib import Path

# Load project settings from the repo-level .env file; override=True lets its
# values replace variables that already exist in the process environment.
dotenv.load_dotenv("../.env", override=True)

# Diagnostic echo of the two variables this notebook cares about.
# - .get() keeps the echo from raising KeyError when .env does not define them
#   (these prints run before the assignments further down).
# - Single quotes inside the f-string keep the cell parseable on Python < 3.12,
#   where reusing the enclosing quote character is a SyntaxError.
print(f"{os.environ.get('VIRTUAL_ENV')=}")
print(f"{os.environ.get('PYTHONSTARTUP')=}")

# Put the notebook's parent directory on sys.path (once) so modules located
# there can be imported from this notebook.
parent = Path().resolve().parents[0].as_posix()
if parent not in sys.path:
    sys.path.insert(0, parent)

# NOTE(review): machine-specific absolute path; these assignments also overwrite
# whatever .env just provided. Consider moving both values into .env.
os.environ["VIRTUAL_ENV"] = '/Users/nima/.local/share/virtualenvs/worldly-6Xoo3lh0'
os.environ["PYTHONSTARTUP"] = '.repl.py'


In [None]:
from worldly.play import quiz_bank

# Build the quiz bank object; quiz_bank() comes from the project-local worldly.play module.
q = quiz_bank()
# Show the population dimension grouped through np.log10
# (presumably order-of-magnitude buckets, as the log10 index in the output suggests — TODO confirm).
display(q.population.group(np.log10))

# Show the continent dimension grouped with group()'s default arguments.
display(q.continent.group())

Unnamed: 0_level_0,countries,l,p
log10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,"[Palau, Nauru, Tuvalu]",3,1.704545
5,"[Barbados, Vanuatu, Sao Tome and Principe, Sam...",17,9.659091
6,"[Mongolia, Armenia, Albania, Lithuania, Jamaic...",34,19.318182
7,"[Malaysia, Uzbekistan, Mozambique, Nepal, Ghan...",84,47.727273
8,"[Indonesia, Brazil, Pakistan, Nigeria, Banglad...",36,20.454545
9,"[China, India]",2,1.136364


Unnamed: 0_level_0,countries,l,p
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Antarctica,"[Antarctica, French Southern territories, Bouv...",5,2.12766
South America,"[Argentina, Bolivia, Brazil, Chile, Colombia, ...",14,5.957447
Oceania,"[American Samoa, Australia, Cocos (Keeling) Is...",27,11.489362
North America,"[Aruba, Anguilla, Netherlands Antilles, Antigu...",35,14.893617
Europe,"[Albania, Andorra, Austria, Belgium, Bulgaria,...",46,19.574468
Asia,"[Afghanistan, United Arab Emirates, Armenia, A...",49,20.851064
Africa,"[Angola, Burundi, Benin, Burkina Faso, Botswan...",59,25.106383


<worldly.dimensions.Dimension at 0x16b8dea50>

In [26]:
# Pick the secret country for this round from the list of state names.
# Explicit UTF-8 keeps names with non-ASCII characters readable regardless of
# the platform's default encoding.
with open("../resources/states.txt", "r", encoding="utf-8") as fh:
    # Skip blank lines so a stray empty line can never become the secret.
    state_names = [line.strip() for line in fh if line.strip()]

if not state_names:
    raise ValueError("../resources/states.txt contains no state names")

secret = random.choice(state_names)
print(secret)

# Later cells interpolate `state_data` into the model prompt, so it has to be
# defined on a fresh kernel. Only the name is known here.
# NOTE(review): placeholder record — replace with the full dimension record for
# `secret` once the quiz bank exposes one (an earlier attempt was
# q.sample(1).to_dict(orient="records")[0]).
state_data = {"name": secret}

Nicaragua


In [None]:
# Step 1: Fine-Tuning Dataset Preparation (Skip this if already fine-tuned)
# Training data for personality-specific responses (Pirate in this case)
# The worked example below describes Brazil, so its input carries fixed Brazil
# data rather than whichever country an earlier cell happened to sample. This
# keeps the training pair self-consistent and lets the cell run on a fresh kernel.
# NOTE(review): the keys are illustrative — align them with the real state record schema.
_brazil_example_data = {"name": "Brazil", "continent": "South America"}

# One supervised example: the "input" prompt and the structured "output"
# (a list of clues, each with an integer difficulty) the model should produce.
train_data = [
    {
        "input": f"You are a Pirate NPC. Data: {_brazil_example_data}.",
        "output": {
            "clues": [
                {"clue": "Arrr, matey! Where do barrels of coffee worth billions come from?", "difficulty": 3},
                {"clue": "Ye’ll find it in South America, next to the Amazon!", "difficulty": 6},
                {"clue": "Its name rhymes with 'Brazil'.", "difficulty": 9},
            ]
        },
    }
]


# Step 2: Tokenize the Training Data
def preprocess(data, tokenizer):
    """Serialize one training example to text and tokenize it.

    Args:
        data: Mapping with "input" and "output" entries; both are JSON-encoded
            before being placed in the text.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=...,
            padding=..., max_length=...)``.

    Returns:
        Whatever ``tokenizer`` returns for the text
        ``Input: <json input> Output: <json output>``, requested with
        truncation and padding to a fixed length of 512. No labels are added here.
    """
    prompt_json = json.dumps(data["input"])
    target_json = json.dumps(data["output"])
    text = "Input: " + prompt_json + " Output: " + target_json

    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(text, **encode_options)


# Step 3: Fine-Tune the Model (Skip if already fine-tuned)
def fine_tune_model():
    """Fine-tune the base causal LM on ``train_data`` and save the result.

    Loads the "mistral-7b" tokenizer and model, tokenizes every example in
    ``train_data`` with ``preprocess``, trains for three epochs, and writes the
    model and tokenizer to ``./fine_tuned_mistral``.

    Returns:
        None. The fine-tuned artifacts are written to disk.
    """
    # Function-scope import: the file-level transformers import does not
    # bring this name into scope.
    from transformers import DataCollatorForLanguageModeling

    # NOTE(review): "mistral-7b" must resolve to a local directory or a valid
    # hub id — confirm the intended checkpoint name.
    tokenizer = AutoTokenizer.from_pretrained("mistral-7b")
    model = AutoModelForCausalLM.from_pretrained("mistral-7b")

    # preprocess() pads to max_length, which needs a pad token; fall back to
    # EOS when the tokenizer does not define one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    tokenized_data = [preprocess(d, tokenizer) for d in train_data]

    # The tokenized examples carry no "labels", and Trainer needs them to get a
    # loss from a causal LM. With mlm=False the collator copies input_ids into
    # labels and masks padding positions so they do not contribute to the loss.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir="./fine_tuned_mistral",
        per_device_train_batch_size=2,
        num_train_epochs=3,
        save_steps=500,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_data,
        data_collator=data_collator,
    )

    trainer.train()
    # Save model and tokenizer side by side so load_model() can read both
    # from the same directory.
    model.save_pretrained("./fine_tuned_mistral")
    tokenizer.save_pretrained("./fine_tuned_mistral")


# Uncomment this to fine-tune (only needs to be run once)
# fine_tune_model()


# Step 4: Load Fine-Tuned Model
def load_model():
    """Load the fine-tuned model and its tokenizer from ``./fine_tuned_mistral``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint_dir = "./fine_tuned_mistral"
    fine_tuned_model = AutoModelForCausalLM.from_pretrained(checkpoint_dir)
    fine_tuned_tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    return fine_tuned_model, fine_tuned_tokenizer


# Runs whenever this cell executes: loads from ./fine_tuned_mistral, which in this
# notebook is only written by fine_tune_model() (its call above is commented out).
model, tokenizer = load_model()


# Step 5: Generate Clues with Fine-Tuned Model
def generate_clues(state_data, personality="Pirate", max_new_tokens=200):
    """Ask the fine-tuned model for clues about a state.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        state_data: Data describing the state; interpolated verbatim into the prompt.
        personality: NPC persona named in the prompt.
        max_new_tokens: Upper bound on the number of generated tokens, not
            counting the prompt.

    Returns:
        The text the model generated after the prompt, with special tokens
        removed and surrounding whitespace stripped.
    """
    prompt = f"You are a {personality} NPC. Data: {state_data}. Task: Generate three clues with difficulty levels from 1 to 10."

    inputs = tokenizer(prompt, return_tensors="pt")
    # max_new_tokens budgets only the continuation; max_length would also count
    # the prompt tokens, so a long prompt could leave no room to generate.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # A causal LM returns prompt + continuation. Drop the echoed prompt so the
    # caller receives only the model's answer (the part expected to be JSON).
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


# Generate clues for the random state
# state_data must already be defined by an earlier cell; it is interpolated
# verbatim into the prompt. The default "Pirate" personality is used.
clues_response = generate_clues(state_data)
print("\nGenerated Clues:\n", clues_response)

# Step 6: Validate Generated Clues Against Schema
# JSON Schema for a single clue: a text plus an integer difficulty in 1..10.
_clue_item_schema = {
    "type": "object",
    "properties": {
        "clue": {"type": "string"},
        "difficulty": {"type": "integer", "minimum": 1, "maximum": 10},
    },
    "required": ["clue", "difficulty"],
}

# JSON Schema for the whole model response: an object with a required
# "clues" array whose items follow the clue schema above.
schema = {
    "type": "object",
    "properties": {"clues": {"type": "array", "items": _clue_item_schema}},
    "required": ["clues"],
}


def validate_clues(clues_response):
    """Parse the model response as JSON and check it against ``schema``.

    Args:
        clues_response: Raw text expected to contain a JSON document.

    Returns:
        The parsed JSON value when parsing and validation both succeed,
        otherwise ``None``. Any exception raised along the way is printed
        rather than propagated.
    """
    result = None
    try:
        parsed = json.loads(clues_response)
        validate(instance=parsed, schema=schema)
        print("\nValidated Clues:", parsed)
        result = parsed
    except Exception as err:
        print("\nValidation Error:", err)
    return result


# Parse and schema-check the raw model text; validated_clues is None when that fails.
validated_clues = validate_clues(clues_response)


# Step 7: Integration into the Game (Example Usage)
def game_engine(validated_clues):
    """Print each validated clue with its difficulty, or a fallback message.

    Args:
        validated_clues: Mapping with a "clues" list of
            ``{"clue": ..., "difficulty": ...}`` entries, or a falsy value
            when no valid clues are available.
    """
    if not validated_clues:
        print("No valid clues generated. Try again!")
        return

    for entry in validated_clues["clues"]:
        text, level = entry["clue"], entry["difficulty"]
        print(f"Clue: {text} (Difficulty: {level})")


# Print the validated clues, or the fallback message when validated_clues is falsy.
game_engine(validated_clues)