# Interactive Text Game
- v2: 25 Feb 2025

- Created: 24 Feb 2025

- You can come up with your own scenario, and the game will give you three options to choose from to advance the story
- If you do not like the options, you can also come up with your own options

- Game keeps track of your location and your emotion for each game state

- Text and image will be generated at each step

- If you do not want the image generation, then choose "Use Default Image"

- Preparation: In your .env file, add in the OPENAI_API_KEY, or the API keys of the LLM you are using

- The game also uses an `Images` folder and a `Music` folder

In [1]:
import os
import random
from dotenv import load_dotenv
import gradio as gr
from agentjo import strict_json
from openai import OpenAI  # Import OpenAI for both text and image generation
import requests
from io import BytesIO
from PIL import Image
import json

# Load environment variables (make sure OPENAI_API_KEY and PIXABAY_API_KEY are defined in your .env)
load_dotenv()
# NOTE(review): OPENAI_API_KEY is read here but not referenced again in the
# code visible in this file; the client below is built without an explicit key
# and presumably picks the same variable up from the environment — confirm
# against the OpenAI SDK documentation.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# If not set, default to an empty string.
# The value pre-fills the Pixabay key textbox and is the fallback when the
# player leaves that textbox empty.
PIXABAY_API_KEY = os.getenv("PIXABAY_API_KEY", "")

# Instantiate a global OpenAI client.
# Shared by llm() for chat completions and generate_game_image() for images.
client = OpenAI()

# LLM wrapper using OpenAI's API (always using model "gpt-4o-mini")
def llm(system_prompt: str, user_prompt: str) -> str:
    """Send one system/user message pair to ``gpt-4o-mini`` and return the reply.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no text.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.9,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    # The message content can be None (for example when the model returns no
    # text); guard so the declared ``-> str`` contract holds instead of
    # raising AttributeError on ``None.strip()``.
    content = response.choices[0].message.content
    return content.strip() if content else ""

# Output schema handed to strict_json as `output_format` for every LLM call
# (both the opening scene and each later turn).
# Each value is a natural-language description of the field followed by a
# "type: ..." hint — presumably interpreted by agentjo's strict_json; confirm
# against its documentation.
game_schema = {
    "description": "A narrative description of the future predicted situation, type: str",
    "options": "A list of three possible future moves, type: List[str]",
    # Nested player state; merged with the previous turn's state after each call.
    "state": {
        "emotion": "Suitable emotion based on description, type: Enum['calm', 'excited', 'sleepy', 'happy', 'neutral', 'sad', 'scared', 'other']",
        "location": "A suitable physical location based on description, type: str",
        "summary": "Summary of events thus far, type: str"
    },
    # Short text copied into the persistent state and used as the Pixabay
    # search keyword when an image is looked up there.
    "condensed_description": "A condensed description of up to ten words describing the location and the main action taken, type: str",
}

# Default persistent state: neutral emotion, unknown location.
# Used by game_step when the stored game state has no "persistent_state" entry.
default_state = {"emotion": "neutral", "location": "Unknown", "summary": ""}

# System prompt for the game master.
# It is passed as `system_prompt` to strict_json for the opening scene and for
# every later turn, and is also stored as the first entry of the conversation
# history built in generate_initial_state_with_scenario.
initial_system_prompt = (
    "You are an interactive text adventure game. At each turn, output a narrative description of the future predicted situation, "
    "and list three distinct potential future moves. Also update the player's persistent state. "
    "The persistent state includes 'emotion' (the player's emotional state, which can shift dramatically as the story unfolds) and 'location' (the player's current location). "
    "For the starting scenario, use the player's input to set the scene and location. The input may indicate a mysterious dungeon, a bustling city, a futuristic space station, an enchanted glade, or any other engaging environment. "
    "You start with a neutral emotion and the chosen location. "
    "Some encounters may have unexpected effects on the player's mood. "
    "Be detailed and give names and personalities to people you meet. "
    "Remember to keep all narrative descriptions and future options consistent with the chosen scenario. "
    "Always output valid JSON exactly as specified by the following schema."
)

# Remote fallback images.
# get_stock_image_url picks one of these at random when the local "Images"
# folder is missing or contains no image files.
stock_image_urls = [
    "https://res.cloudinary.com/demo/image/upload/v1312461204/sample.jpg",
]

def get_stock_image_url():
    """
    Pick a fallback image for the scene.

    When a local 'Images' folder exists and holds at least one file ending in
    .png, .jpg, .jpeg or .gif (case-insensitive), a random one of those files
    is returned as a path relative to the working directory. Otherwise a
    random entry of ``stock_image_urls`` is returned.
    """
    images_folder = "Images"
    image_suffixes = (".png", ".jpg", ".jpeg", ".gif")
    if os.path.isdir(images_folder):
        candidates = []
        for entry in os.listdir(images_folder):
            if entry.lower().endswith(image_suffixes):
                candidates.append(os.path.join(images_folder, entry))
        if candidates:
            return random.choice(candidates)
    # No usable local image: fall back to the remote stock list.
    return random.choice(stock_image_urls)

# Modified get_image_url function to accept an optional API key.
def get_image_url(keyword, API_KEY=None):
    """Search Pixabay for a photo and return the URL of the first hit.

    Args:
        keyword: Search text; only the first 100 characters are sent.
            ``None`` is treated as an empty search string.
        API_KEY: Pixabay API key. When ``None`` or empty, no request is made.

    Returns:
        The ``webformatURL`` of the first hit, or ``None`` when no key was
        given, the request failed, the response was not valid JSON, or the
        search produced no hits.
    """
    if not API_KEY:
        return None
    base_url = "https://pixabay.com/api/"
    params = {
        "key": API_KEY,
        "q": (keyword or "")[:100],
        "image_type": "photo",
        # Sent as a lowercase string: a Python bool would be serialised as
        # "True", while the Pixabay API documents "true"/"false".
        "safesearch": "true",
    }

    try:
        # A timeout keeps a stalled connection from hanging the game turn.
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print("Error:", exc)
        return None
    if response.status_code != 200:
        print("Error:", response.text)
        return None

    try:
        data = response.json()
    except ValueError as exc:
        # The body was not JSON (e.g. an HTML error page served with 200).
        print("Error:", exc)
        return None

    hits = data.get("hits")
    if hits:
        return hits[0].get("webformatURL")
    print("No images found for the keyword:", keyword)
    return None

def _pixabay_or_stock_image(keyword, persistent_state):
    """Return a Pixabay image URL for *keyword*, or a stock image when none is available."""
    pixabay_api_key = persistent_state.get("pixabay_api_key", None)
    try:
        image_url = get_image_url(keyword, API_KEY=pixabay_api_key)
    except requests.RequestException as e:
        # A network problem during the lookup must not abort the game turn.
        print("Pixabay lookup failed, using stock image. Error:", e)
        image_url = None
    if image_url is not None:
        return image_url
    return get_stock_image_url()


def generate_game_image(description, persistent_state):
    """
    Generate an image for the current game state.

    The source is chosen by ``persistent_state["image_source"]`` (default
    ``"dalle"``):

    * ``"dalle"``   - DALL-E 2 with the first 1000 characters of
      *description*; on any failure fall back to Pixabay, then stock.
    * ``"pixabay"`` - Pixabay search on the condensed description, then stock.
    * anything else (including ``"stock"``) - a stock image.

    Args:
        description: Full narrative text of the current scene.
        persistent_state: Game state dict; reads ``image_source``,
            ``condensed_description`` and ``pixabay_api_key``.

    Returns:
        An image URL or a local image path; never ``None``.
    """
    image_source = persistent_state.get("image_source", "dalle")
    # Pixabay works better with a short keyword than with the full narrative.
    condensed = persistent_state.get("condensed_description", description)

    if image_source == "dalle":
        try:
            response = client.images.generate(
                model="dall-e-2",
                prompt=description[:1000],
                size="1024x1024",
                quality="standard",
                n=1,
            )
            return response.data[0].url
        except Exception as e:
            # Deliberately broad: image generation is best-effort and any
            # failure should degrade to the next image source.
            print("DALL-E‑2 image generation failed, trying Pixabay. Error:", e)
            return _pixabay_or_stock_image(condensed, persistent_state)
    if image_source == "pixabay":
        return _pixabay_or_stock_image(condensed, persistent_state)
    # "stock" and any unrecognised source: previously an unknown source fell
    # through and returned None, which callers cannot load as an image.
    return get_stock_image_url()

def get_image_from_url(url):
    """Load an image from an HTTP(S) URL or from a local file path.

    Args:
        url: A string starting with ``http://`` or ``https://``, or a
            filesystem path.

    Returns:
        The opened ``PIL.Image`` object.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if url.startswith(("http://", "https://")):
        # Bound the wait so a stalled download cannot hang the game turn.
        response = requests.get(url, timeout=30)
        # Fail with a clear HTTP error instead of handing an error page to PIL.
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    return Image.open(url)

# Main game step function.
def game_step(chosen_input, state):
    """Advance the game by one turn and build everything the UI displays.

    Args:
        chosen_input: The player's action text (typed or taken from an option).
        state: Game state dict with ``conversation_history``,
            ``persistent_state`` and ``options``; ``None`` is treated as an
            empty state.

    Returns:
        An 8-tuple ``(narrative, state_info, pil_image, new_state, option1,
        option2, option3, music_file)``.
    """
    # Tolerate a missing state object instead of failing on ``None.get``.
    state = state or {}
    conversation_history = state.get("conversation_history", [])
    persistent_state = state.get("persistent_state")
    if persistent_state is None:
        # Copy so the shared module-level default is never handed out directly.
        persistent_state = dict(default_state)
    current_options = state.get("options", [])

    conversation_history, persistent_state, current_options, narrative, opt_labels = update_game_state(
        conversation_history, persistent_state, current_options, chosen_input
    )
    # Use .get like the state_info lines below so a missing emotion does not
    # raise KeyError; the neutral label maps to the default track.
    music_file = get_music_file(persistent_state.get("emotion", "neutral"))
    image_url = generate_game_image(narrative, persistent_state)
    pil_image = get_image_from_url(image_url)

    new_state = {
        "conversation_history": conversation_history,
        "persistent_state": persistent_state,
        "options": current_options
    }
    state_info = (
        f"**Location:** {persistent_state.get('location', 'N/A')}\n\n"
        f"**Emotion:** {persistent_state.get('emotion', 'N/A')}\n\n"
    )
    return narrative, state_info, pil_image, new_state, opt_labels[0], opt_labels[1], opt_labels[2], music_file

def submit_action(custom_text, state):
    """Run one game step for *custom_text* and append an empty string.

    Returns the eight values produced by ``game_step`` followed by ``""``,
    nine values in total.
    """
    step_outputs = game_step(custom_text, state)
    return (*step_outputs, "")

def option_click(index, state):
    """Submit the option stored at position *index* of ``state["options"]``."""
    return submit_action(state["options"][index], state)

def get_music_file(emotion: str) -> str:
    """Choose the background-music file for an emotion label.

    The label is lower-cased and tested for each known keyword as a
    substring, in a fixed order; the first keyword found decides the track.
    Labels containing none of the keywords get the cinematic track.
    """
    # Order matters: the first matching keyword wins.
    tracks_by_keyword = (
        ("calm", "Music/calm.mp3"),
        ("excited", "Music/exciting.mp3"),
        ("sleepy", "Music/lullaby.mp3"),
        ("happy", "Music/happy.mp3"),
        ("neutral", "Music/cinematic.mp3"),
        ("sad", "Music/sad.mp3"),
        ("scared", "Music/scared.mp3"),
    )
    label = emotion.lower()
    for keyword, track in tracks_by_keyword:
        if keyword in label:
            return track
    return "Music/cinematic.mp3"

# Updated function to include image_source and pixabay_api_key.
def generate_initial_state_with_scenario(scenario_choice: str, image_source: str, pixabay_api_key: str):
    """Ask the LLM for the opening scene and build the initial game state.

    Args:
        scenario_choice: The player's free-text scenario.
        image_source: UI label of the image option: "Use DALL-E 2",
            "Use Pixabay" or "Use Default Image".
        pixabay_api_key: Key entered in the UI; when empty and Pixabay is
            selected, ``PIXABAY_API_KEY`` from the environment is used.

    Returns:
        A 4-tuple ``(conversation_history, description, options,
        persistent_state)``. ``options`` always has at least three entries.
    """
    # If no key was provided via UI, use the environment variable.
    if image_source == "Use Pixabay" and not pixabay_api_key:
        pixabay_api_key = PIXABAY_API_KEY
    user_prompt = (
        f"Player's desired scenario is '{scenario_choice}'. "
        f"Begin the adventure with an engaging introduction set in this scenario."
    )
    res = strict_json(
        system_prompt=initial_system_prompt,
        user_prompt=user_prompt,
        output_format=game_schema,
        llm=llm
    )
    persistent_state = res["state"]
    # Remember the scenario so later turns can remind the LLM of it.
    persistent_state["scenario"] = scenario_choice

    # Translate the UI label into the short code generate_game_image expects.
    # An unrecognised label leaves "image_source" unset.
    image_source_codes = {
        "Use DALL-E 2": "dalle",
        "Use Pixabay": "pixabay",
        "Use Default Image": "stock",
    }
    if image_source in image_source_codes:
        persistent_state["image_source"] = image_source_codes[image_source]
    if image_source == "Use Pixabay":
        persistent_state["pixabay_api_key"] = pixabay_api_key
    persistent_state["condensed_description"] = res["condensed_description"]

    # Pad to three options, as update_game_state does for later turns; the UI
    # indexes options 0..2 and would raise IndexError on a shorter list.
    options = list(res["options"])
    while len(options) < 3:
        options.append("Wait")

    conversation_history = [
        {"role": "system", "content": initial_system_prompt},
        {"role": "assistant", "content": str(res["description"])}
    ]
    return conversation_history, res["description"], options, persistent_state

def update_game_state(history, persistent, current_options, chosen_input):
    """Ask the LLM for the next turn and merge its answer into the game state.

    Args:
        history: Conversation history list; the player's input and the new
            narrative are appended to it in place.
        persistent: Persistent state dict from the previous turn (only read).
        current_options: Options shown for the previous turn (unused here;
            kept for interface compatibility).
        chosen_input: The action the player chose or typed.

    Returns:
        A 5-tuple ``(history, new_state, options, narrative, opt_labels)``
        where ``history`` holds at most the last 20 entries, ``options`` is
        the LLM's option list padded to at least three entries, and
        ``opt_labels`` is its first three entries.
    """
    history.append({"role": "user", "content": chosen_input})
    desired_scenario = persistent.get("scenario", "unspecified")

    # Do not echo the whole persistent state to the LLM: the Pixabay API key
    # is a secret, and the stored conversation history would repeat what the
    # prompt already contains on the line above it.
    hidden_keys = ("pixabay_api_key", "conversation_history")
    prompt_state = {k: v for k, v in persistent.items() if k not in hidden_keys}
    user_prompt = (
        f"Conversation history:{history}\n"
        f"Persistent states:{prompt_state}\n"
        f"Player chose: {chosen_input}\n"
        f"Desired scenario: {desired_scenario}\n"
    )
    res = strict_json(
        system_prompt=initial_system_prompt,
        user_prompt=user_prompt,
        output_format=game_schema,
        llm=llm
    )
    new_state = res["state"]
    # Merge new state with persistent state; retain previous values if missing.
    # This also carries over keys the schema does not know about (scenario,
    # image_source, pixabay_api_key).
    for key in persistent:
        if key not in new_state or new_state[key] == "":
            new_state[key] = persistent[key]
    new_state["condensed_description"] = res["condensed_description"]
    history.append({"role": "assistant", "content": str(res["description"])})

    # Cap the history at the 20 most recent entries to bound prompt size.
    if len(history) > 20:
        history = history[-20:]
    new_state["conversation_history"] = history

    narrative = res["description"]
    options = res["options"]
    # Guarantee three options so the three UI buttons always have a label.
    while len(options) < 3:
        options.append("Wait")
    opt_labels = options[:3]
    return history, new_state, options, narrative, opt_labels

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Interactive Text Adventure Game")

    # --- Setup controls: scenario text, image source, optional Pixabay key ---
    with gr.Column():
        scenario_input = gr.Textbox(
            placeholder="Enter the type of scenario you want (e.g., mysterious dungeon, bustling city, futuristic space station, enchanted glade)...",
            label="Choose Your Scenario"
        )
        image_source_radio = gr.Radio(
            label="Select Image Generation Option",
            choices=["Use DALL-E 2", "Use Pixabay", "Use Default Image"],
            value="Use DALL-E 2"
        )
        # Create a container for the Pixabay API info and input.
        pixabay_container = gr.Column(visible=False)
        with pixabay_container:
            gr.HTML(
                '<p>Pixabay API Key (obtain from <a href="https://www.pixabay.com" target="_blank">www.pixabay.com</a> - it is free to use)</p>'
            )
            pixabay_api_key_input = gr.Textbox(
                placeholder="Enter your Pixabay API Key",
                label="Pixabay API Key",
                value=PIXABAY_API_KEY  # default from environment
            )

    def toggle_pixabay(api_choice):
        """Show the Pixabay key input only when it is needed.

        The container stays hidden when a key is already set in the
        environment; otherwise it is shown only for the "Use Pixabay" choice.
        """
        if PIXABAY_API_KEY:
            return gr.update(visible=False)
        else:
            return gr.update(visible=(api_choice == "Use Pixabay"))

    image_source_radio.change(fn=toggle_pixabay, inputs=image_source_radio, outputs=pixabay_container)
    start_btn = gr.Button("Start Adventure")

    # --- Game view: hidden until start_game makes it visible ---
    with gr.Row(visible=False) as game_ui:
        with gr.Column(scale=2):
            image_display = gr.Image(label="Game Scene", type="pil")
        with gr.Column(scale=1):
            narrative_box = gr.Markdown(label="Game Narrative")
            state_info_box = gr.Markdown(label="Current State")
            with gr.Row():
                option1_btn = gr.Button()
                option2_btn = gr.Button()
                option3_btn = gr.Button()
            user_input = gr.Textbox(
                placeholder="Enter your action here...",
                label="Your Action"
            )
            submit_btn = gr.Button("Submit Action")

    audio_player = gr.Audio(label="Background Music", type="filepath", interactive=False, autoplay=True)

    # Holds the game state dict (conversation_history, persistent_state, options).
    state_box = gr.State()

    def start_game(scenario_choice: str, image_source: str, pixabay_api_key: str):
        """Create the opening scene and return the ten values for the start outputs.

        The last two values reveal the game view and clear the scenario textbox.
        """
        # If no key is provided via UI, use the environment variable.
        if image_source == "Use Pixabay" and not pixabay_api_key:
            pixabay_api_key = PIXABAY_API_KEY
        conversation_history, init_description, init_options, persistent_state = generate_initial_state_with_scenario(
            scenario_choice, image_source, pixabay_api_key
        )
        initial_music = get_music_file(persistent_state["emotion"])
        initial_image_url = generate_game_image(init_description, persistent_state)
        initial_image = get_image_from_url(initial_image_url)
        initial_state = {
            "conversation_history": conversation_history,
            "persistent_state": persistent_state,
            "options": init_options
        }
        state_info = (
            f"**Location:** {persistent_state.get('location', 'N/A')}\n\n"
            f"**Emotion:** {persistent_state.get('emotion', 'N/A')}\n\n"
        )
        return init_description, state_info, initial_image, initial_state, init_options[0], init_options[1], init_options[2], initial_music, gr.update(visible=True), ""

    # Shared component lists so every handler of the same kind is wired identically.
    start_inputs = [scenario_input, image_source_radio, pixabay_api_key_input]
    start_outputs = [
        narrative_box, state_info_box, image_display, state_box,
        option1_btn, option2_btn, option3_btn, audio_player,
        game_ui, scenario_input,
    ]
    # submit_action and option_click both return nine values; the last one
    # (an empty string) clears the action textbox.
    turn_outputs = [
        narrative_box, state_info_box, image_display, state_box,
        option1_btn, option2_btn, option3_btn, audio_player,
        user_input,
    ]

    # Starting a game: button click or Enter in the scenario textbox.
    start_btn.click(fn=start_game, inputs=start_inputs, outputs=start_outputs)
    scenario_input.submit(fn=start_game, inputs=start_inputs, outputs=start_outputs)

    # Free-text action: button click or Enter in the action textbox.
    submit_btn.click(fn=submit_action, inputs=[user_input, state_box], outputs=turn_outputs)
    user_input.submit(fn=submit_action, inputs=[user_input, state_box], outputs=turn_outputs)

    # Option buttons. They previously declared only eight outputs although
    # option_click returns nine values, so the action textbox was never
    # cleared after an option was chosen.
    option1_btn.click(
        fn=lambda state: option_click(0, state),
        inputs=[state_box],
        outputs=turn_outputs
    )
    option2_btn.click(
        fn=lambda state: option_click(1, state),
        inputs=[state_box],
        outputs=turn_outputs
    )
    option3_btn.click(
        fn=lambda state: option_click(2, state),
        inputs=[state_box],
        outputs=turn_outputs
    )

demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


