In [14]:
%reload_ext autoreload
%autoreload 2

import getpass
import os
import json
import mlflow 
from openai import OpenAI

In [15]:
# --- Notebook configuration -------------------------------------------------
# Point MLflow at the locally running tracking server. 0.0.0.0 is a
# "listen on every interface" bind address, not a portable destination
# (connecting to it fails on Windows), so the client uses loopback instead.
os.environ["MLFLOW_TRACKING_URI"] = "http://127.0.0.1:5001"

# Prompt for the key only when the environment does not already provide one,
# so re-running this cell (or Restart & Run All with the variable exported)
# does not block on interactive input. getpass keeps the key out of the
# notebook source and its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI API key:")

# Shared client used by the functions defined below; it reads OPENAI_API_KEY
# from the environment.
openai = OpenAI()

# Group all runs of this notebook under one experiment and let MLflow
# automatically trace OpenAI calls.
mlflow.set_experiment("assignment-5-evaluate-llm")
mlflow.openai.autolog()

In [16]:
@mlflow.trace
def evaluation(prompt: str, deck_data: dict) -> dict:
    """
    Evaluate a Clash Royale deck with the OpenAI API and log the result to MLflow.

    The deck is embedded into a scoring prompt, the model's JSON answer is
    parsed, and one MLflow run is recorded containing the user prompt, the
    deck fields (as params) and the numeric scores (as metrics).

    Args:
        prompt: The original user request; logged as the "user prompt" param.
        deck_data: Mapping describing the deck. Must contain the keys
            "deck_name", "average_elixir_cost" and "cards".

    Returns:
        The parsed evaluation JSON returned by the model.

    Raises:
        KeyError: If ``deck_data`` lacks a required field, or the model's
            answer lacks one of the expected score fields.
        ValueError: If the model returned no content, or a score is not
            numeric. ``json.JSONDecodeError`` (a ``ValueError`` subclass) is
            re-raised when the content is not valid JSON.
    """

    # Fail fast, before paying for an API call, if the deck is missing any
    # field that is logged to MLflow further down.
    missing_deck_fields = [
        field
        for field in ("deck_name", "average_elixir_cost", "cards")
        if field not in deck_data
    ]
    if missing_deck_fields:
        raise KeyError(f"deck_data is missing required fields: {missing_deck_fields}")

    evaluation_prompt = """
#  Identity
You are a professional Clash Royale coach and deck analyst.

#  Instructions
Your task is to evaluate any 8-card Clash Royale deck using a structured and in-depth scoring system inspired by Deckshop. Your evaluation must include both **numeric ratings** and **detailed explanations** based on:

- Individual card roles
- Meta relevance
- Synergy potential (classic + emerging combos)
- Overall deck balance
- Spell composition

---

 **Rate the deck from 1 to 10** in the following categories:

1. **Overall Power** – How viable the deck is across ladder, global tournaments, and special challenges.
2. **Defense** – Assess how well the deck can handle:
   -  Air threats (e.g., Minions, Balloon, Lava Hound)
   -  Swarms (e.g., Skeleton Army, Bats)
   - Tanks (e.g., Giant, Royal Giant, Golem)
   - Splash resistance
   -  Spell defense (e.g., vs Goblin Barrel, Graveyard)
3. **Attack** – Consider:
   - Presence of a clear **win condition**
   - Strength of **support troops**
   - **Breakthrough ability** (vs buildings, swarms)
   - **Pressure** (dual-lane, bridge spam, counter-push)
4. **Synergy** – Evaluate:
   - Known combo effectiveness (e.g., Miner + Poison, Hog + Ice Spirit)
   - Cycle consistency and elixir pacing
   - Spell synergy (e.g., Log + Fireball)
   - Role diversity (tank, DPS, splash, control)
   - Potential for **new synergy discovery**
5. **Versatility** – Rate:
   - Matchups vs all major archetypes (siege, bait, beatdown, etc.)
   - Recovery ability after a bad rotation
   - Adaptability in both ladder and competitive modes
   - Flexibility for switching between offense and defense
6. difficulty of the deck:
   - how easy it is to play this deck on different levels and how much experience is needed
   - 1 being the easiest and 10 being the hardest only pros can play

---

**Average Elixir Cost**
Return the float as provided (e.g., `3.50`).

---

 **Card Role & Spell Balance Guidelines**

Spells are divided into:
-  **Small Spells** (): Log, Zap, Snowball, Barbarian Barrel, Arrows, Goblin Curse
-  **Big Spells** (): Fireball, Poison, Lightning, Rocket, Earthquake

 Every well-balanced deck **usually  includes one small and one big spell.**


---
 **Deck Usage**
-in the end of the output, yu must give it as a comments:
1 liner explanation of where and when and how to use each card
-saying which cards are defenders, which ones are win condintions and which ones are support
format should be:
-card 1 explanation
-card 2 explanation, etc.

 **Deck Archetype Classification**
Choose the most accurate one:

- Beatdown
- Hybrid Beatdown
- Sparky Beatdown
- Air Beatdown
- Control
- Graveyard Control
- Royal Giant Control
- Splashyard
- Cycle
- Hog Cycle
- Mega Minion Cycle
- Miner Wall Breakers Cycle
- Bridge Spam
- Siege
- Spell Bait
- Off-Meta / Experimental
- Troll / Meme Deck

---

 **Deck Input Format (JSON)**:

The deck is provided as a JSON object with these fields:
- deck_name: string
- average_elixir_cost: number
- cards: list of strings (card names)
- comment: string (markdown summary of the deck’s analysis, including key points, pros, and cons)

Note: Keep `comment` brief—limit to 6 or 7 sentences max.

**Evaluate this deck:**

{{deck_json}}

- After evaluating the deck, give the comment of explanation of strong and weak sides of deck, synergies, and how, where and when spawn the cards. Also say which are win condintions of the deck, what is synergetic duo/trios and etc.

---

### Output JSON Format:

```json
{
  "overall": X,
  "defense": X,
  "attack": X,
  "synergy": X,
  "versatility": X,
  "avg_elixir": X.XX,
  "difficulty": X,
  "deck_type": "Deck Archetype",
  "comments": "format the output as said here for some given deck:  **Deck Usage**
  comments:
Hog Rider - Win condition, apply pressure, punish opponent's mistakes
Mega Knight - Tank and splash damage, counter big pushes
Firecracker - Support, splash damage, anti-air defense
Skeletons - Cycle, distract, chip damage
Ice Spirit - Cycle, freeze, support Hog pushes
Tesla - Defensive building, distracts, counters tanks
Fireball - Spell, support for Hog pushes, eliminate swarms
The Log - Spell, clear swarms, push back units, support Hog pushes

This Hog Cycle Control deck excels in defense, with the Mega Knight and Tesla providing sturdy defense against various threats. The Hog Rider serves as the primary win condition, applying pressure and punishing mistakes. Firecracker adds splash damage and anti-air support, complementing the
Hog pushes. The deck has good synergy and cycle consistency, allowing for quick and effective gameplay. The versatile card selection enables adaptability in different matchups and scenarios, making it a solid choice for ladder and competitive play."
}
```
"""

    # Substitute the deck under evaluation into the prompt template.
    evaluation_prompt = evaluation_prompt.replace(
        "{{deck_json}}",
        json.dumps(deck_data),
    )

    completion = openai.chat.completions.create(
        model="gpt-4.1",
        # JSON mode: the model is constrained to emit a single JSON object.
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": "You are a Clash Royale deck evaluation assistant.",
            },
            {
                "role": "user",
                "content": evaluation_prompt,
            }
        ],
    )

    content = completion.choices[0].message.content
    print("Raw model output:", repr(content))

    # The message content can be None (e.g. a refusal); json.loads(None)
    # would raise an unrelated TypeError, so report it explicitly.
    if content is None:
        raise ValueError("Evaluation response contained no content to parse.")

    # Attempt to parse the content as JSON
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Failed to parse evaluation response as JSON: {e}")
        print("Raw content:", content)
        raise

    # Validate and coerce every score BEFORE opening a run, so a malformed
    # answer never leaves a half-logged run behind.
    metric_keys = (
        "overall",
        "defense",
        "attack",
        "synergy",
        "versatility",
        "avg_elixir",
        "difficulty",
    )
    missing_scores = [key for key in metric_keys if key not in parsed]
    if missing_scores:
        raise KeyError(f"Evaluation response is missing scores: {missing_scores}")
    scores = {key: float(parsed[key]) for key in metric_keys}

    with mlflow.start_run():
        mlflow.log_param("user prompt", prompt)
        mlflow.log_param("deck_name", deck_data["deck_name"])
        mlflow.log_param("average_elixir_cost", deck_data["average_elixir_cost"])
        mlflow.log_param("cards", deck_data["cards"])

        # Metric names keep the "eval_" prefix followed by the score key.
        for key, value in scores.items():
            mlflow.log_metric(f"eval_{key}", value)

    return parsed


@mlflow.trace
def ask_agent(user_input: str):
    """
    Ask the OpenAI API for a deck recommendation, then evaluate that deck.

    The model is instructed to answer with a JSON object describing one deck.
    That object is parsed and handed to ``evaluation`` together with the
    original request, which scores the deck and logs the result to MLflow.

    Args:
        user_input: Free-text request describing the desired deck and,
            optionally, the cards the user has available.

    Returns:
        The parsed deck JSON produced by the model, so the caller can inspect
        the recommended deck.

    Raises:
        json.JSONDecodeError: If the model's answer is not valid JSON.
    """

    instruction_prompt = """
## Identity
You are a Clash Royale deck-building assistant. Provide concise, practical deck-building advice.
    
___

## Instructions
- Output only viable decks.
- Consider user's available cards if provided.
- Reference meta decks only if relevant.
- Include average elixir cost and key stats.
- Avoid unnecessary explanations.
- Output must be valid JSON matching the provided schema.

___

## Output Format

Return a JSON object with these fields:
- deck_name: string
- average_elixir_cost: number
- cards: list of strings (card names)
- comment: string (markdown summary of the deck’s analysis, including key points, pros, and cons)

Note: Keep `comment` brief—limit to 6 or 7 sentences max.

___

## Current Meta Decks

```json
[
    {"deck_name": "Giant Wizard Control", "average_elixir_cost": 3.9, "cards": [ "Giant", "Mini P.E.K.K.A", "Wizard", "Arrows", "Bomber", "Musketeer", "Valkyrie", "Electro Spirit" ]},
    {"deck_name": "Anti-Swarm Control", "average_elixir_cost": 3.6, "cards": [ "Knight", "Archers", "Wizard", "Arrows", "Bomber", "Mini P.E.K.K.A", "Giant", "Fireball" ]}
]
```
"""

    response = openai.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": instruction_prompt },
            {"role": "user", "content": user_input},
        ],
        # JSON mode: the model is constrained to emit a single JSON object.
        text={"format": {"type": "json_object"}}
    )
    content = response.output_text

    # Surface the raw text on a parse failure, mirroring evaluation(), so a
    # malformed answer can be diagnosed from the notebook output.
    try:
        deck_data = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Failed to parse deck response as JSON: {e}")
        print("Raw content:", content)
        raise

    evaluation(user_input, deck_data)

    # Return the recommended deck; previously it was discarded after being
    # evaluated and the caller never saw it.
    return deck_data

In [17]:
# Sample deck requests, sent one after another in this order. Each one makes
# ask_agent generate a deck and then evaluate/log it.
deck_requests = (
    """
I need a deck for Hog Cycle Control. I have the following cards available: Mega Knight, Firecracker, Tesla, Hog Rider, Skeletons, Ice Spirit, Fireball, The Log.
Please provide a deck that is effective in the current meta, with a focus on Hog Cycle Control.
""",
    """
I want to counter a Giant deck with a Hog Cycle Control deck. I have the following cards available: Mega Knight, Firecracker, Tesla, Hog Rider, Skeletons, Ice Spirit, Fireball, The Log.
Please provide a deck that is effective in the current meta, with a focus on Hog Cycle Control.
""",
    """
I need a deck for Wizard Control. I have the following cards available: Giant, Mini P.E.K.K.A, Wizard, Arrows, Bomber, Musketeer, Valkyrie, Electro Spirit.
Please provide a deck that is effective in the current meta, with a focus on Wizard Control.
""",
)

for deck_request in deck_requests:
    ask_agent(deck_request)

Raw model output: '{\n  "overall": 8,\n  "defense": 8,\n  "attack": 7,\n  "synergy": 8,\n  "versatility": 8,\n  "avg_elixir": 2.90,\n  "difficulty": 5,\n  "deck_type": "Hog Cycle",\n  "comments": "comments:\\nHog Rider - Win condition, apply pressure, punish opponent\'s mistakes\\nMega Knight - Tank and splash damage, counter big pushes\\nFirecracker - Support, splash damage, anti-air defense\\nSkeletons - Cycle, distract, chip damage\\nIce Spirit - Cycle, freeze, support Hog pushes\\nTesla - Defensive building, distracts, counters tanks\\nFireball - Spell, support for Hog pushes, eliminate swarms\\nThe Log - Spell, clear swarms, push back units, support Hog pushes\\n\\nThis Hog Cycle Control deck excels in defense, with the Mega Knight and Tesla providing sturdy defense against various threats. The Hog Rider serves as the primary win condition, applying pressure and punishing mistakes. Firecracker adds splash damage and anti-air support, complementing the Hog pushes. The deck has good