In [1]:
# Load your OpenAI API key from a .env file
from dotenv import load_dotenv
import os

# Read variables from the local .env file into the process environment.
load_dotenv()

# Fail early with a readable message: assigning None into os.environ raises an
# opaque "TypeError: str expected, not NoneType" when the key is missing.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key


In [2]:
import logging
import random
from bespokelabs import curator

# Raise the curator logger to INFO so its log records are emitted while scenarios are generated
logger = logging.getLogger("bespokelabs.curator")
logger.setLevel("INFO")


In [3]:
from pydantic import BaseModel, Field

# Structured-output schema for a single generated scenario.
# Used below as the `response_format` of MilitaryScenarioGenerator.
# (Documented with comments rather than a class docstring so the pydantic
# model definition itself stays unchanged.)
class MilitaryScenario(BaseModel):
    # The scenario text; read back as `response.scenario` in the generator's parse().
    scenario: str = Field(description="A comprehensive tactical military scenario.")

class MilitaryScenarioGenerator(curator.LLM):
    """Curator LLM that requests one structured tactical military scenario per input row."""
    response_format = MilitaryScenario

    def prompt(self, input: dict) -> str:
        """Build the generation prompt for one row.

        Args:
            input: Row dict; only its ``"index"`` entry is read and it is
                interpolated into the opening line of the prompt.

        Returns:
            The full prompt text: sections separated by a blank line, with the
            lines inside each section separated by a single newline.
        """
        scenario_number = input["index"]
        sections = [
            [
                f"Generate a comprehensive tactical military scenario #{scenario_number} with the following structure and details:",
            ],
            [
                "1. SCENARIO OVERVIEW:",
                "- Brief narrative context (geopolitical background, conflict type, strategic objectives)",
                "- Time parameters (year/era, time of day, season)",
                "- Duration of engagement (expected or ongoing)",
            ],
            [
                "2. TERRAIN ANALYSIS:",
                "- Primary terrain type (urban, mountainous, jungle, desert, arctic, littoral, etc.)",
                "- Key terrain features (elevation changes, choke points, cover/concealment)",
                "- Mobility corridors and obstacles",
                "- Weather conditions and visibility",
                "- Civilian presence and infrastructure",
            ],
            [
                "3. FORCE COMPOSITION:",
                "BLUE FORCE (Friendly/Attacker):",
                "- Unit type/size/organization",
                "- Weapons systems (small arms, crew-served, armored, air support, artillery)",
                "- Logistics status (supply lines, ammunition, fuel, medical)",
                "- Communications capabilities",
                "- Training level and experience",
                "- Morale and emotional state",
            ],
            [
                "RED FORCE (Enemy/Defender):",
                "- Unit type/size/organization",
                "- Weapons systems and capabilities",
                "- Defensive preparations (fortifications, obstacles, ambush positions)",
                "- Known logistics status",
                "- Training level and experience",
                "- Morale and emotional state",
            ],
            [
                "4. TACTICAL PARAMETERS:",
                "- Current phase of operation (approach, attack, defense, withdrawal)",
                "- Known intelligence on enemy positions/dispositions",
                "- Rules of Engagement constraints",
                "- Potential COA (Courses of Action) for both sides",
            ],
            [
                "5. DECISION POINTS:",
                "- Identify 2-3 critical tactical decisions required",
                "- Highlight key variables affecting outcomes",
                "- Note potential second-order effects",
            ],
            [
                "SPECIAL REQUIREMENTS:",
                "- Maintain realistic force ratios and capabilities",
                "- Include plausible fog of war elements",
                "- Consider combined arms effects where applicable",
                "- Allow for multiple valid solutions to tactical problems",
                "- Format with clear section headers for easy parsing",
                "- Include quantifiable metrics where possible (distances, time estimates, force sizes)",
            ],
            [
                "The scenario should enable rigorous analysis of:",
                "- Relative combat power calculation",
                "- Terrain exploitation",
                "- Force employment options",
                "- Risk assessment",
                "- Decision advantage opportunities",
            ],
        ]
        return "\n\n".join("\n".join(section_lines) for section_lines in sections)

    def parse(self, input: dict, response: MilitaryScenario) -> dict:
        """Pair the generated scenario text with the index of the row that requested it.

        Args:
            input: The row dict that was passed to ``prompt``.
            response: The structured response carrying the scenario text.

        Returns:
            A dict with keys ``"index"`` and ``"scenario_description"``.
        """
        row = {"index": input["index"]}
        row["scenario_description"] = response.scenario
        return row

# Scenario generator backed by gpt-4o-mini through the OpenAI backend, with batch mode off
generator = MilitaryScenarioGenerator(model_name="gpt-4o-mini", backend="openai", batch=False)


In [4]:
# One input row per scenario, numbered 1 through 150
inputs = [{"index": scenario_number} for scenario_number in range(1, 151)]

# Run every row through the generator
scenarios = generator(inputs)


Output()

In [5]:
# Spot-check: show one randomly chosen scenario
random_scenario = random.choice(scenarios)
scenario_number = random_scenario["index"]
scenario_text = random_scenario["scenario_description"]
print(f"Scenario #{scenario_number}:\n\n{scenario_text}")


Scenario #101:

#101 - Tactical Military Scenario

## SCENARIO OVERVIEW:
- **Narrative Context:** In 2024, tensions escalate in the Baltics as a major political rift opens between NATO member states and a resurgent Russia. Following a border skirmish, Russian forces have established a fortified foothold in Estonia, threatening the sovereignty of neighboring NATO allies. The blue force (NATO) aims to reclaim territory and deter further incursions while ensuring regional stability.
- **Time Parameters:** Year: 2024, Time of Day: early morning (0600 hours), Season: early spring.
- **Duration of Engagement:** Expected duration is 72 hours, with immediate engagements anticipated as forces close in 

## TERRAIN ANALYSIS:
- **Primary Terrain Type:** Mixed urban/wooded terrain transitioning towards open fields.
- **Key Terrain Features:** 
   - Elevation change at a hillock near the town of Tartu, providing overwatch.
   - Choke points identified on the main access routes to the city, particul

In [6]:
from together import Together

# Re-read the .env file so the Together key is available in this cell as well.
load_dotenv()

# Fail early with a readable message: assigning None into os.environ raises an
# opaque "TypeError: str expected, not NoneType" when the key is missing.
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Add it to your .env file or export it before running this notebook."
    )
os.environ["TOGETHER_API_KEY"] = together_api_key

# Client used by the reasoning-generation cell.
client = Together(api_key=together_api_key)


In [7]:
import time
from tqdm import tqdm
from collections import deque

# Together rate limiting setup
# NOTE(review): the 429 responses logged by an earlier run of this cell report a
# limit of 3 queries per minute for this model on "Build Tier 1". Lower max_qpm to
# match your account tier, otherwise this client-side limiter will never engage.
max_qpm = 60
window_seconds = 60
request_timestamps = deque()

# Optional performance logging
response_times = []
# NOTE(review): token_lengths and token_limit_hits are declared here but nothing in
# this cell populates them; kept so that any code relying on the names still runs.
token_lengths = []
token_limit_hits = []
error_count = 0
max_tokens = 2048

# Output storage
attack_reasonings = []
defense_reasonings = []


def build_reasoning_prompt(side, description):
    """Return the reasoning prompt for one side ("attacker" / "defender") of a scenario."""
    return f"""
<think>
You are a tactical reasoning model. Do not provide final strategies. Only explain the internal tactical reasoning process for the {side.upper()} in the scenario below.

Scenario:
{description}

Do not describe the other side. Think only from the perspective of the {side.upper()}.

</think>
""".strip()


def wait_for_rate_limit():
    """Block until a request may be sent, then record its timestamp.

    Sliding-window limiter: at most ``max_qpm`` requests per ``window_seconds``.
    The clock is re-read after every sleep and expired timestamps are dropped,
    so the wait always terminates (the previous inline loop reused a stale
    ``now`` and never popped after sleeping, which made it spin forever once
    the limit was reached).
    """
    while True:
        now = time.time()
        # Forget requests that have left the window.
        while request_timestamps and now - request_timestamps[0] >= window_seconds:
            request_timestamps.popleft()
        if len(request_timestamps) < max_qpm:
            break
        # Window is full: wait until the oldest request expires, then re-check.
        sleep_time = window_seconds - (now - request_timestamps[0])
        print(f"⏳ Rate limit hit — sleeping {sleep_time:.1f}s...")
        time.sleep(sleep_time)
    request_timestamps.append(time.time())


def request_reasoning(prompt):
    """Send one prompt to DeepSeek-R1 and return ``(clean_text, elapsed_seconds)``.

    The response text is flattened to a single line and terminated with the
    " </think>" tag that the stop sequence removed. Any exception raised by the
    client (or by a missing message content) propagates to the caller.
    """
    # Every request, including retries, counts against the rate window.
    wait_for_rate_limit()
    started = time.time()
    response = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1",
        messages=[{"role": "user", "content": prompt}],
        stop=["</think>"],
        max_tokens=max_tokens
    )
    elapsed = time.time() - started

    text = response.choices[0].message.content.strip()
    clean_text = text.replace("\n", " ").replace("\r", " ").strip() + " </think>"
    return clean_text, elapsed


# Loop through all scenarios
for scenario in tqdm(scenarios, desc="Generating Reasoning with DeepSeek"):
    idx = scenario["index"]
    description = scenario["scenario_description"]

    for side in ["attacker", "defender"]:
        prompt = build_reasoning_prompt(side, description)

        # Primary attempt
        try:
            clean_text, elapsed = request_reasoning(prompt)

        # Retry once on error (deliberately broad: best-effort so one failure
        # does not abort a multi-hour run)
        except Exception as e:
            error_count += 1
            print(f"⚠️ Error on scenario {idx} ({side}): {e}")
            print("⏱️ Retrying after 60 seconds...")
            time.sleep(60)
            try:
                clean_text, elapsed = request_reasoning(prompt)
            except Exception as e2:
                error_count += 1
                print(f"❌ Retry failed on scenario {idx} ({side}): {e2}")
                # Placeholder keeps the output lists aligned with `scenarios`.
                clean_text = "<think>Error generating reasoning</think>"
                elapsed = 0.0

        # Exactly one timing entry per (scenario, side); 0.0 marks a failed pair.
        response_times.append(elapsed)

        # Save result
        if side == "attacker":
            attack_reasonings.append(clean_text)
        else:
            defense_reasonings.append(clean_text)


Generating Reasoning with DeepSeek:  42%|████▏     | 63/150 [1:28:59<2:11:14, 90.52s/it] 

⚠️ Error on scenario 64 (attacker): Error communicating with API: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
⏱️ Retrying after 60 seconds...


Generating Reasoning with DeepSeek:  81%|████████▏ | 122/150 [2:57:23<31:29, 67.47s/it]  

⚠️ Error on scenario 123 (defender): Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Generating Reasoning with DeepSeek:  82%|████████▏ | 123/150 [2:59:51<41:16, 91.70s/it]

⚠️ Error on scenario 124 (attacker): Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...
❌ Retry failed on scenario 124 (attacker): Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}


Generating Reasoning with DeepSeek: 100%|██████████| 150/150 [3:31:41<00:00, 84.68s/it] 


In [10]:
import random

# Draw one (scenario, attack reasoning, defense reasoning) triple at random and print it
sample = random.choice(list(zip(scenarios, attack_reasonings, defense_reasonings)))
sampled_scenario, sampled_attack, sampled_defense = sample

print(f"Scenario #{sampled_scenario['index']}:\n\n{sampled_scenario['scenario_description']}")
for section_header, section_text in (
    ("\n--- ATTACK REASONING ---\n", sampled_attack),
    ("\n--- DEFENSE REASONING ---\n", sampled_defense),
):
    print(section_header)
    print(section_text)


Scenario #101:

#101 - Tactical Military Scenario

## SCENARIO OVERVIEW:
- **Narrative Context:** In 2024, tensions escalate in the Baltics as a major political rift opens between NATO member states and a resurgent Russia. Following a border skirmish, Russian forces have established a fortified foothold in Estonia, threatening the sovereignty of neighboring NATO allies. The blue force (NATO) aims to reclaim territory and deter further incursions while ensuring regional stability.
- **Time Parameters:** Year: 2024, Time of Day: early morning (0600 hours), Season: early spring.
- **Duration of Engagement:** Expected duration is 72 hours, with immediate engagements anticipated as forces close in 

## TERRAIN ANALYSIS:
- **Primary Terrain Type:** Mixed urban/wooded terrain transitioning towards open fields.
- **Key Terrain Features:** 
   - Elevation change at a hillock near the town of Tartu, providing overwatch.
   - Choke points identified on the main access routes to the city, particul

In [17]:
from huggingface_hub import HfApi, HfFolder
from datasets import Dataset

load_dotenv()

# Load HF API key
hf_api_key = os.getenv("HF_API_KEY")

# Fail early with a readable message instead of handing None to save_token().
if not hf_api_key:
    raise RuntimeError(
        "HF_API_KEY is not set. Add it to your .env file or export it before running this notebook."
    )

# Save token to local Hugging Face config
HfFolder.save_token(hf_api_key)

# Create a Hugging Face API client
api = HfApi()


In [12]:
# Merge scenarios and generated reasonings
# The three lists are matched by position, so they must have the same length.
# Check this up front: a shorter reasoning list would otherwise surface as a bare
# IndexError, and a longer one would be truncated without any warning.
if not (len(scenarios) == len(attack_reasonings) == len(defense_reasonings)):
    raise ValueError(
        "Cannot merge: got "
        f"{len(scenarios)} scenarios, {len(attack_reasonings)} attack reasonings and "
        f"{len(defense_reasonings)} defense reasonings; re-run the reasoning generation cell."
    )

final_dataset = [
    {
        "index": scenario["index"],
        "scenario_description": scenario["scenario_description"],
        "attack_reasoning": attack_reasoning,
        "defense_reasoning": defense_reasoning,
    }
    for scenario, attack_reasoning, defense_reasoning in zip(
        scenarios, attack_reasonings, defense_reasonings
    )
]


In [13]:
import pandas as pd

# Tabular preview of the merged records: first three rows, then the column names
df_preview = pd.DataFrame(data=final_dataset)

first_rows = df_preview.head(3)
print(first_rows)

column_names = df_preview.columns.tolist()
print("\nDataset columns:", column_names)


   index                               scenario_description  \
0      1  ### SCENARIO OVERVIEW:\n- **Narrative Context:...   
1      2  ### SCENARIO OVERVIEW:\n\n- **Narrative Contex...   
2      3  ### SCENARIO OVERVIEW:\n- **Context**: The yea...   

                                    attack_reasoning  \
0  <think> Okay, so I need to figure out the tact...   
1  <think> Okay, let's break down the tactical re...   
2  <think> Okay, so I need to figure out the tact...   

                                   defense_reasoning  
0  <think> Okay, so I need to figure out the tact...  
1  <think> Okay, so I need to figure out the tact...  
2  <think> Okay, so I need to figure out the tact...  

Dataset columns: ['index', 'scenario_description', 'attack_reasoning', 'defense_reasoning']


In [14]:
from datasets import Dataset

# Build a Hugging Face Dataset from the merged list of record dicts
dataset = Dataset.from_list(final_dataset)

# Leave the object as the cell's last expression so its summary is displayed
dataset


Dataset({
    features: ['index', 'scenario_description', 'attack_reasoning', 'defense_reasoning'],
    num_rows: 150
})

In [19]:
# Upload the dataset to its Hugging Face Hub repository
hub_repo_id = "ZennyKenny/tactical-military-reasoning-v.1.0"
dataset.push_to_hub(hub_repo_id, token=os.getenv("HF_API_KEY"))


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/ZennyKenny/tactical-military-reasoning-v.1.0/commit/5e032b94f78de6abcd96462067b7410c0620d28b', commit_message='Upload dataset', commit_description='', oid='5e032b94f78de6abcd96462067b7410c0620d28b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/ZennyKenny/tactical-military-reasoning-v.1.0', endpoint='https://huggingface.co', repo_type='dataset', repo_id='ZennyKenny/tactical-military-reasoning-v.1.0'), pr_revision=None, pr_num=None)