<a href="https://colab.research.google.com/github/lonespear/ma206x/blob/main/emd_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [48]:
class EMD:
    """Generate a synthetic manning scenario as a set of pandas DataFrames.

    On construction four tables are built and stored as attributes:

    * ``soldiers``  -- one row per soldier (``n_soldiers`` rows)
    * ``billets``   -- one row per billet (``n_billets`` rows)
    * ``pcs_costs`` -- one row per ordered (from_base, to_base) pair
    * ``policies``  -- fixed (parameter, value) rows

    Random draws use NumPy's global legacy RNG (``np.random``), so passing
    ``seed`` makes the soldier and billet tables reproducible.  The date
    columns are offsets from the current date and therefore still depend on
    the day the object is created.

    Args:
        n_soldiers: Number of soldier rows to generate.
        n_billets: Number of billet rows to generate.
        seed: Optional seed for ``np.random``.  Any integer, including 0,
            is applied; ``None`` leaves the global RNG state as it is.
    """

    def __init__(self, n_soldiers=100, n_billets=40, seed=None):
        self.n_soldiers = n_soldiers
        self.n_billets = n_billets
        # Compare against None rather than relying on truthiness, otherwise
        # seed=0 would silently be treated as "no seed".
        if seed is not None:
            np.random.seed(seed)

        # reference categories
        self.bases = ['FBNC', 'JBLM', 'JBER', 'FHTX', 'FBGA']
        self.paygrades = ['E-3', 'E-4', 'E-5', 'E-6']
        self.mos_list = ['11B', '68W', '25U', '35F', '12B', '92Y', '88M', '42A', '15T', '31B']
        self.clearances = ['None', 'Secret', 'TS']
        self.pme = ['None', 'BLC', 'ALC', 'SLC']
        self.languages = ['None', 'Spanish', 'Arabic', 'French']

        # generate all datasets upon instantiation
        # (soldiers before billets: both consume the same global RNG stream,
        # so the order determines the values produced for a given seed)
        self.soldiers = self._generate_soldiers()
        self.billets = self._generate_billets()
        self.pcs_costs = self._generate_pcs_costs()
        self.policies = self._generate_policies()

    # ------------------------
    # Soldier generator
    # ------------------------
    def _generate_soldiers(self):
        """Return a DataFrame with ``self.n_soldiers`` randomly drawn soldiers.

        Categorical columns are sampled with ``np.random.choice`` using the
        hard-coded probability vectors below; ``m4_score`` is a scaled
        Beta(10, 2) draw rounded and clipped to [23, 40]; ``acft_score`` is a
        Normal(450, 60) draw clipped to [360, 600] (left as float);
        ``available_from`` is today's date plus 0-364 days, formatted as
        ``YYYY-MM-DD``.
        """
        n = self.n_soldiers
        # Read the clock once so every row is offset from the same date.
        today = datetime.today()
        df = pd.DataFrame({
            "soldier_id": range(1, n + 1),
            "base": np.random.choice(self.bases, n, p=[0.15, 0.1, 0.25, 0.4, 0.1]),
            "paygrade": np.random.choice(self.paygrades, n, p=[0.2, 0.4, 0.25, 0.15]),
            "mos": np.random.choice(self.mos_list, n),
            "skill_level": np.random.choice([1, 2, 3, 4, 5], n, p=[0.3, 0.4, 0.2, 0.08, 0.02]),
            "clearance": np.random.choice(self.clearances, n, p=[0.1, 0.75, 0.15]),
            "pme": np.random.choice(self.pme, n, p=[0.3, 0.35, 0.25, 0.1]),
            "airborne": np.random.choice([0, 1], n, p=[0.7, 0.3]),
            "pathfinder": np.random.choice([0, 1], n, p=[0.95, 0.05]),
            "ranger": np.random.choice([0, 1], n, p=[0.95, 0.05]),
            "umo": np.random.choice([0, 1], n, p=[0.95, 0.05]),
            "m4_score": np.round(np.clip(40 * np.random.beta(10, 2, size=n), 23, 40)).astype(int),
            "acft_score": np.clip(np.random.normal(loc=450, scale=60, size=n), 360, 600),
            "body_composition_pass": np.random.choice([0, 1], n, p=[0.1, 0.9]),
            "asi_air_assault": np.random.choice([0, 1], n, p=[0.85, 0.15]),
            "asi_sniper": np.random.choice([0, 1], n, p=[0.97, 0.03]),
            "asi_jumpmaster": np.random.choice([0, 1], n, p=[0.95, 0.05]),
            "driver_license": np.random.choice(
                ["None", "HMMWV", "LMTV", "JLTV"], n, p=[0.4, 0.3, 0.2, 0.1]
            ),
            "med_cat": np.random.choice([1, 2, 3, 4], n, p=[0.7, 0.2, 0.08, 0.02]),
            "dental_cat": np.random.choice([1, 2, 3, 4], n, p=[0.8, 0.15, 0.04, 0.01]),
            "language": np.random.choice(self.languages, n, p=[0.7, 0.15, 0.1, 0.05]),
            "dwell_months": np.random.randint(0, 37, n),
            "available_from": [
                # int(): timedelta does not accept NumPy integer scalars
                (today + timedelta(days=int(x))).strftime("%Y-%m-%d")
                for x in np.random.randint(0, 365, n)
            ],
        })
        return df

    # ------------------------
    # Billet generator
    # ------------------------
    def _generate_billets(self):
        """Return a DataFrame with ``self.n_billets`` randomly drawn billets.

        ``billet_id`` starts at 101.  ``start_date`` is today's date plus
        0-179 days, formatted as ``YYYY-MM-DD``.  ``min_rank`` and
        ``max_rank`` are drawn independently, so after the draw any row whose
        ``max_rank`` falls below its ``min_rank`` (by position in
        ``self.paygrades``) has ``max_rank`` raised to ``min_rank``; this
        keeps every billet's rank window non-empty without consuming extra
        random numbers.
        """
        m = self.n_billets
        today = datetime.today()
        df = pd.DataFrame({
            "billet_id": range(101, 101 + m),
            "base": np.random.choice(self.bases, m, p=[0.2, 0.15, 0.2, 0.35, 0.1]),
            "priority": np.random.choice([1, 2, 3], m, p=[0.3, 0.5, 0.2]),
            "mos_required": np.random.choice(
                self.mos_list, m,
                p=[0.2, 0.15, 0.1, 0.1, 0.15, 0.1, 0.1, 0.05, 0.03, 0.02]),
            "min_rank": np.random.choice(['E-3', 'E-4', 'E-5'], m, p=[0.3, 0.5, 0.2]),
            "max_rank": np.random.choice(['E-4', 'E-5', 'E-6'], m, p=[0.2, 0.5, 0.3]),
            "skill_level_req": np.random.choice([1, 2, 3], m, p=[0.5, 0.35, 0.15]),
            "clearance_req": np.random.choice(self.clearances, m, p=[0.05, 0.7, 0.25]),
            "start_date": [
                (today + timedelta(days=int(x))).strftime("%Y-%m-%d")
                for x in np.random.randint(0, 180, m)
            ],
        })

        # Repair inverted rank windows (e.g. min E-5 / max E-4).
        rank_order = {pg: k for k, pg in enumerate(self.paygrades)}
        lowest = df["min_rank"].map(rank_order)
        highest = df["max_rank"].map(rank_order)
        df["max_rank"] = df["max_rank"].where(highest >= lowest, df["min_rank"])
        return df

    # ------------------------
    # PCS cost matrix
    # ------------------------
    def _generate_pcs_costs(self):
        """Return the base-to-base cost table in long form.

        One row per ordered pair of ``self.bases`` with columns
        ``from_base``, ``to_base`` and ``pcs_cost_usd``.  Same-base moves
        cost 0.  Every other pair gets an integer in [800, 3500) that is
        symmetric (cost(a, b) == cost(b, a)) and does not depend on the
        constructor's ``seed``: it is drawn from a private ``RandomState``
        keyed on the unordered pair of base indices, which also leaves the
        global ``np.random`` state untouched.
        """
        rows = []
        for i, a in enumerate(self.bases):
            for j, b in enumerate(self.bases):
                if a == b:
                    cost = 0
                else:
                    # symmetric cost: key on the unordered pair so that
                    # (i, j) and (j, i) share one seed
                    lo, hi = (i, j) if i < j else (j, i)
                    pair_rng = np.random.RandomState(lo * 10 + hi)
                    cost = pair_rng.randint(800, 3500)
                rows.append((a, b, cost))
        return pd.DataFrame(rows, columns=["from_base", "to_base", "pcs_cost_usd"])

    # ------------------------
    # Policy knobs
    # ------------------------
    def _generate_policies(self):
        """Return the fixed policy table as ``parameter`` / ``value`` rows."""
        return pd.DataFrame([
            ("min_dwell_months_for_pcs", 6),
            ("airborne_required_penalty", 1000),
            ("language_required_penalty", 1000),
            ("clearance_mismatch_penalty", 2000),
            ("preference_bonus_same_base", -150),
            ("preference_bonus_pref1", -100),
            ("preference_bonus_pref2", -50),
            ("priority_weight_high", 3),
            ("priority_weight_med", 2),
            ("priority_weight_low", 1),
        ], columns=["parameter", "value"])


In [49]:
# Build a seeded scenario with 200 soldiers and 40 billets; all four tables
# are generated inside the EMD constructor.
scenario = EMD(n_soldiers=200, n_billets=40, seed=1991)

In [52]:
# Display the generated soldier table (rendered as the notebook cell output).
scenario.soldiers

Unnamed: 0,soldier_id,base,paygrade,mos,skill_level,clearance,pme,airborne,pathfinder,ranger,...,body_composition_pass,asi_air_assault,asi_sniper,asi_jumpmaster,driver_license,med_cat,dental_cat,language,dwell_months,available_from
0,1,FHTX,E-4,35F,3,Secret,BLC,0,0,0,...,1,0,0,0,HMMWV,2,3,,9,2025-12-11
1,2,FBNC,E-5,92Y,4,TS,BLC,0,1,0,...,0,0,0,0,,1,1,,18,2026-06-22
2,3,JBER,E-5,35F,3,Secret,,1,0,0,...,1,0,0,0,,1,2,,12,2026-08-25
3,4,JBLM,E-4,11B,3,TS,,1,0,0,...,1,1,0,0,HMMWV,1,1,,23,2026-06-27
4,5,JBER,E-5,42A,2,TS,SLC,0,0,0,...,1,0,0,0,HMMWV,1,1,Arabic,10,2025-09-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,JBER,E-6,88M,2,Secret,BLC,1,0,0,...,1,1,0,0,HMMWV,1,2,,32,2026-07-15
196,197,FHTX,E-4,31B,2,Secret,SLC,0,0,1,...,1,0,0,0,HMMWV,1,1,Spanish,23,2025-10-21
197,198,JBER,E-3,31B,3,Secret,BLC,1,0,0,...,1,0,0,1,HMMWV,3,1,Spanish,13,2026-06-13
198,199,FHTX,E-3,88M,2,Secret,BLC,0,0,0,...,1,0,0,0,,1,2,Arabic,12,2025-11-24


In [1]:


import numpy as np
import pandas as pd
from datetime import datetime, timedelta

class SyntheticManningData:
    """Generate synthetic soldier records whose attributes depend on paygrade.

    A paygrade is drawn for every soldier from ``paygrade_mix``; the
    remaining attributes are then drawn one soldier at a time using the
    per-paygrade settings in ``paygrade_params``.  The result is stored in
    ``self.soldiers`` as a DataFrame with one row per soldier.

    Random draws use NumPy's global legacy RNG (``np.random``).

    Args:
        n_soldiers: Number of soldier rows to generate.
        seed: Optional seed for ``np.random``; ``None`` leaves the global
            RNG state as it is.
    """

    # Per-paygrade generation settings read by ``_make_soldier``:
    #   acft_mean / acft_sd   -> parameters of the normal draw for acft_score
    #   m4_alpha / m4_beta    -> parameters of the beta draw for m4_score
    #   pme / pme_p           -> choices and probabilities for the pme column
    #   clearance / clearance_p -> choices and probabilities for clearance
    #   asi_probs             -> probability of a 1 for each 0/1 flag column
    paygrade_params = {
        "E-3": {
            "acft_mean": 420, "acft_sd": 65,
            "m4_alpha": 8, "m4_beta": 3,
            "pme": ["None"], "pme_p": [1.0],
            "clearance": ["None", "Secret", "TS"], "clearance_p": [0.2, 0.7, 0.1],
            "asi_probs": {"airborne": 0.2, "ranger": 0.01, "pathfinder": 0.02, "umo": 0.05},
        },
        "E-4": {
            "acft_mean": 440, "acft_sd": 60,
            "m4_alpha": 9, "m4_beta": 2,
            "pme": ["None", "BLC"], "pme_p": [0.5, 0.5],
            "clearance": ["None", "Secret", "TS"], "clearance_p": [0.15, 0.75, 0.1],
            "asi_probs": {"airborne": 0.25, "ranger": 0.03, "pathfinder": 0.05, "umo": 0.1},
        },
        "E-5": {
            "acft_mean": 470, "acft_sd": 55,
            "m4_alpha": 10, "m4_beta": 1.8,
            "pme": ["BLC", "ALC"], "pme_p": [0.6, 0.4],
            "clearance": ["None", "Secret", "TS"], "clearance_p": [0.1, 0.75, 0.15],
            "asi_probs": {"airborne": 0.35, "ranger": 0.05, "pathfinder": 0.07, "umo": 0.15},
        },
        "E-6": {
            "acft_mean": 480, "acft_sd": 50,
            "m4_alpha": 11, "m4_beta": 1.5,
            "pme": ["ALC", "SLC"], "pme_p": [0.7, 0.3],
            "clearance": ["None", "Secret", "TS"], "clearance_p": [0.05, 0.75, 0.2],
            "asi_probs": {"airborne": 0.4, "ranger": 0.08, "pathfinder": 0.1, "umo": 0.2},
        },
    }

    def __init__(self, n_soldiers=100, seed=None):
        self.n_soldiers = n_soldiers
        if seed is not None:
            np.random.seed(seed)

        self.bases = ['FBNC', 'JBLM', 'JBER', 'FHTX', 'FBGA']
        self.mos_list = ['11B', '68W', '25U', '35F', '12B', '92Y', '88M', '42A', '15T', '31B']
        self.languages = ['None', 'Spanish', 'Arabic', 'French']

        self.paygrade_mix = {"E-3": 0.25, "E-4": 0.4, "E-5": 0.25, "E-6": 0.1}
        # The table is a class attribute, so it must be reached through
        # ``self``; a bare ``paygrade_params`` is not visible inside a method
        # and raises NameError.
        self.params = self.paygrade_params

        self.soldiers = self._generate_soldiers()

    def _make_soldier(self, idx, pg):
        """Return one soldier record as a dict.

        Args:
            idx: Value stored in the ``soldier_id`` field.
            pg: Paygrade key into ``self.params`` (e.g. ``"E-4"``).

        Returns:
            dict with the fixed fields below plus one 0/1 entry for every
            key in the paygrade's ``asi_probs``.
        """
        p = self.params[pg]

        # Normal draw clipped to [200, 600], truncated to int.
        acft = int(np.clip(np.random.normal(p["acft_mean"], p["acft_sd"]), 200, 600))
        # Beta draw scaled to a 40-point score, rounded, clipped to [23, 40].
        m4 = int(np.clip(round(40 * np.random.beta(p["m4_alpha"], p["m4_beta"])), 23, 40))

        asi_flags = {
            asi: np.random.choice([0, 1], p=[1 - prob, prob])
            for asi, prob in p["asi_probs"].items()
        }

        return {
            "soldier_id": idx,
            "paygrade": pg,
            "base": np.random.choice(self.bases, p=[0.15, 0.1, 0.25, 0.4, 0.1]),
            "mos": np.random.choice(self.mos_list),
            "clearance": np.random.choice(p["clearance"], p=p["clearance_p"]),
            "pme": np.random.choice(p["pme"], p=p["pme_p"]),
            "language": np.random.choice(self.languages, p=[0.7, 0.15, 0.1, 0.05]),
            "dwell_months": np.random.randint(0, 37),
            # int(): timedelta does not accept NumPy integer scalars, so
            # coerce defensively regardless of what randint returns.
            "available_from": (
                datetime.today() + timedelta(days=int(np.random.randint(0, 365)))
            ).strftime("%Y-%m-%d"),
            "acft_score": acft,
            "m4_score": m4,
            **asi_flags,
        }

    def _generate_soldiers(self):
        """Draw a paygrade per soldier, then build the soldier DataFrame.

        Paygrades are sampled from ``self.paygrade_mix``; soldier ids are
        assigned sequentially starting at 1.
        """
        pg_choices = np.random.choice(list(self.paygrade_mix.keys()),
                                      size=self.n_soldiers,
                                      p=list(self.paygrade_mix.values()))
        return pd.DataFrame(
            [self._make_soldier(i, pg) for i, pg in enumerate(pg_choices, start=1)]
        )
