## Agent-Based Model Restream Analysis

#### Code to run Model

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pyarrow as pa
import json
import mesa

# Show up to 100 rows when a DataFrame is displayed in the notebook.
pd.set_option("display.max_rows", 100)

In [1]:
import os
import sys

# Directory added to the import path so the ``aircrew_simulator`` imports in
# the next cell can be resolved. Set the AIRCREW_SIMULATOR_PATH environment
# variable to run the notebook on another machine; the fallback is the
# original hard-coded local path.
# NOTE(review): the saved output of the import cell below shows
# "ModuleNotFoundError: No module named 'aircrew_simulator'" - confirm this
# directory is the one that *contains* the ``aircrew_simulator`` package.
AIRCREW_SIMULATOR_PATH = os.environ.get(
    "AIRCREW_SIMULATOR_PATH",
    'C:\\Users\\a.poghosyan\\Documents\\aircrew_pipeline\\aircrew_batch_run_workflow\\aircrew_simulator',
)

# Guarded so that re-running the cell does not append the same entry again.
if AIRCREW_SIMULATOR_PATH not in sys.path:
    sys.path.append(AIRCREW_SIMULATOR_PATH)


In [2]:
from aircrew_simulator.run import run_batch

from aircrew_simulator.stages import MIOT
from aircrew_simulator.structure import (
    PilotBase,
    PipelineModelBase,
    Stage,
    StageManager,
    State,
    Results
)
from aircrew_simulator.analysis import (
    make_average_path,
    get_agents_data,
    make_time_series,
    make_average_times_path,
    make_stage_state_times_table,
    make_quantile_path,
)
from aircrew_simulator.model import PipelineModel
from typing import List

ModuleNotFoundError: No module named 'aircrew_simulator'

In [None]:
# Configuration passed to ``run_batch``. Key order inside every section is
# kept exactly as originally written.
model_params = {
    "version": "2.0.3-test",
    "simulation": dict(steps=120, start_month=11, iterations=5, failure=0),
    "fj_streaming": "med",
    # Every listed stage starts with zero pilots progressing and zero on hold.
    "init_pilots": {
        stage_name: {"progressing": 0, "hold": 0}
        for stage_name in (
            "mags", "eft", "bft", "fjlin", "ajt1", "ajt2",
            "melin", "mept", "mexo", "brt", "art", "artmar",
        )
    },
    # All variance terms are zero; only "miot" and "eft" have a key set that
    # differs from the common three-key layout.
    "variance": {
        "miot": dict(new_pilots=0),
        "mags": dict(
            drop_out_progressing=0, capacity_progressing=0, time_progressing=0
        ),
        "eft": dict(
            drop_out_stream=0,
            drop_out_progressing=0,
            capacity_progressing=0,
            time_progressing=0,
        ),
        **{
            stage_name: dict(
                drop_out_progressing=0, capacity_progressing=0, time_progressing=0
            )
            for stage_name in (
                "fjlin", "bft", "ajt1", "ajt2", "melin",
                "mept", "mexo", "brt", "art", "artmar",
            )
        },
    },
    # Per-stage pipeline settings.
    "pipeline": {
        "miot": dict(new_pilots=11, input_rate=1, time_hold=120),
        "mags": dict(
            drop_out_progressing=0.12,
            drop_out_hold=0,
            capacity_progressing=21,
            time_progressing=2,
            time_hold=120,
        ),
        "eft": dict(
            drop_out_progressing=0.09,
            drop_out_stream=0.6,
            drop_out_hold=0,
            capacity_progressing=11,
            time_progressing=6,
            time_hold=120,
        ),
        "fjlin": dict(
            drop_out_progressing=0.0,
            drop_out_hold=0.0,
            capacity_progressing=10,
            time_progressing=2,
            time_hold=120,
        ),
        "bft": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            time_progressing=13,
            capacity_progressing=4,
            time_hold=120,
        ),
        "ajt1": dict(
            drop_out_progressing=0.2,
            drop_out_hold=0,
            capacity_progressing=4,
            time_progressing=10,
            time_hold=120,
        ),
        "ajt2": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=4,
            time_progressing=6,
            pathway_complete="fj_complete",
            time_hold=120,
        ),
        "melin": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=4,
            time_progressing=2,
            time_hold=120,
        ),
        "mept": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=4,
            time_progressing=8,
            pathway_complete="me_complete",
            time_hold=120,
        ),
        "mexo": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=2,
            time_progressing=6,
            pathway_complete="me_complete",
            time_hold=120,
        ),
        "brt": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=10,
            time_progressing=6,
            time_hold=120,
        ),
        "art": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=8,
            time_progressing=5,
            time_hold=120,
        ),
        "artmar": dict(
            drop_out_progressing=0.05,
            drop_out_hold=0,
            capacity_progressing=9,
            time_progressing=2,
            pathway_complete="rw_complete",
            time_hold=120,
        ),
    },
    # Per-stage lists of integers in the range 1-12.
    # NOTE(review): presumably the calendar months in which a course can
    # start - confirm against the simulator.
    "schedule": {
        "mags": list(range(1, 13)),
        "eft": [2, 3, 5, 6, 7, 8, 9, 10, 11],
        "fjlin": [1, 3, 4, 6, 7, 9, 11],
        "bft": [1, 3, 4, 5],
        "ajt1": [1, 2, 3, 5, 8, 10, 11],
        "ajt2": [2, 3, 4, 6, 7, 9, 10],
        "melin": [1, 3, 4, 5, 6, 8, 9, 10, 11],
        "mept": [1, 3, 4, 6, 8, 9, 11, 12],
        "mexo": [3, 7, 11],
        "brt": [1, 3, 6, 9, 11],
        "art": [2, 4, 7, 10, 12],
        "artmar": [2, 4, 5, 6, 8, 9, 11, 12],
    },
}

In [None]:
def flatten_pivot(d: pd.DataFrame, col_str: str) -> pd.DataFrame:
    """Flatten tuple column labels into single strings and reset the index.

    Each column label is joined with "_" and then suffixed with
    ``"_" + col_str``; the index is moved into ordinary columns.

    Unlike the previous implementation, the caller's frame is left untouched
    (its columns used to be overwritten in place), so re-running a cell on the
    same input gives the same result.

    Args:
        d: Frame whose column labels are tuples of strings (e.g. the
            MultiIndex columns produced by a pivot).
        col_str: Suffix appended to every flattened column name.

    Returns:
        A new DataFrame with flat string column names and a reset index.
    """
    flat_names = d.columns.to_series().str.join("_") + "_" + col_str
    flattened = d.copy()
    flattened.columns = flat_names.tolist()
    return flattened.reset_index()

def get_agents_data(simulation_data: List[Results]) -> pd.DataFrame:
    """Build the agent-tracking frame from raw simulation results.

    The results are loaded into a DataFrame, the index is reset, and only the
    ``RunId``, ``Step``, ``AgentID``, ``Stage`` and ``State`` columns are kept.
    Each column is then converted to an Arrow-backed dtype (int64 for
    ``RunId``/``Step``, string for the rest; ``AgentID`` is stringified first).
    """
    arrow_int = pd.ArrowDtype(pa.int64())
    arrow_str = pd.ArrowDtype(pa.string())

    agents = pd.DataFrame(simulation_data).reset_index()[
        ["RunId", "Step", "AgentID", "Stage", "State"]
    ]

    agents["RunId"] = pd.Series(agents["RunId"], dtype=arrow_int)
    agents["Step"] = pd.Series(agents["Step"], dtype=arrow_int)
    agents["AgentID"] = pd.Series(agents["AgentID"].astype(str), dtype=arrow_str)
    agents["Stage"] = pd.Series(agents["Stage"], dtype=arrow_str)
    agents["State"] = pd.Series(agents["State"], dtype=arrow_str)

    return agents


def get_stage_state_count_data(agent_data: pd.DataFrame) -> pd.DataFrame:
    """Count rows per (RunId, Step, Stage, State) combination.

    Returns a flat frame with the four grouping columns plus a ``Count``
    column holding the number of rows in each group.
    """
    group_sizes = agent_data.groupby(["RunId", "Step", "Stage", "State"]).size()
    return group_sizes.reset_index(name="Count")


def get_time_series_data(stage_state_count_data: pd.DataFrame) -> pd.DataFrame:
    """Pivot per-group counts into one row per (RunId, Step).

    Columns become a (State, Stage) MultiIndex holding ``Count``; any
    combination without a row for a given (RunId, Step) is filled with 0.
    """
    wide_counts = stage_state_count_data.pivot(
        index=["RunId", "Step"],
        columns=["State", "Stage"],
        values="Count",
    )
    return wide_counts.fillna(0)


def make_time_series(simulation_data: List[Results]) -> pd.DataFrame:
    """Turn simulation results into per-run, per-step (State, Stage) counts.

    Pipeline: ``get_agents_data`` -> ``get_stage_state_count_data`` ->
    ``get_time_series_data``.
    """
    return get_time_series_data(
        get_stage_state_count_data(get_agents_data(simulation_data))
    )


def make_average_path(simulation_data: List[Results]) -> pd.DataFrame:
    """Mean (State, Stage) count per step across runs.

    NOTE(review): the time series only contains (RunId, Step) rows that had at
    least one matching agent row, so for sparse inputs the mean is taken over
    the runs present at that step rather than over all runs - confirm this is
    the intended average.
    """
    return make_time_series(simulation_data).groupby("Step").mean()


def make_quantile_path(simulation_data: List[Results], quantile: float) -> pd.DataFrame:
    """Per-step quantile of the (State, Stage) counts across runs.

    ``quantile`` is passed straight to the grouped ``quantile`` call.
    """
    per_run_counts = make_time_series(simulation_data)
    by_step = per_run_counts.groupby("Step")
    return by_step.quantile(quantile)


def make_stage_state_times_table(agent_data: pd.DataFrame) -> pd.DataFrame:
    """Mean ``time_stage_state`` per (RunId, Step), spread over (State, Stage).

    ``agent_data`` must carry a ``time_stage_state`` column. The result has
    one row per (RunId, Step) and (State, Stage) MultiIndex columns; missing
    combinations are filled with 0.0.
    """
    mean_times = (
        agent_data.groupby(["RunId", "Step", "Stage", "State"])["time_stage_state"]
        .mean()
        .reset_index()
    )
    wide_times = mean_times.pivot(
        index=["RunId", "Step"],
        columns=["State", "Stage"],
        values="time_stage_state",
    )
    return wide_times.fillna(0.0)


def make_average_times_path(agent_data) -> pd.DataFrame:
    """Per-step mean, across runs, of the stage/state time table.

    Built from ``make_stage_state_times_table(agent_data)``.
    """
    return make_stage_state_times_table(agent_data).groupby("Step").mean()


In [None]:
sim_results = run_batch(model_params)
sim_dataframe = pd.DataFrame(sim_results).reset_index()


def get_agents_data(simulation_data) -> pd.DataFrame:
    """Select the agent-tracking columns from an already-built results frame.

    This rebinds the ``get_agents_data`` name defined earlier in the notebook.
    ``make_time_series`` looks the name up when it is called, so from this
    point on it (and ``make_average_path``) operate on DataFrames such as the
    filtered frames passed below.
    """
    return simulation_data[["RunId", "Step", "AgentID", "Stage", "State"]]


# Copy so that adding ``time_stage_state`` further down writes to an
# independent frame instead of a column slice of ``sim_dataframe``.
a_df = get_agents_data(sim_dataframe).copy()

# (The byte-identical re-definition of ``make_average_path`` that used to sit
# here was removed; the definition from the helper cell above is used.)

# Agents that left: keep one row per (run, agent, stage, state), i.e. the first
# row on which the agent appears as "left_raf".
left_df = a_df.loc[a_df["State"] == "left_raf"]
left_df = left_df.drop_duplicates(subset=["RunId", "AgentID", "Stage", "State"])
left = make_average_path(left_df)

# Running row count of each agent within its current (stage, state), starting
# at 1. NOTE(review): assumes rows are ordered by Step within each run.
a_df['time_stage_state'] = pd.Series(
    a_df.groupby(['RunId', 'AgentID', 'Stage', 'State']).cumcount() + 1,
    dtype=pd.ArrowDtype(pa.float64()),
)
average_progressing = make_average_times_path(a_df)
n_df = make_average_path(a_df.loc[a_df["State"] != "left_raf"])

# Flatten the (State, Stage) pivot columns into "<state>_<stage>_<suffix>".
left = flatten_pivot(left, "count")
n_df = flatten_pivot(n_df, "count")
average_progressing = flatten_pivot(average_progressing, "time")
average_progressing = average_progressing.round(1)

flat_df = pd.merge(n_df, left, on="Step", how="outer").fillna(0)
flat_df = pd.merge(flat_df, average_progressing, on="Step", how="outer").fillna(0)

# (RunId, AgentID) key of every row; loop-invariant, so built once.
agent_keys = pd.MultiIndex.from_frame(a_df[["RunId", "AgentID"]])

for stage in Stage:
    if stage.value == Stage.MIOT.value:
        continue

    # Agents that left the RAF from this stage, identified per run. Matching
    # on AgentID alone (as before) would also drop same-numbered agents in
    # other runs whenever agent ids are reused between runs.
    left_keys = pd.MultiIndex.from_frame(
        a_df.loc[
            (a_df["State"] == "left_raf") & (a_df["Stage"] == stage.value),
            ["RunId", "AgentID"],
        ].drop_duplicates()
    )
    progressing_df = a_df[~agent_keys.isin(left_keys)]

    stage_df = progressing_df.loc[
        (progressing_df["Stage"] == stage.value)
        & (progressing_df["State"] == "progressing")
    ]
    # keep="last": the final "progressing" row of each agent in this stage.
    stage_df = stage_df.drop_duplicates(
        subset=["RunId", "AgentID", "Stage", "State"], keep="last"
    )
    stage_exit = make_average_path(stage_df)
    stage_exit = flatten_pivot(stage_exit, "exit_count")

    # Move every exit count down one row, so it is reported on the row after
    # the agent's last progressing row.
    col = stage_exit.columns.values.tolist()
    col.remove("Step")
    stage_exit[col] = stage_exit[col].shift(1)

    flat_df = pd.merge(flat_df, stage_exit, on="Step", how="outer").fillna(0)

#### Analysis

##### RW Time to Stabilise and Course Progressing Cadence

In [None]:
# Plot against the explicit "Step" column (rather than the implicit row index)
# and label the figure so it can be read on its own.
flat_df.plot(
    x="Step",
    y=['progressing_eft_count', 'hold_art_count', 'progressing_art_count',
       'hold_artmar_count'],
    figsize=(20, 8),
    title="RW Time to Stabilise and Course Progressing Cadence",
    xlabel="Step",
    ylabel="Mean count across runs",
);

##### RW Exit Counts

In [None]:
# Plot against the explicit "Step" column (rather than the implicit row index)
# and label the figure so it can be read on its own.
flat_df.plot(
    x="Step",
    y=['progressing_brt_exit_count', 'progressing_art_exit_count',
       'progressing_artmar_exit_count'],
    figsize=(20, 8),
    title="RW Exit Counts",
    xlabel="Step",
    ylabel="Mean exit count across runs",
);

##### All Exit Counts

In [None]:
# Plot against the explicit "Step" column (rather than the implicit row index)
# and label the figure so it can be read on its own.
flat_df.plot(
    x="Step",
    y=['progressing_eft_exit_count', 'progressing_fjlin_exit_count',
       'progressing_ajt1_exit_count', 'progressing_ajt2_exit_count',
       'progressing_melin_exit_count', 'progressing_mept_exit_count',
       'progressing_mexo_exit_count'],
    figsize=(20, 8),
    title="All Exit Counts",
    xlabel="Step",
    ylabel="Mean exit count across runs",
);

##### RW Attrition

In [None]:
# Plot against the explicit "Step" column (rather than the implicit row index)
# and label the figure so it can be read on its own.
flat_df.plot(
    x="Step",
    y=['left_raf_brt_count', 'left_raf_art_count', 'left_raf_artmar_count'],
    figsize=(20, 8),
    title="RW Attrition",
    xlabel="Step",
    ylabel="Mean count across runs",
);

##### All Attrition

In [None]:
# Plot against the explicit "Step" column (rather than the implicit row index)
# and label the figure so it can be read on its own.
flat_df.plot(
    x="Step",
    y=['left_raf_mags_count', 'left_raf_eft_count', 'left_raf_melin_count',
       'left_raf_bft_count', 'left_raf_mept_count', 'left_raf_ajt1_count',
       'left_raf_mexo_count', 'left_raf_ajt2_count', 'left_raf_brt_count',
       'left_raf_art_count', 'left_raf_artmar_count'],
    figsize=(20, 8),
    title="All Attrition",
    xlabel="Step",
    ylabel="Mean count across runs",
);

#### RW Course Progression Checks

In [None]:
# Row position used for the "still on course / on hold at the end" snapshots.
# It was hard-coded as 120, which is the configured number of steps; deriving
# it from the config keeps the check valid when ``steps`` is changed.
# NOTE(review): assumes row i of ``flat_df`` corresponds to step i - confirm.
final_step = model_params["simulation"]["steps"]

# Totals over the whole run (sums of the per-step averages) ...
brt_exit = flat_df['progressing_brt_exit_count'].sum()
art_exit = flat_df['progressing_art_exit_count'].sum()
art_attr = flat_df['left_raf_art_count'].sum()
# ... and head-counts still in the stage at the final step.
art_prog = flat_df['progressing_art_count'].iloc[final_step]
art_hold = flat_df['hold_art_count'].iloc[final_step]
art_mar_exit = flat_df['progressing_artmar_exit_count'].sum()
art_mar_attr = flat_df['left_raf_artmar_count'].sum()
art_mar_prog = flat_df['progressing_artmar_count'].iloc[final_step]
art_mar_hold = flat_df['hold_artmar_count'].iloc[final_step]


# Everyone who exited the feeding stage should be accounted for downstream as
# exited, attrition, on hold or still on course.
print('### BRT -> ART Checks ###')
print(f'art exit %: {art_exit/brt_exit: .2f}\nart attrition %: {art_attr/brt_exit: .2f}\nart hold %: {art_hold/brt_exit: .2f}\nart on course %: {art_prog/brt_exit: .2f}\nart combined students: {(art_attr+art_exit+art_prog+art_hold): .2f}\nbrt exit students: {brt_exit: .2f}')
print('\n---\n\n### ART -> ART (MAR) Checks ###')
print(f'art (mar) exit %: {art_mar_exit/art_exit: .2f}\nart (mar) attrition %: {art_mar_attr/art_exit: .2f}\nart (mar) hold %: {art_mar_hold/art_exit: .2f}\nart (mar) on course %: {art_mar_prog/art_exit: .2f}\nart (mar) combined students: {(art_mar_attr+art_mar_exit+art_mar_prog+art_mar_hold): .2f}\nart exit students: {art_exit: .2f}')

#### RW Ratio Checks

In [None]:
# Streaming proportions keyed by the configured ``fj_streaming`` level, given
# as (fj, me, rw, rpas). An unrecognised level leaves all four at 0.0.
# NOTE(review): the 'med' proportions sum to 0.9 while 'high' and 'low' sum
# to 1.0 - confirm the 'med' values.
fj_param, me_param, rw_param, rpas_param = {
    'high': (0.7, 0.2, 0.1, 0.0),
    'med': (0.5, 0.2, 0.1, 0.1),
    'low': (0.4, 0.4, 0.1, 0.1),
}.get(model_params['fj_streaming'], (0.0, 0.0, 0.0, 0.0))

In [None]:
# Row position of the end-of-run snapshot. It was hard-coded as 120, which is
# the configured number of steps; deriving it from the config keeps the check
# valid when ``steps`` is changed.
# NOTE(review): assumes row i of ``flat_df`` corresponds to step i - confirm.
final_step = model_params["simulation"]["steps"]

# Everyone who exited EFT is the common denominator of all stream ratios.
eft_exit_total = flat_df['progressing_eft_exit_count'].sum()

# Pilots who entered a stream's first stage = exited it + left from it
# + still on hold + still progressing at the final step.
me_ratio = (
    flat_df['progressing_melin_exit_count'].sum()
    + flat_df['left_raf_melin_count'].sum()
    + flat_df['hold_melin_count'].iloc[final_step]
    + flat_df['progressing_melin_count'].iloc[final_step]
) / eft_exit_total
# NOTE(review): unlike the ME and RW ratios there is no 'left_raf_fjlin_count'
# term here; the fjlin drop-out rates in ``model_params`` are 0.0, so that
# column may simply not exist - confirm before changing the drop-out rates.
fj_ratio = (
    flat_df['progressing_fjlin_exit_count'].sum()
    + flat_df['hold_fjlin_count'].iloc[final_step]
    + flat_df['progressing_fjlin_count'].iloc[final_step]
) / eft_exit_total
rw_ratio = (
    flat_df['progressing_brt_exit_count'].sum()
    + flat_df['left_raf_brt_count'].sum()
    + flat_df['hold_brt_count'].iloc[final_step]
    + flat_df['progressing_brt_count'].iloc[final_step]
) / eft_exit_total
# Whatever is not streamed to ME, FJ or RW is reported against the rpas param.
restream_ratio = 1 - me_ratio - fj_ratio - rw_ratio

print('### RW Streaming Ratio Checks ###\n')
print(f'me ratio: param - {me_param}, modelled - {me_ratio: .2f}\nfj ratio: param - {fj_param}, modelled - {fj_ratio: .2f}\nrw ratio: param - {rw_param}, modelled - {rw_ratio: .2f}\nrpas ratio: param - {rpas_param}, modelled - {restream_ratio: .2f}')

In [None]:
# Last five rows of every column whose name contains 'complete_count'.
flat_df.tail(5).filter(like='complete_count', axis='columns')