In [2]:
using Serialization
using CSV
using DataFrames
using Plots

include("utils.jl")

In [None]:
# Clear results file from unfeasible runs

df = CSV.read("results.csv", DataFrame)
df = filter(row -> row[:XM] != 3 && row[:YM] != 5 && row[:aM] != 3 && row[:dM] != 5, df);   

df = filter(row -> !(row[:aM] >= 2 && row[:serM] >= 15), df)

CSV.write("results_clean.csv", df, append=true)

"results_clean.csv"

In [None]:
DM_l = [10, 15, 20, 25]
AM_l = [1, 2]

XM_l = [6, 9, 12, 15]            # max queue length 
YM_l = [10, 15, 20, 25]        # max buffer length before dropping calls

serM_nd_l = [5, 10, 15, 20, 25]        # max number of servers original model


df = CSV.read("results.csv", DataFrame);
df = filter(row -> row[:model_id] == "d_os" && row[:XM] != 3 && row[:YM] != 5 && row[:aM] != 3 && row[:dM] != 5, df);

In [21]:
result_col = "cost"

DM_col  = "dM"
AM_col  = "aM"
XM_col  = "XM"
YM_col  = "YM"
serM_col= "serM"

function calc_mean_and_nan_prop(df::SubDataFrame, mean_col, nan_col)
    m = mean_col isa Symbol ? mean_col : Symbol(mean_col)
    n = nan_col  isa Symbol ? nan_col  : Symbol(nan_col)

    col_mean = df[!, m]
    is_nan_val(x) = x isa AbstractFloat && isnan(x)

    vals = [x for x in col_mean if !ismissing(x) && !is_nan_val(x)]
    mean_val = isempty(vals) ? missing : sum(vals) / length(vals)

    col_nan = df[!, n]
    total = nrow(df)
    count_nan = sum(x -> (!ismissing(x) && (is_nan_val(x) || (x isa AbstractString && x == "NaN"))), col_nan)
    prop_nan = total == 0 ? missing : count_nan / total

    return mean_val, prop_nan
end

vars_list = [DM_col, AM_col, XM_col, YM_col, serM_col]

xs = vars_list
for n_vars in 1:4
    idx = collect(1:n_vars)
    combos = []
    while true
        push!(combos, [xs[i] for i in idx])
        # find rightmost index that can be incremented
        j = n_vars
        while j >= 1 && idx[j] == 5 - n_vars + j
            j -= 1
        end
        j == 0 && break
        idx[j] += 1
        for k in (j+1):n_vars
            idx[k] = idx[k-1] + 1
        end
    end
    for combo_cols in combos 
        groups = groupby(df, combo_cols)
            for group in groups 
                group_name = "" 
                for col in combo_cols
                    group_name = group_name*col*" "*string(group[!, col][1])*" "
                end 
                mean, prop = calc_mean_and_nan_prop(group, "count", "cost")
                println(group_name, " ", mean, " ", prop)

                cs_df = DataFrame(group_name=group_name, mean_count=mean, prop_infeasible=prop)
                CSV.write("borders_copy.csv", cs_df, append=true)
            end
    end
end


dM 10  0.4312499999999999 0.025
dM 15  0.046875 0.0
dM 20  0.0375 0.0
dM 25  0.021875 0.0
aM 1  0.07562500000000001 0.0
aM 2  0.193125 0.0125
XM 6  0.135 0.0125
XM 9  0.14562500000000003 0.00625
XM 12  0.13125000000000003 0.0
XM 15  0.12562500000000001 0.00625
YM 10  0.19 0.0125
YM 15  0.11687500000000003 0.0
YM 20  0.12562500000000001 0.0125
YM 25  0.10499999999999998 0.0
serM 5  0.0 0.0
serM 10  0.0 0.0
serM 15  0.059375 0.0
serM 20  0.24531250000000007 0.0
serM 25  0.36718750000000006 0.03125
dM 10 aM 1  0.25875000000000004 0.0
dM 10 aM 2  0.6037499999999999 0.05
dM 15 aM 1  0.028750000000000005 0.0
dM 15 aM 2  0.065 0.0
dM 20 aM 1  0.0 0.0
dM 20 aM 2  0.075 0.0
dM 25 aM 1  0.015000000000000003 0.0
dM 25 aM 2  0.028749999999999998 0.0
dM 10 XM 6  0.4375 0.05
dM 10 XM 9  0.47000000000000003 0.025
dM 10 XM 12  0.4074999999999999 0.0
dM 10 XM 15  0.41000000000000003 0.025
dM 15 XM 6  0.042499999999999996 0.0
dM 15 XM 9  0.052500000000000005 0.0
dM 15 XM 12  0.05 0.0
dM 15 XM 15  0.0424

In [None]:
# robustly apply the requested filter, resolving common column-name variants if needed

if !(@isdefined df)
    error("`df` is not defined in the notebook. Read the CSV into `df` before running this cell.")
end

function resolve_col(candidates, df)
    for c in candidates
        cs = c isa Symbol ? c : Symbol(c)
        if cs in names(df)
            return cs
        end
    end
    return nothing
end

model_col = resolve_col((:model_id, :model, "model_id", "model"), df)
aM_col    = resolve_col((:aM, :AM, "aM", "AM"), df)

model_col === nothing && error("Could not find a model id column in df. Available: $(names(df))")
aM_col    === nothing && error("Could not find an aM/AM column in df. Available: $(names(df))")

before = nrow(df)
#
after = nrow(df)

println("Filtered df (model = \"d_os\", aM != 3). Rows before = $before, after = $after.")
first(df, min(6, after))

In [4]:
# Analyze proportion of missing (NaN) results across all combinations of the 5 variables
# Assumes `df`, `DM_l`, `AM_l`, `XM_l`, `YM_l`, `serM_nd_l` are already defined in the notebook.

# --- configure which column holds the test result ---
# try to auto-detect a sensible result column from common names
# candidates = [:res, :result, :value, :cost, :av_ser, :av_blr, :metric]
# result_col = first([c for c in candidates if c in names(df)]; default=nothing)

# if result_col === nothing
#     error("Could not auto-detect a result column. Provide `result_col = :your_column` and re-run.")
# end

result_col = "cost"

DM_col  = "dM"
AM_col  = "aM"
XM_col  = "XM"
YM_col  = "YM"
serM_col= "serM"

# --- aggregate by the 5-way groups that actually exist in df ---
agg = combine(groupby(df, [DM_col, AM_col, XM_col, YM_col, serM_col])) do sub
    total = nrow(sub)
    n_missing = sum(ismissing.(sub[!, result_col]))
    n_nonmissing = total - n_missing
    return (total = total, n_missing = n_missing, n_nonmissing = n_nonmissing,
            prop_missing = total==0 ? missing : n_missing/total)
end


print(agg)

# --- build the full grid of all theoretical combinations from the lists ---
# uses the DM_l, AM_l, XM_l, YM_l, serM_nd_l variables already defined in the notebook
combos = collect(Iterators.product(DM_l, AM_l, XM_l, YM_l, serM_nd_l))

print(getindex.(combos,2))
full = DataFrame(
    DM_val = getindex.(combos,1),
    AM_val = getindex.(combos,2),
    XM_val = getindex.(combos,3),
    YM_val = getindex.(combos,4),
    serM_val = getindex.(combos,5),
)

# rename columns to match detected df column names so join works
rename!(full, Dict(:DM_val => DM_col, :AM_val => AM_col, :XM_val => XM_col,
                   :YM_val => YM_col, :serM_val => serM_col))

# left-join aggregated real data onto the full grid so missing combos are visible
full_stats = leftjoin(full, agg, on=[DM_col, AM_col, XM_col, YM_col, serM_col])

# For combos not present in df, set prop_missing = missing (already missing); keep total/n_missing as missing
first(full_stats, 8)  # show a sample

# --- per-variable summaries: proportion of missing results for each value of each variable ---
pervar = Dict{Symbol,DataFrame}()
for (col, list_values) in zip((DM_col, AM_col, XM_col, YM_col, serM_col),
                              (DM_l, AM_l, XM_l, YM_l, serM_nd_l))
    g = combine(groupby(df, col)) do sub
        total = nrow(sub)
        n_missing = sum(ismissing.(sub[!, result_col]))
        return (total = total, n_missing = n_missing, prop_missing = total==0 ? missing : n_missing/total)
    end

    # ensure all values from the original list appear (values absent in df will be missing in g)
    allvals = DataFrame(val = list_values)
    rename!(allvals, :val => col)
    fullg = leftjoin(allvals, g, on=col)
    pervar[col] = fullg
end

# --- display results ---
println("Analyzed result column: ", result_col)
println("\nSample of full grid with stats (first 8 rows):")
show(first(full_stats, 8); allrows=true, allcols=true)

println("\n\nPer-variable missing proportions:")
for (col, tbl) in pervar
    println("\nColumn: ", col)
    show(tbl; allrows=true, allcols=true)
end

# Optionally save results to CSV:
# CSV.write("full_combination_missing_stats.csv", full_stats)
# for (col, tbl) in pervar
#     CSV.write("missing_by_$(col).csv", tbl)
# end

[1m1875×9 DataFrame[0m
[1m  Row [0m│[1m dM    [0m[1m aM    [0m[1m XM    [0m[1m YM    [0m[1m serM  [0m[1m total [0m[1m n_missing [0m[1m n_nonmissing [0m[1m prop_missing [0m
      │[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64     [0m[90m Int64        [0m[90m Float64      [0m
──────┼─────────────────────────────────────────────────────────────────────────────────
    1 │     5      1      3      5      5      1          0             1           0.0
    2 │     5      1      3      5     10      1          0             1           0.0
    3 │     5      1      3      5     15      1          0             1           0.0
    4 │     5      1      3      5     20      1          0             1           0.0
    5 │     5      1      3      5     25      1          0             1           0.0
    6 │     5      1      6      5      5      1          0             1           0.0
    7 │     5      

LoadError: ArgumentError: adding AbstractArray other than AbstractVector as a column of a data frame is not allowed