# Match result matrix

In [1]:
import numpy as np

models = ["gpt4o", "gpt4omini", "gpt4o-mini", "gpt4o-mini-2024-07-18"]

# Create a square result matrix with one row and one column per model.
# matrix[i, j] holds the result of models[i] (row) vs. models[j] (col):
#   -1 = no result recorded, 0 = row model wins, 1 = col model wins, 2 = draw
# The size is derived from `models` so the two cannot drift apart.
matrix = np.full((len(models), len(models)), -1, dtype=int)

match_results = [
    {
        "model_row": models[0],
        "model_col": models[1],
        "result": 0,  # model_row wins
    },
    {
        "model_row": models[1],
        "model_col": models[2],
        "result": 1,  # model_col wins
    },
    {
        "model_row": models[2],
        "model_col": models[3],
        "result": 2,  # draw
    },
]

# Write every result into its (row model, col model) cell.
for result in match_results:
    matrix[models.index(result["model_row"]), models.index(result["model_col"])] = result["result"]

print(matrix)

[[-1  0 -1 -1]
 [-1 -1  1 -1]
 [-1 -1 -1  2]
 [-1 -1 -1 -1]]


In [2]:
# Add a new model. The membership check keeps this cell idempotent: re-running
# it does not register the same model twice.
new_model = "gpt4o-2024-08-06"
if new_model not in models:
    models.append(new_model)

# Grow the matrix so it again has one row and one column per model; the new
# cells are filled with -1 (no result). Padding by the size difference (instead
# of always by one) means a re-run leaves an already-grown matrix untouched.
missing = len(models) - matrix.shape[0]
if missing > 0:
    matrix = np.pad(
        matrix,
        pad_width=((0, missing), (0, missing)),
        mode="constant",
        constant_values=-1,
    )
print(matrix)

[[-1  0 -1 -1 -1]
 [-1 -1  1 -1 -1]
 [-1 -1 -1  2 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]


In [3]:

# Second batch of results, now involving the newly added model (index 4).
# Every match in this batch was won by the row model (result code 0).
match_results = [
    {"model_row": models[row], "model_col": models[col], "result": 0}
    for row, col in ((4, 1), (1, 3), (3, 0))
]

# Write each result into its (row model, col model) cell of the matrix.
for result in match_results:
    row_idx = models.index(result["model_row"])
    col_idx = models.index(result["model_col"])
    matrix[row_idx, col_idx] = result["result"]

print(matrix)

[[-1  0 -1 -1 -1]
 [-1 -1  1  0 -1]
 [-1 -1 -1  2 -1]
 [ 0 -1 -1 -1 -1]
 [-1  0 -1 -1 -1]]


In [4]:
def get_indirect_match_result(matrix, model_row, model_col, verbose=False):
    """Infer the outcome of ``model_row`` vs. ``model_col`` from recorded results.

    ``matrix[i, j]`` is the result of a match with model ``i`` on the row side
    and model ``j`` on the column side, encoded as:

        -1  no result recorded
         0  the row model wins
         1  the column model wins
         2  draw

    A recorded direct result is returned as-is. Otherwise every simple path
    ``model_row -> ... -> model_col`` along recorded results is enumerated, and
    a path supports an inference only if its results chain transitively:

    * only wins and draws, at least one win    -> 0 (row model wins)
    * only losses and draws, at least one loss -> 1 (column model wins)
    * only draws                               -> 2 (draw)
    * wins and losses mixed                    -> nothing can be inferred

    Args:
        matrix: square numpy integer array of result codes.
        model_row: index of the row-side model.
        model_col: index of the column-side model.
        verbose: when True, also return the path used.

    Returns:
        With ``verbose=False``: the result code (int), or ``None`` when no path
        supports an inference. With ``verbose=True``: a tuple
        ``(result, path_nodes, path_results)``, or plain ``None`` when no path
        supports an inference.

    Raises:
        AssertionError: if either index is outside the matrix.
    """
    NO_RESULT, ROW_WINS, COL_WINS, DRAW = -1, 0, 1, 2

    n = matrix.shape[0]
    assert 0 <= model_row < n and 0 <= model_col < n

    # A recorded direct result always takes precedence over any inference.
    direct = int(matrix[model_row, model_col])
    if direct != NO_RESULT:
        if verbose:
            return direct, [model_row, model_col], [direct]
        return direct

    # DFS collecting every simple path from u to target together with the
    # result codes along it. `visited` holds the nodes on the current path.
    def dfs(u, target, visited):
        candidates = []  # each candidate: (path nodes starting at u, path results)
        for v in range(n):
            edge = int(matrix[u, v])
            if edge == NO_RESULT or v in visited:
                continue
            if v == target:
                candidates.append(([u, v], [edge]))
            else:
                visited.add(v)
                child_cands = dfs(v, target, visited)
                visited.remove(v)
                for child_nodes, child_results in child_cands:
                    candidates.append(([u] + child_nodes, [edge] + child_results))
        return candidates

    def infer(results):
        """Collapse one path's result codes into a single code, or None."""
        if any(r not in (ROW_WINS, COL_WINS, DRAW) for r in results):
            return None  # unknown code: do not guess
        has_win = ROW_WINS in results
        has_loss = COL_WINS in results
        if has_win and has_loss:
            # e.g. row beats A but A loses to col: says nothing about row vs. col
            return None
        if has_win:
            return ROW_WINS
        if has_loss:
            return COL_WINS
        return DRAW

    best = None  # (inferred result, nodes, results)
    for nodes, results in dfs(model_row, model_col, {model_row}):
        inferred = infer(results)
        if inferred is None:
            continue
        # NOTE(review): when paths disagree, the numerically largest code is
        # kept (draw over col-wins over row-wins); ties keep the first path in
        # DFS order. Confirm this tie-break is the intended policy.
        if best is None or inferred > best[0]:
            best = (inferred, nodes, results)

    if best is None:
        return None
    return best if verbose else best[0]


In [5]:
# matrix[0, 3] is still -1 (no direct result), so this call goes through the
# indirect path search. verbose=True also returns the path nodes and the
# per-edge results along that path.
score = get_indirect_match_result(matrix, 0, 3, verbose=True)
print(score)


(0, [0, 1, 3], [0, 0])


In [6]:
# matrix[4, 0] is -1 as well, so the result for the newly added model (index 4)
# against model 0 has to be inferred through intermediate models.
score = get_indirect_match_result(matrix, 4, 0, verbose=True)
print(score)

(0, [4, 1, 3, 0], [0, 0, 0])


# ELO Matrix

In [7]:
# The row model is the main model, i.e. the one the stored elo score belongs to.
# The col model is the model it was compared against.
# -1 marks a pair without a recorded elo. The size follows `models` instead of
# being hard-coded, so the matrix stays aligned with the model list.
elo_matrix = np.full((len(models), len(models)), -1, dtype=int)

# NOTE(review): how an elo value maps to "row model wins/loses" is not defined
# in this notebook (presumably relative to a baseline of 100) - TODO confirm.
updated_elo = [
    {
        "model_row": models[4],
        "model_col": models[1],
        "elo": 101,
    },
    {
        "model_row": models[1],
        "model_col": models[2],
        "elo": 99,
    },
    {
        "model_row": models[2],
        "model_col": models[3],
        "elo": 99,
    },
    {
        # Same (row, col) pair as the previous entry: the loop below writes
        # entries in order, so this later value (90) replaces the 99 above.
        "model_row": models[2],
        "model_col": models[3],
        "elo": 90,
    },
    {
        "model_row": models[3],
        "model_col": models[1],
        "elo": 93,
    },
    {
        "model_row": models[1],
        "model_col": models[0],
        "elo": 101,
    },
]

# Write each elo into its (row model, col model) cell.
for result in updated_elo:
    elo_matrix[models.index(result["model_row"]), models.index(result["model_col"])] = (
        result["elo"]
    )

print(elo_matrix)

[[ -1  -1  -1  -1  -1]
 [101  -1  99  -1  -1]
 [ -1  -1  -1  90  -1]
 [ -1  93  -1  -1  -1]
 [ -1 101  -1  -1  -1]]


In [8]:
def get_indirect_score(elo_matrix, model_row, model_col, *, verbose=False):
    """Look up, or infer along a monotonic path, the elo of model_row vs. model_col.

    ``elo_matrix[i, j]`` is the elo recorded for model ``i`` against model
    ``j``; ``-1`` means nothing is recorded for that pair.

    If the pair has a recorded elo it is returned directly. Otherwise all
    simple paths from ``model_row`` to ``model_col`` over recorded entries are
    enumerated, and only paths whose elo sequence is monotonic are used:

    * non-increasing sequence (a >= b >= c ...) -> inferred elo is ``min``
    * non-decreasing sequence (a <= b <= c ...) -> inferred elo is ``max``

    Among all usable paths the one with the largest inferred elo is chosen;
    on equal values the path found first (ascending node index order) is kept.

    Args:
        elo_matrix: square numpy integer array of elos.
        model_row: index of the main (row) model.
        model_col: index of the compared (column) model.
        verbose: keyword-only; when True the chosen path is returned as well.

    Returns:
        With ``verbose=False``: the elo as an int, or ``None`` if no monotonic
        path exists. With ``verbose=True``: a tuple
        ``(elo, path_nodes, path_elos)``, or plain ``None`` if no monotonic
        path exists.

    Raises:
        AssertionError: if either index is outside the matrix.
    """
    size = elo_matrix.shape[0]
    assert 0 <= model_row < size and 0 <= model_col < size

    # A recorded entry for the pair short-circuits the search.
    recorded = int(elo_matrix[model_row, model_col])
    if recorded != -1:
        return (recorded, [model_row, model_col], [recorded]) if verbose else recorded

    def walk(node, on_path):
        """Yield (nodes, elos) for every simple path from node to model_col."""
        for nxt in range(size):
            weight = int(elo_matrix[node, nxt])
            if weight == -1 or nxt in on_path:
                continue
            if nxt == model_col:
                yield [node, nxt], [weight]
                continue
            # Descend, then undo the marker so sibling branches may reuse nxt.
            on_path.add(nxt)
            for tail_nodes, tail_elos in walk(nxt, on_path):
                yield [node] + tail_nodes, [weight] + tail_elos
            on_path.remove(nxt)

    def summarize(elos):
        """Collapse a monotonic elo sequence to one value; None if not monotonic."""
        steps = list(zip(elos, elos[1:]))
        if all(left >= right for left, right in steps):
            return min(elos)
        if all(left <= right for left, right in steps):
            return max(elos)
        return None

    winner = None  # (inferred elo, path nodes, path elos)
    for path_nodes, path_elos in walk(model_row, {model_row}):
        if not path_elos:
            continue
        inferred = summarize(path_elos)
        if inferred is None:
            continue
        if winner is None or inferred > winner[0]:
            winner = (inferred, path_nodes, path_elos)

    if winner is None:
        return None
    return winner if verbose else winner[0]


In [9]:
# elo_matrix[4, 1] holds a recorded value, so this exercises the direct-lookup
# branch: the returned path is just the pair itself.
score = get_indirect_score(elo_matrix, 4, 1, verbose=True)
print(score)

(101, [4, 1], [101])


In [10]:
# elo_matrix[4, 3] is -1 (nothing recorded), so the elo is inferred from a
# monotonic path through intermediate models.
score = get_indirect_score(elo_matrix, 4,3, verbose=True)
print(score)


(90, [4, 1, 2, 3], [101, 99, 90])


In [11]:
# elo_matrix[2, 0] is -1 as well; this query also has to be answered through
# the monotonic path search rather than a direct lookup.
score = get_indirect_score(elo_matrix, 2,0, verbose=True)
print(score)


(101, [2, 3, 1, 0], [90, 93, 101])
