In [1]:
import os
import pandas as pd
import requests

In [6]:
DATA_DIR = "data"
SUBMISSIONS_DIR = "submissions"

In [30]:
assert os.path.isdir(DATA_DIR)
assert os.path.isdir(SUBMISSIONS_DIR)

# 1. Loading the input data set

In [8]:
DATA_SET_NAME = "dataset.tsv"

In [59]:
data_set = pd.read_csv(f"{DATA_DIR}/{DATA_SET_NAME}", delimiter="\t", names=["city1", "city2", "latency", "cost", "messages"])
data_set.head(-1)

Unnamed: 0,city1,city2,latency,cost,messages
0,Ankara,Moscow,641,271372,135
1,Atlanta,PanamaCity,1724,1520655,0
2,Saskatoon,Yellowknife,831,1099498,102
3,Gaborone,Luanda,1437,248701,0
4,Chicago,SaltLakeCity,833,1856242,0
...,...,...,...,...,...
1995,Amsterdam,Chicago,0,0,7791
1996,Amsterdam,Sydney,0,0,11389
1997,Amsterdam,Mumbai,0,0,1104
1998,Chicago,Mumbai,0,0,14566


# 2. Scoring Function

This scoring function is ready to use and grades your submission, it returns the same score as what the Latency Challenge website would return. **Note:**, this doesn't actually submit your solution! You can use this as a utility but as long as you don't upload a solution this will not put you on the leaderboard.

In [65]:
def score_submission(submission: set[tuple[str, str]]) -> (float, str):
    """
    This is the scoring function you can use to determine how well your solution is doing
    """

    cities = set(data_set["city1"]) | set(data_set["city2"])
    edges_available = set(
        tuple(sorted(x))
        for _, x in data_set[data_set["latency"] > 0][["city1", "city2"]].iterrows()
    )
    edges_available = set((x, y) if x < y else (y, x) for (x, y) in edges_available)

    city_to_index = {city: i for i, city in enumerate(cities)}
    max_latency = 10_000
    distances = [[10 * max_latency] * len(cities) for _ in range(len(cities))]
    messages = [[0] * len(cities) for _ in range(len(cities))]

    edges_cost = 0
    for _, row in data_set.iterrows():
        i1 = city_to_index[row["city1"]]
        i2 = city_to_index[row["city2"]]

        print((row["city1"], row["city2"]), submission)
        if (row["city1"], row["city2"]) in submission or (row["city2"], row["city1"]) in submission:
            edges_cost += row["cost"]
            distances[i1][i2] = row["latency"]
            distances[i2][i1] = row["latency"]

        messages[i1][i2] = row["messages"]
        messages[i2][i1] = row["messages"]

    for j in range(len(cities)):
        for i in range(len(cities)):
            for k in range(len(cities)):
                distances[i][k] = min(distances[i][k], distances[i][j] + distances[j][k])

    profit = 0

    for i in range(len(cities)):
        for j in range(len(cities)):
            score_per_message = max(0, max_latency - distances[i][j])
            profit += score_per_message * messages[i][j]

    profit //= 2

    return profit - edges_cost

# 3. Running your solver

In [66]:
SOLVER_NAME = "all_edges"

In [67]:
solver_data_set = data_set.copy()
solver_data_set.head()

Unnamed: 0,city1,city2,latency,cost,messages
0,Ankara,Moscow,641,271372,135
1,Atlanta,PanamaCity,1724,1520655,0
2,Saskatoon,Yellowknife,831,1099498,102
3,Gaborone,Luanda,1437,248701,0
4,Chicago,SaltLakeCity,833,1856242,0


In [68]:
# this is an example solver that simply picks all edges
submission = solver_data_set[
    (solver_data_set["latency"] != 0) & (solver_data_set["cost"] != 0)
].drop(columns=["latency", "cost", "messages"])
submission.head()

Unnamed: 0,city1,city2
0,Ankara,Moscow
1,Atlanta,PanamaCity
2,Saskatoon,Yellowknife
3,Gaborone,Luanda
4,Chicago,SaltLakeCity


# 5. Score your submission

In [82]:
submission_records = list(map(tuple, submission.to_records(index=False)))
score_submission(submission_records)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



214000085

This score should be equal to the score you get when submitting to the tech challenge website.

# 5. Submit your solution

In [83]:
submission.to_csv(f"{SUBMISSIONS_DIR}/{SOLVER_NAME}.tsv", sep="\t", header=False, index=False)

Now upload the solution from the [solutions](/solutions) folder to the Latency Challenge website.