In [None]:
# Soft-reset (-s) the user namespace without a confirmation prompt (-f), so
# variables left over from an earlier run cannot leak into this one.
%reset -sf

Here I present some simple data analysis and a sample submission.

The question uploaded to Kaggle is very similar to test case `d` in the qualifiers.

You can refer to the discussion on how to solve `d` on Codeforces - https://codeforces.com/blog/entry/88188

In [None]:
import collections
import random

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# Parse Input

Firstly we need to parse the input into a data structure

In [None]:
# Path to the problem input file. Despite the "DIR" suffix, this value is
# handed to open() by parse_input, so it must name a file, not a directory.
INPUT_DIR = "/kaggle/input/hashcode-2021-oqr-extension/hashcode.in"

def parse_input(input_dir):
    """Read a problem input file and return its contents as Python structures.

    Args:
        input_dir: path of the input file (a file path, despite the name).

    Returns:
        A 7-tuple ``(duration, num_intersections, num_streets, num_cars,
        fixed_score, streets, cars)`` where the first five items are the
        integers of the header line, ``streets`` maps each street name to a
        ``(start, end, length)`` tuple of integers, and ``cars`` is a list
        holding, for every car, the list of street names on its path.

    Raises:
        IndexError: if the file has fewer lines than the header announces.
        ValueError: if a numeric field cannot be converted to ``int``.
    """
    with open(input_dir) as handle:
        rows = [raw_line.split() for raw_line in handle.readlines()]

    # Header: five whitespace-separated integers on the first line.
    header = [int(token) for token in rows[0]]
    duration, num_intersections, num_streets, num_cars, fixed_score = header

    cursor = 1  # index of the next unread line

    # Street lines: "<start> <end> <name> <length>".
    streets = {}
    for _ in range(num_streets):
        fields = rows[cursor]
        cursor += 1
        start = int(fields[0])
        end = int(fields[1])
        street_name = fields[2]
        length = int(fields[-1])
        streets[street_name] = (start, end, length)

    # Car lines: "<count> <street> <street> ..."; the leading count is dropped.
    cars = []
    for _ in range(num_cars):
        cars.append(rows[cursor][1:])
        cursor += 1

    return duration, num_intersections, num_streets, num_cars, fixed_score, streets, cars

# Parse the input file once and unpack the five header values together with
# the street table and the per-car routes.
duration, num_intersections, num_streets, num_cars, fixed_score, streets, cars = parse_input(INPUT_DIR)

# `streets` is a dictionary that maps each `street_name` to a (start, end, length) tuple
# Each element of `cars` is the list of `street_name`s that the car traverses, in input order

In [None]:
# Display the five header values read from the first line of the input.
duration, num_intersections, num_streets, num_cars, fixed_score

# Data analysis

In [None]:
# Distance each car has to travel: the sum of the lengths of the streets on
# its path. The first street is skipped because the car does not need to
# travel along it (same convention as the upper-bound estimate below), which
# makes the comparison against `duration` meaningful.
distance_distbn = []

for car in cars:
    distance = sum(streets[street_name][-1] for street_name in car[1:])  # {street_name:(start,end,length),...}
    distance_distbn.append(distance)

plt.figure(figsize=(14,2))
plt.hist(distance_distbn, bins=100)
plt.axvline(duration, color="r")  # cars to the right of this line cannot finish in time
plt.title("Distribution of the distance to travel for each vehicle")
plt.xlabel("Distance to travel")
plt.ylabel("Number of cars")
plt.show()
plt.close()

# Upper bound estimate

We want to estimate an upper bound on the score so that we know how much room there is for improvement.

In this upper bound estimate, we assume that cars never have to wait at any intersection.

In [None]:
# Distance each car must cover if it never waits: the street lengths along its
# path, skipping the first street (the car does not need to travel along it).
free_flow_distances = [
    sum(streets[name][-1] for name in route[1:])
    for route in cars
]

# Only cars whose unobstructed distance fits in the simulation can finish.
finishing_distances = [d for d in free_flow_distances if d <= duration]

impossible_cars = len(free_flow_distances) - len(finishing_distances)  # number of cars that could not finish
total_required_distance = sum(finishing_distances)  # total distance travelled by the cars that can finish

num_finishing = num_cars - impossible_cars

# for completing before the end of simulation
completion_score = fixed_score * num_finishing
# bonus for completing early
time_bonus = duration * num_finishing - total_required_distance
total_score = completion_score + time_bonus

total_required_distance, impossible_cars, time_bonus, completion_score, total_score

# Sample solution

In this sample solution, we set the duration of each green light to one second whenever there is a car coming from that direction.

In [None]:
# Look-up tables from a street name to the intersections at its two ends.
# `map_street_to_dest` tells which intersection each street leads to;
# `map_street_to_source` (where the street starts) is currently unused.
map_street_to_source = {name: ends[0] for name, ends in streets.items()}
map_street_to_dest = {name: ends[1] for name, ends in streets.items()}

In [None]:
# For each intersection, count how many cars need a green light on each
# incoming street. A car waits at the end of every street on its path except
# the last one: its journey ends there, so it never queues at that light.
# The final street is therefore left out, which keeps streets that only serve
# as destinations from receiving (and wasting) green time in the schedule.
incoming_count = collections.defaultdict(collections.Counter)
for car in cars:
    for street_name in car[:-1]:
        incoming_count[map_street_to_dest[street_name]][street_name] += 1

In [None]:
# Build one light cycle per intersection: every incoming street that carries
# traffic gets a green light lasting one time unit, in random order.
schedules = []
for intersection in range(num_intersections):
    street_counts = incoming_count[intersection]

    # Informational statistics only; they do not influence the schedule.
    total_count = sum(street_counts.values())  # amount of incoming traffic
    num_incoming = len(street_counts)  # number of incoming streets

    arr = list(street_counts.items())
    random.shuffle(arr)  # randomise the order of the streets within the cycle

    # [street_name, green_duration] pairs; every green light lasts 1
    schedules.append([[street, 1] for street, _ in arr])

# Parse solution into submission

In [None]:
# Intersections with an empty cycle are left out of the submission entirely,
# so gather the (index, cycle) pairs that actually have a schedule first.
active = [(idx, cycle) for idx, cycle in enumerate(schedules) if cycle]

# Row layout: number of scheduled intersections, then for each of them its
# index, the number of streets in its cycle, and one "street duration" row
# per street.
res = [[len(active)]]
for idx, cycle in active:
    res.append([idx])
    res.append([len(cycle)])
    res.extend([street, green_time] for street, green_time in cycle)

result_string = "\n".join(" ".join(map(str, row)) for row in res)

In [None]:
# Write the formatted submission text to `submission.csv` in the working directory.
with open("submission.csv", mode="w") as submission_file:
    submission_file.write(result_string)

In [None]:
# check submission
!head submission.out