https://www.kaggle.com/kostyaatarik/not-a-5-and-5-halves-opt

In [7]:
import numpy as np
import pandas as pd
import numba
from sympy import isprime, primerange
from math import sqrt
from sklearn.neighbors import KDTree
from tqdm import tqdm_notebook as tqdm
from itertools import combinations, permutations
from functools import lru_cache

In [8]:
# City table indexed by CityId; the scoring kernels below read XY and is_not_prime as globals.
cities = pd.read_csv('input/cities.csv', index_col=['CityId'])

# Two-column float32 coordinate table: column 0 holds X, column 1 holds Y.
x_coords = np.asarray(cities.X, dtype=np.float32)
y_coords = np.asarray(cities.Y, dtype=np.float32)
XY = np.column_stack((x_coords, y_coords))

# 1 where the CityId is NOT prime, 0 where it is (int32 so it can be used as a truth flag).
is_not_prime = np.array([int(not isprime(i)) for i in cities.index], dtype=np.int32)

In [9]:
@numba.jit('f8(i8, i8, i8)', nopython=True, parallel=False)
def cities_distance(offset, id_from, id_to):
    """Length of the single step ``id_from -> id_to``.

    The Euclidean distance is read from the global ``XY`` table.  When
    ``offset % 10 == 9`` and the origin city is flagged in ``is_not_prime``
    the distance is multiplied by 1.1.  Callers in this notebook pass the
    tour index of ``id_from`` as ``offset``.
    """
    dx = XY[id_from, 0] - XY[id_to, 0]
    dy = XY[id_from, 1] - XY[id_to, 1]
    length = sqrt(dx * dx + dy * dy)
    # Only steps leaving a position with offset % 10 == 9 can be penalised.
    if offset % 10 != 9:
        return length
    if is_not_prime[id_from]:
        return 1.1 * length
    return length


@numba.jit('f8(i4, i8[:])', nopython=True, parallel=False)
def score_chunk(offset, chunk):
    """Score a contiguous run of city ids whose first element sits at ``offset``.

    Returns the sum of all step lengths plus 10% of the length of every
    step that leaves a non-prime city at a local index ``k`` satisfying
    ``k % 10 == 9 - offset % 10``.
    """
    # Local step index (mod 10) at which the surcharge applies.
    penalized_step = 9 - offset % 10
    total = 0.0
    surcharge = 0.0
    # parallel=False, so a plain range is what prange would have been anyway.
    for step in range(chunk.shape[0] - 1):
        src = chunk[step]
        dst = chunk[step + 1]
        dx = XY[src, 0] - XY[dst, 0]
        dy = XY[src, 1] - XY[dst, 1]
        leg = sqrt(dx * dx + dy * dy)
        total += leg
        if step % 10 == penalized_step and is_not_prime[src]:
            surcharge += leg
    return total + 0.1 * surcharge


@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def score_path(path):
    """Score a whole tour: ``score_chunk`` evaluated with the first city at offset 0."""
    start_offset = 0
    return score_chunk(start_offset, path)


# nopython=True matches every other kernel in this notebook and guarantees the
# loop is never silently compiled in (slow) object mode on older numba releases.
@numba.jit(nopython=True)
def chunk_scores(chunk):
    """Score a chunk for each of the 10 possible alignments of its first city.

    Parameters
    ----------
    chunk : 1-D integer array of city ids, visited in order.

    Returns
    -------
    float64 array of length 10.  Entry ``m`` is the chunk's total length plus
    10% of every step that leaves a non-prime city at a local index ``i``
    with ``(m + i) % 10 == 9``; that is, the chunk's score when its first
    city sits at a tour position ``p`` with ``p % 10 == m``.  A chunk with
    fewer than two cities has no steps and yields all zeros.
    """
    scores = np.zeros(10)
    pure_distance = 0.0  # float from the start: it only ever accumulates float distances
    # Plain range on purpose: the indexed accumulation into `scores` below is
    # not a reduction that could safely be parallelised with prange.
    for i in range(chunk.shape[0] - 1):
        id_from, id_to = chunk[i], chunk[i+1]
        xy_from, xy_to = XY[id_from], XY[id_to]
        dx, dy = xy_from[0] - xy_to[0], xy_from[1] - xy_to[1]
        distance = sqrt(dx * dx + dy * dy)
        pure_distance += distance
        if is_not_prime[id_from]:
            # Step i is penalised for exactly one alignment: m = 9 - i % 10.
            scores[9 - i % 10] += distance
    scores *= 0.1              # penalised steps cost an extra 10% ...
    scores += pure_distance    # ... on top of the unpenalised chunk length
    return scores

In [10]:
@numba.jit('f8(i8, i8, i8[:], i8[:], i8[:], i8, f8[:,:], i8[:])', nopython=True, parallel=False)
def score_compound_chunk(offset, head, firsts, lasts, lens, tail, scores, indexes):
    """Score ``head -> chunk[indexes[0]] -> ... -> chunk[indexes[-1]] -> tail``.

    ``offset`` is the tour position of ``head``.  ``firsts``, ``lasts`` and
    ``lens`` give each chunk's first city, last city and length.
    ``scores[c, m]`` is the internal score of chunk ``c`` when its first city
    sits at a position ``p`` with ``p % 10 == m``.  ``indexes`` is the order
    in which the chunks are visited.
    """
    total = 0.0
    position = offset          # tour position of the city we are leaving
    previous_city = head
    # Each iteration depends on the previous one (position / previous_city),
    # so this is a sequential loop.
    for k in range(len(indexes)):
        chunk_id = indexes[k]
        # Connecting step into the chunk, then the chunk's own precomputed score.
        total += cities_distance(position, previous_city, firsts[chunk_id])
        total += scores[chunk_id, (position + 1) % 10]
        previous_city = lasts[chunk_id]
        position += lens[chunk_id]
    return total + cities_distance(position, previous_city, tail)


@numba.jit('i8(i8, i8, i8[:], i8[:], i8[:], i8, f8[:,:], i8[:,:], f8)', nopython=True, parallel=False)
def best_score_permutation_index(offset, head, firsts, lasts, lens, tail, scores, indexes, best_score):
    """Row of ``indexes`` whose chunk order scores lowest, or -1.

    Every row of ``indexes`` is evaluated with ``score_compound_chunk``.
    A row wins only if it is strictly below ``best_score`` and strictly below
    every earlier row; -1 is returned when no row beats ``best_score``.
    """
    winner = -1
    threshold = best_score
    for row in range(len(indexes)):
        candidate = score_compound_chunk(offset, head, firsts, lasts, lens, tail, scores, indexes[row])
        if candidate < threshold:
            winner = row
            threshold = candidate
    return winner

In [11]:
# Spatial index over all city coordinates.
kdt = KDTree(XY)

# Candidate groups of five mutually-close cities, stored as sorted id tuples
# so that the same group found from different centres is kept only once.
fives = set()
for i in tqdm(cities.index):
    query_point = [XY[i]]
    # Two neighbourhoods per city: its 9 nearest cities, and every city within radius 10.
    dists, nearest = kdt.query(query_point, 9)
    within_radius = kdt.query_radius(query_point, 10, count_only=False, return_distance=False)
    for neibs in (nearest[0], within_radius[0]):
        # all(comb) drops any group containing city id 0
        # (presumably because city 0 is the fixed start/end of the tour -- confirm).
        fives.update(
            tuple(sorted(comb))
            for comb in combinations(neibs, 5)
            if all(comb)
        )

print(f'{len(fives)} cities fives are selected.')

# sort fives by distance
@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def sum_distance(ids):
    """Sum of the distances between every unordered pair of cities in ``ids``.

    ``cities_distance`` is called with offset 0, so no step is ever penalised.
    """
    count = len(ids)
    total = 0.0
    for a in range(count):
        for b in range(a + 1, count):
            total += cities_distance(0, ids[a], ids[b])
    return total

# Turn the set of tuples into a 2-D array (one row per group of five) and
# order the rows by ascending total pairwise distance, most compact groups first.
fives = np.array(list(fives))
distances = np.array([sum_distance(five) for five in tqdm(fives)])
order = np.argsort(distances)
fives = fives[order]

HBox(children=(IntProgress(value=0, max=197769), HTML(value='')))


58610820 cities fives are selected.


HBox(children=(IntProgress(value=0, max=58610820), HTML(value='')))




In [12]:
# Load the current tour (the 'Path' column of the submission file) as a fresh
# array, then display its score as the cell output.
submission_in = pd.read_csv('submission.csv')
path = np.array(submission_in['Path'])
score_path(path)

1516242.649857092

In [13]:
@lru_cache(maxsize=None)
def indexes_permutations(n):
    """Return every permutation of ``range(n)`` as an ``(n!, n)`` int64 array.

    Rows are in the order produced by ``itertools.permutations``, so row 0 is
    always the identity permutation.  The result is cached per ``n``; callers
    share the same array object and must treat it as read-only.

    The dtype is pinned to int64 because the array (and its rows) are handed
    to numba kernels declared with ``i8[:,:]`` / ``i8[:]`` signatures; relying
    on NumPy's platform-default integer would yield int32 on some platforms
    (and float64 for the empty ``n == 0`` case).
    """
    return np.array(list(permutations(range(n))), dtype=np.int64)


# path_index[city] = position of that city in the tour; the final element of
# `path` is left out of the argsort.
path_index = np.argsort(path[:-1])
print(f'Total score is {score_path(path):.2f}.')
for _ in range(2):
    for ids in tqdm(fives[:2 * 10**6]):
        # Tour positions of the five selected cities, in visiting order.
        positions = np.sort(path_index[ids])
        i1, i2, i3, i4, i5 = positions
        head, tail = path[i1-1], path[i5+1]

        # Cut the tour immediately before and after each selected city.  This
        # gives up to 9 pieces: the five single cities and the (possibly
        # empty) stretches between consecutive ones.
        cuts = np.repeat(positions, 2)
        cuts[1::2] += 1
        pieces = [path[lo:hi] for lo, hi in zip(cuts[:-1], cuts[1:])]
        chunks = [piece for piece in pieces if len(piece)]

        # Per-chunk data consumed by the numba scoring kernels.
        scores = np.array(list(map(chunk_scores, chunks)))
        lens = np.array(list(map(len, chunks)))
        firsts = np.array([piece[0] for piece in chunks])
        lasts = np.array([piece[-1] for piece in chunks])

        perms = indexes_permutations(len(chunks))
        head_position = i1 - 1
        # Row 0 is the identity order, i.e. the score of the tour as it stands.
        best_score = score_compound_chunk(head_position, head, firsts, lasts, lens, tail, scores, perms[0])
        index = best_score_permutation_index(head_position, head, firsts, lasts, lens, tail, scores, perms, best_score)
        # -1 means nothing beat the current order; row 0 is the current order itself.
        if index <= 0:
            continue

        perm = [chunks[i] for i in perms[index]]
        # np.concatenate copies the chunk views before they are written back into `path`.
        path[i1-1:i5+2] = np.concatenate([[head], np.concatenate(perm), [tail]])
        path_index = np.argsort(path[:-1])
        print(f'New total score is {score_path(path):.3f}. Permutating path at indexes {i1}, {i2}, {i3}, {i4}, {i5}.')

Total score is 1516242.65.


HBox(children=(IntProgress(value=0, max=2000000), HTML(value='')))

New total score is 1516242.209. Permutating path at indexes 72473, 72482, 72483, 72485, 72486.
New total score is 1516242.127. Permutating path at indexes 72472, 72473, 72474, 72958, 72969.
New total score is 1516241.921. Permutating path at indexes 67162, 67163, 67173, 67174, 67178.
New total score is 1516241.700. Permutating path at indexes 22682, 22683, 22684, 22686, 22766.
New total score is 1516241.444. Permutating path at indexes 82017, 82025, 82026, 82027, 82028.
New total score is 1516241.221. Permutating path at indexes 22757, 22758, 22759, 22766, 22767.
New total score is 1516240.969. Permutating path at indexes 118124, 118125, 118152, 118153, 118158.
New total score is 1516240.474. Permutating path at indexes 31899, 31900, 32762, 32763, 32764.
New total score is 1516239.889. Permutating path at indexes 76065, 76069, 76070, 76071, 76207.
New total score is 1516239.664. Permutating path at indexes 43085, 43135, 43136, 43137, 43138.
New total score is 1516239.480. Permutating p

New total score is 1516211.222. Permutating path at indexes 119097, 119218, 119219, 119220, 119225.
New total score is 1516210.923. Permutating path at indexes 13541, 13546, 13547, 13563, 13564.
New total score is 1516210.806. Permutating path at indexes 84784, 85664, 85665, 85666, 85668.
New total score is 1516210.317. Permutating path at indexes 182243, 182249, 182250, 182251, 182263.
New total score is 1516209.682. Permutating path at indexes 51852, 51872, 51873, 51874, 51875.
New total score is 1516209.019. Permutating path at indexes 50164, 50174, 50175, 50176, 50302.
New total score is 1516208.868. Permutating path at indexes 85343, 85371, 85372, 85373, 85374.
New total score is 1516208.585. Permutating path at indexes 84785, 84787, 85666, 85667, 85673.
New total score is 1516208.155. Permutating path at indexes 111042, 111057, 111058, 111059, 111064.
New total score is 1516207.526. Permutating path at indexes 63948, 63952, 64517, 64518, 64519.
New total score is 1516207.521. Per

HBox(children=(IntProgress(value=0, max=2000000), HTML(value='')))

New total score is 1516193.726. Permutating path at indexes 86683, 86684, 110298, 110300, 110308.
New total score is 1516193.674. Permutating path at indexes 86683, 110300, 110306, 110307, 110309.



In [14]:
# Write the improved tour back to 'submission.csv' -- the same file the tour
# was loaded from, so a later run starts from this result.
submission_out = pd.DataFrame({'Path': path})
submission_out.to_csv('submission.csv', index=False)