In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sympy
from numba import jit
from sympy import isprime, primerange
from tqdm import tqdm_notebook as tqdm
np.random.seed(seed=42)
df = pd.read_csv('cuda_submission.csv') # 1516713.7676
cities = pd.read_csv('input/cities.csv')

In [2]:
primes = np.array(list(primerange(0, len(cities))))
complexes = (cities.X + 1j * cities.Y).values
path = df.Path.values
[primes.shape, complexes.shape, path.shape]

[(17802,), (197769,), (197770,)]

In [3]:
path_complexes = np.array([complexes[i] for i in path])
path_complexes.shape

(197770,)

In [4]:
score_np = np.sum(np.abs(np.diff(path_complexes)))
score_np

1503376.4431153932

In [5]:
penalty = 0
for i in range(0, len(path), 10):
    if i > 0 and i % 10 == 0 and isprime(path[i-1]) == False:
        #d = np.abs(path_complexes[i-1] - path_complexes[i])
        d = np.abs(complexes[path[i-1]] - complexes[path[i]])
        penalty += d * 0.1
penalty

13337.332923249347

In [6]:
score = score_np + penalty
score

1516713.7760386425

In [7]:
def dist(a, b):
    return np.abs(complexes[a] - complexes[b])

In [9]:
@jit('i4(i4)')
def closest_prime(base_index):
    min_dist = np.inf
    closest_index = -1
    for i in primes:
        d = dist(base_index, i)
        if d < min_dist and i != base_index:
            min_dist = d
            closest_index = i
    return closest_index

In [10]:
closest_prime(path[8])

38447

In [11]:
def score_between(path, a, b):
    c = np.array([complexes[path[i]] for i in range(a, b+1)])
    sc = np.sum(np.abs(np.diff(c)))
    pn = np.sum(np.array([np.abs(complexes[path[i-1]] - complexes[path[i]])*0.1 for i in range(a, b+1) if i > 0 and a != i and i % 10 == 0 and isprime(path[i-1]) == False]))
    return sc + pn

In [12]:
score_between(path, 0, len(path)-1)

1516713.7760386425

In [13]:
def opt_prime(path):
    #path_copy = path.copy()
    sum1 = 0
    sum2 = 0
    for i in tqdm(range(0, len(path), 10)):
        if i > 0:
            p = isprime(path[i-1])
            if p == False:
                closest_prime_index = closest_prime(path[i-2])
                closest_prime_path_index = np.where(path==closest_prime_index)[0][0]
                if closest_prime_path_index < i-1:
                    s1 = score_between(path, closest_prime_path_index-1, i)
                    path_copy = np.insert(path, i, closest_prime_index)
                    path_copy = np.delete(path_copy, closest_prime_path_index)
                    s2 = score_between(path_copy, closest_prime_path_index-1, i)
                    if s1 > s2:
                        sum1 += 1
                        path = path_copy
                        print('sum1:', sum1, i)
                elif closest_prime_path_index > i-1:
                    s1 = score_between(path, i-2, closest_prime_path_index+1)
                    path_copy = np.insert(path, i-1, closest_prime_index)
                    path_copy = np.delete(path_copy, closest_prime_path_index+1)
                    s2 = score_between(path_copy, i-2, closest_prime_path_index+1)
                    if s1 > s2:
                        sum2 += 1
                        path = path_copy
                        print('sum2:', sum2, i)
    return path

In [14]:
path = opt_prime(path)
score_between(path, 0, len(path)-1)

# 1516845.1618924788 --> 1516774.6045630828 (-70.55732939601876) --> 1516761.7762980566
# 1517948.690835177  11m 52s

HBox(children=(IntProgress(value=0, max=19777), HTML(value='')))




1516713.7760386425

In [83]:
pd.DataFrame({'Path': path}).to_csv('submission.csv', index=False)

In [84]:
score_between(path, 0, len(path)-1)
# 1517948.690835177
# 1516763.9051902506
# 1516761.7762980566

1516761.7762980566

In [15]:
def opt2(path):
    cnt = 0
    for i in tqdm(range(1, len(path)-2)):       
        if i % 10 != 9 and (i+1) % 10 != 9:
            copy = path.copy()
            sc1 = score_between(path, i-1, i+2)
            copy[i], copy[i+1] = copy[i+1], copy[i]
            sc2 = score_between(copy, i-1, i+2)
            
            if sc1 > sc2:
                cnt += 1
                print(cnt, i)
                path[i], path[i+1] = path[i+1], path[i]
    return path
    

In [16]:
path = opt2(path)
score_between(path, 0, len(path)-1)

HBox(children=(IntProgress(value=0, max=197767), HTML(value='')))




1516713.7760386425

In [17]:
def opt3(path):
    cnt = 0
    for i in tqdm(range(2, len(path)-3)):
        if i % 10 != 8 and i % 10 != 0:
            copy = path.copy()
            sc1 = score_between(path, i-2, i+2)
            copy[i-1], copy[i+1] = copy[i+1], copy[i-1]
            sc2 = score_between(copy, i-2, i+2)
            if sc1 > sc2:
                cnt += 1
                print(cnt, i)
                path[i-1], path[i+1] = path[i+1], path[i-1]
    return path

In [18]:
path = opt3(path)
score_between(path, 0, len(path)-1)

HBox(children=(IntProgress(value=0, max=197765), HTML(value='')))




1516713.7760386425

In [80]:
@jit('i8[:](i8[:], i4)')
def opt_rand_flip(path, iteration=1000):
    cnt = 0
    for i in tqdm(range(iteration)):
        n1 = np.random.randint(1, len(path)-3)
        n2 = np.random.randint(n1+1, len(path)-2)
        sc1 = score_between(path, n1-1, n2+1)
        copy = path.copy()
        rev = copy[n1: n2+1][::-1]
        copy = np.concatenate([copy[:n1], rev, copy[n2+1:]])
        sc2 = score_between(copy, n1-1, n2+1)
        if sc1 > sc2:
            path = copy.copy()
            cnt += 1
            print(cnt, i)
    return path


In [81]:
path = opt_rand_flip(path, 10000)
score_between(path, 0, len(path)-1)

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))

1516761.7762980566

In [33]:
@jit('i8[:](i8[:], i4)')
def opt_rand(path, iteration=1000):
    cnt = 0    
    for i in tqdm(range(iteration)):
        n1 = np.random.randint(1, len(path)-4)
        n2 = np.random.randint(n1+1, len(path)-3)
        copy = path.copy()
        sc1 = score_between(path, n1-1, n2+1)
        copy[n1], copy[n2] = copy[n2], copy[n1]
        sc2 = score_between(copy, n1-1, n2+1)
        if sc1 > sc2:
            path[n1], path[n2] = path[n2], path[n1]
            cnt += 1
            print(cnt, i)         
    return path

In [34]:
score_between(path, 0, len(path)-1)

1516774.0593811055

In [35]:
path = opt_rand(path, 10000)
score_between(path, 0, len(path)-1)
# 100000: 1h 58m 4s

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))




1516774.0593811055

In [31]:
pd.DataFrame({'Path': p}).to_csv('submission.csv', index=False)

In [32]:
# flip it: https://www.kaggle.com/matthewa313/flip-it

tour = pd.read_csv('submission.csv')['Path'].tolist()
cities = pd.read_csv('input/cities.csv')

# Define a function for length of a path:
import numpy as np
from sympy import primerange
primes = list(primerange(0, len(cities)))
def score_tour(tour):
    # length of any given tour with primes calculation
    df = cities.reindex(tour + [0]).reset_index()
    df['prime'] = df.CityId.isin(primes).astype(int)
    df['dist'] = np.hypot(df.X - df.X.shift(-1), df.Y - df.Y.shift(-1))
    df['penalty'] = df['dist'][9::10] * (1 - df['prime'][9::10]) * 0.1
    return df.dist.sum() + df.penalty.sum()

# Let's take a look at our tour
print("Tour path (0-5):",tour[0:5])
# And the flipped tour looks like:
tourflip = tour[::-1]
print("Flipped tour path (0-5):", tourflip[0:5])
# The scores of our tours are:
print("Score of original tour:", score_tour(tour))
print("Score of flipped tour:", score_tour(tourflip))

# If the flipped tour is quicker, change our tour:
if score_tour(tourflip) < score_tour(tour):
    print("The total improvement was:", abs(score_tour(tourflip) - score_tour(tour)))
    tour = tourflip 
    print("The better of the original/flipped tour is:", tour[0:5])

#pd.DataFrame({'Path': list(tour)}).to_csv('submission.csv', index=False)

Tour path (0-5): [0, 78934, 111804, 52086, 89712]
Flipped tour path (0-5): [0, 48816, 40230, 75405, 153911]
Score of original tour: 1516774.6045630828
Score of flipped tour: 1517077.9840253645
