In [1]:
import numpy as np
import pandas as pd

from itertools import permutations
from tqdm import tqdm

# Raise pandas' display limits so wide and long frames are rendered in full
# instead of being elided in the notebook output.
pd.set_option(
    'display.max_columns', 999,
    'display.max_rows', 999,
)

In [2]:
def perference_score(row, pref_columns):
    """Return the arithmetic mean of the entries of ``row`` selected by ``pref_columns``.

    Parameters
    ----------
    row : label-indexed row (it is called with the rows produced by
        ``DataFrame.apply(..., axis=1)``).
    pref_columns : list of labels to average over.

    Returns
    -------
    The value of ``np.mean`` over the selected entries.
    """
    ratings = row[pref_columns].values
    return np.mean(ratings)

In [3]:
# Columns holding one rating per family member for each preference row.
pref_columns = ['preferences_mom', 'preferences_dad', 'preferences_son',
                'preferences_daughter', 'preferences_grandma', 'preferences_grandpa',
                'preferences_woofer_the_dog']

# Load the survey, replace missing answers with 3 and drop the rows labelled 0 and 1.
# NOTE(review): 3 is presumably the neutral midpoint of the rating scale, and rows
# 0/1 are presumably non-answer rows - confirm both against data/preferences.csv.
preferences = (
    pd.read_csv('data/preferences.csv')
    .fillna(3)
    .drop([0, 1])
)

# Assign the converted columns directly instead of through `.loc[:, cols] = ...`:
# recent pandas versions set values in place with `.loc`, which can leave the
# columns with their original (e.g. object) dtype instead of float64.
preferences[pref_columns] = preferences[pref_columns].astype(np.float64)

# Family-wide rating of a preference = mean of the individual ratings.
preferences['Mean preferences value'] = preferences.apply(
    lambda row: perference_score(row, pref_columns), axis=1)

# preference label -> mean family rating (consumed by `score_city`).
family_preferences = dict(zip(preferences['preference'], preferences['Mean preferences value']))

In [4]:
# Display the individual ratings next to the computed 'Mean preferences value' column.
preferences

Unnamed: 0,preference,preferences_mom,preferences_dad,preferences_son,preferences_daughter,preferences_grandma,preferences_grandpa,preferences_woofer_the_dog,Mean preferences value
2,likes monuments,3,3,5,1,4,5,1,3.142857
3,likes hiking,5,2,5,4,2,5,5,4.0
4,likes restaurants,4,5,4,4,5,3,1,3.714286
5,interested in history,4,4,5,2,4,5,1,3.571429
6,likes long journeys,4,2,5,4,1,4,1,3.0
7,is brave,4,2,5,5,3,5,5,4.142857
8,likes cycling,5,2,5,4,2,5,5,4.0
9,likes swimming,5,3,5,5,1,3,5,3.857143
10,likes crowds,3,1,3,5,1,3,1,2.428571
11,travels often,4,5,4,4,3,3,5,4.0


In [5]:
def score_city(row, family_preferences, mapping=None):
    """Score one city as the preference-weighted sum of its statistics.

    Parameters
    ----------
    row : pandas.Series
        One city's statistics, indexed by column name.
    family_preferences : dict
        Preference label -> weight.
    mapping : dict, optional
        Preference label -> name of the statistics column it is matched with.
        Defaults to the module-level ``MAPPING_CASTING``.

    Returns
    -------
    Sum over ``mapping`` of ``family_preferences[label] * row[column]``.

    Raises
    ------
    KeyError
        If a label of ``mapping`` is missing from ``family_preferences`` or a
        mapped column is missing from ``row``.
    """
    if mapping is None:
        # Looked up at call time, so the cell defining MAPPING_CASTING may run
        # after the cell defining this function.
        mapping = MAPPING_CASTING

    # Convert the row once instead of once per mapping entry.
    city_stats = row.to_dict()
    return sum(family_preferences[label] * city_stats[column]
               for label, column in mapping.items())

In [6]:
# Preference label (a key of `family_preferences`) -> name of the `statystyki`
# column it is matched with. `score_city` multiplies the family's rating of each
# label by the city's value in the mapped column and sums the products.
# Several labels share a target column, so that column contributes once per
# label pointing at it.
MAPPING_CASTING = {
    'likes monuments': 'Pomniki',
    # 'Wysokosc' is also the target of 'likes mountains'.
    'likes hiking': 'Wysokosc',
    'likes restaurants': 'Restauracje',
    'interested in history': 'Historia',
    'likes cycling': 'Rowery',
    'likes crowds': 'Tlum',
    'likes paintings': 'Sztuka',
    'likes going to the theatre': 'Teatr',
    'likes concerts': 'Imprezy',
    # 'Natura' is the target of three labels: this one, 'likes animals' and 'likes parks'.
    'likes spending time in the nature': 'Natura',
    'likes big cities': 'Wielkosc_miasta',
    # 'Male_miasta' is a derived column, computed after loading as 1 - 'Wielkosc_miasta'.
    'likes small towns': 'Male_miasta',
    'enjoys sleeping in high-class hotels': 'SuperHotele',
    'enjoys sleeping in hostels': 'Hostele', 
    'likes animals': 'Natura',
    'likes parks': 'Natura',
    'likes the sea': 'Morze', 
    'would like to show his/hers hometown': 'Miasto_rodzinne',
    'likes mountains': 'Wysokosc'
}

# Per-city statistics plus a derived "small town" column that is the complement
# of the city-size column.
# NOTE(review): assumes 'Wielkosc_miasta' is scaled to [0, 1] - confirm.
statystyki = (
    pd.read_csv('data/statystyki.csv')
    .assign(Male_miasta=lambda stats: 1 - stats['Wielkosc_miasta'])
)

# Preference-weighted score of every city (one `score_city` call per row).
statystyki['Score'] = statystyki.apply(
    lambda city_row: score_city(city_row, family_preferences),
    axis=1,
)

In [7]:
# City code used in the data files -> readable city name.
encrypted_cities = pd.read_csv('data/Odszyfrowane_miasta.csv')
translation = dict(zip(encrypted_cities['city'], encrypted_cities['nazwa']))

# Pairwise distances as a float array; row/column positions become the integer
# city ids used everywhere below.
# NOTE(review): fillna(0) turns every missing distance into 0 - confirm that only
# the diagonal is missing, otherwise such legs are treated as free.
distances = pd.read_csv('data/distances.csv')
DISTANCE_MATRIX = (
    distances
    .drop(['city'], axis='columns')
    .fillna(0)
    .astype(np.float64)
    .values
)

# Row label of `distances` -> city code, and the derived lookups.
name_mapping = distances['city'].to_dict()
NAME_MAPPING = {city_idx: translation[code] for city_idx, code in name_mapping.items()}
REVERSED_NAME_MAPPING = dict(zip(name_mapping.values(), name_mapping.keys()))

# City code -> score, then re-keyed by integer city id.
score_mapping = dict(zip(statystyki['city'], statystyki['Score']))
SCORE_MAPPING = {
    REVERSED_NAME_MAPPING[code]: city_score
    for code, city_score in score_mapping.items()
}


In [8]:
class Route:
    """A closed tour over cities given as integer ids.

    City ids index the module-level ``DISTANCE_MATRIX`` and are keys of
    ``SCORE_MAPPING`` and ``NAME_MAPPING``.

    Attributes
    ----------
    all_cities : sequence of int
        City ids in visiting order.
    total_distance_ : number or None
        Length of the closed tour; ``None`` until ``compute_total_distance()``
        has been called.
    """

    def __init__(self, all_cities):
        self.all_cities = all_cities
        # Defined up front so the attribute always exists; filled in by
        # compute_total_distance().
        self.total_distance_ = None

    def compute_total_distance(self):
        """Compute, store and return the length of the closed tour.

        Sums the distance of every consecutive leg plus the leg from the last
        city back to the first. Routes with fewer than two cities have
        length 0. The result is stored in ``total_distance_`` and returned.
        """
        n_cities = len(self.all_cities)
        total = 0

        if n_cities >= 2:
            for i in range(n_cities):
                # (i + 1) % n_cities wraps around, so the final iteration adds
                # the leg from the last city back to the first.
                total += self.get_distance(self.all_cities[i],
                                           self.all_cities[(i + 1) % n_cities])

        self.total_distance_ = total
        return total

    @staticmethod
    def get_distance(i, j):
        """Return the ``DISTANCE_MATRIX`` entry for going from city ``i`` to city ``j``."""
        return DISTANCE_MATRIX[i, j]

    def get_route_score(self):
        """Return the sum of the ``SCORE_MAPPING`` scores of the cities on the route."""
        return sum(SCORE_MAPPING[city] for city in self.all_cities)

    def get_route_city_names(self):
        """Return the ``NAME_MAPPING`` names of the cities, in visiting order."""
        return [NAME_MAPPING[city] for city in self.all_cities]

    def __str__(self):
        return str(self.all_cities)

    def __repr__(self):
        return str(self.all_cities)
    

In [9]:
# Search-space parameters of the exhaustive enumeration.
# NOTE(review): 66 is presumably the number of rows of DISTANCE_MATRIX - confirm,
# and consider deriving it from the matrix instead of hard-coding it.
N_CITIES = 66
ROUTE_LENGTHS = range(2, 5)      # tours visiting 2, 3 or 4 cities
MAX_ROUTE_DISTANCE = 1500        # same unit as the entries of DISTANCE_MATRIX

# One tuple per feasible route: (Route, score, city names, total distance).
all_possible_routes = []

for repeat in ROUTE_LENGTHS:
    # Every ordered selection of `repeat` distinct cities is tried, so each
    # closed tour appears once per rotation and direction.
    for cities_list in tqdm(permutations(range(N_CITIES), r=repeat)):

        route = Route(cities_list)
        route.compute_total_distance()

        # Keep only round trips within the distance budget.
        if route.total_distance_ <= MAX_ROUTE_DISTANCE:
            all_possible_routes.append((
                route,
                route.get_route_score(),
                route.get_route_city_names(),
                route.total_distance_,
            ))

4290it [00:00, 215432.45it/s]
274560it [00:01, 167581.63it/s]
17297280it [01:37, 177819.67it/s]


In [10]:
# One row per feasible route. Columns are positional:
# 0 = Route object, 1 = route score, 2 = list of city names, 3 = total distance.
score = pd.DataFrame(data=all_possible_routes)

In [11]:
# Order-independent key: the route's city names sorted and joined by spaces, so
# every ordering of the same set of cities gets the same key.
score['unique route'] = [' '.join(sorted(city_names)) for city_names in score[2]]

In [12]:
# For every set of cities, the row label of its shortest ordering (column 3 is
# the total distance).
unique_ids = score[3].groupby(score['unique route']).idxmin()

In [13]:
# Shortest ordering of every city set, ranked by route score (column 1); top 50.
(
    score
    .loc[unique_ids.values]
    .sort_values(by=1, ascending=False)
    .head(50)
)

Unnamed: 0,0,1,2,3,unique route
2882921,"(16, 46, 29, 48)",121.071258,"[krakow, warszawa, poznan, wroclaw]",1060.0,krakow poznan warszawa wroclaw
1414294,"(7, 29, 16, 46)",120.060904,"[gdansk, poznan, krakow, warszawa]",1426.0,gdansk krakow poznan warszawa
1442465,"(7, 46, 16, 48)",118.706794,"[gdansk, warszawa, krakow, wroclaw]",1411.0,gdansk krakow warszawa wroclaw
566757,"(2, 16, 46, 29)",116.794121,"[bielsko-biala, krakow, warszawa, poznan]",1105.0,bielsko-biala krakow poznan warszawa
2840121,"(16, 29, 22, 46)",116.74292,"[krakow, poznan, lodz, warszawa]",1100.0,krakow lodz poznan warszawa
566773,"(2, 16, 46, 48)",115.44001,"[bielsko-biala, krakow, warszawa, wroclaw]",975.0,bielsko-biala krakow warszawa wroclaw
2882541,"(16, 46, 22, 48)",115.38881,"[krakow, warszawa, lodz, wroclaw]",945.0,krakow lodz warszawa wroclaw
544087,"(2, 7, 46, 16)",114.429656,"[bielsko-biala, gdansk, warszawa, krakow]",1336.0,bielsko-biala gdansk krakow warszawa
1397344,"(7, 22, 16, 46)",114.378456,"[gdansk, lodz, krakow, warszawa]",1271.0,gdansk krakow lodz warszawa
427734,"(1, 16, 29, 46)",113.380943,"[bialystok, krakow, poznan, warszawa]",1380.0,bialystok krakow poznan warszawa


In [14]:
# Re-key the per-city scores from integer city id to readable city name.
score_miasta = pd.Series({
    NAME_MAPPING[city_idx]: city_score
    for city_idx, city_score in SCORE_MAPPING.items()
})

In [15]:
# Display the cities ranked from highest to lowest score.
score_miasta.sort_values(ascending=False)

warszawa                41.010225
krakow                  32.426592
poznan                  24.494276
wroclaw                 23.140165
gdansk                  22.129811
bielsko-biala           18.863027
lodz                    18.811827
szczecin                15.685638
bialystok               15.449849
jelenia gora            14.959583
katowice                14.049576
lublin                  13.417696
sopot                   13.352661
torun                   13.258961
kielce                  13.100449
gdynia                  12.832735
opole                   11.874278
rzeszow                 11.704839
bydgoszcz               11.133261
elblag                  10.634694
sosnowiec               10.391932
czestochowa             10.310321
krosno                  10.155520
gliwice                  9.920423
walbrzych                9.780813
swinoujscie              9.256250
tarnow                   9.018023
olsztyn                  8.797938
dabrowa gornicza         8.608170
radom         