In [1]:
import pandas as pd
import numpy as np
import os
import math

In [779]:
# Load the world-cities spreadsheet into a DataFrame.
# NOTE(review): this is an absolute, user-specific Windows path, so the cell will
# not resolve on another machine; consider a configurable, relative data path.
cities = pd.read_excel(r'C:\Users\murat.aydin\Desktop\worldcities.xlsx')

In [780]:
def haversine_distance_in_km(lat1, lon1, lat2, lon2, earth_radius=6371):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    earth_radius : float, optional
        Radius of the sphere. The result is expressed in the same unit.
        Defaults to 6371 (kilometers); pass 3959 to get miles.

    Returns
    -------
    float
        Distance along the sphere's surface. NaN inputs propagate to a NaN
        result.
    """
    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    # Differences in latitude and longitude
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    # Plain comparisons are used (not min/max) so that NaN is left untouched.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return earth_radius * c



In [795]:
# Keep only the rows whose 'lat' value is above 30, then renumber them from zero.
cities = cities.loc[cities['lat'] > 30].reset_index().drop(columns=['index'])

In [796]:
# Starting point of the journey. These names were never defined in the notebook,
# so the cell failed with NameError on a fresh kernel. The values are the
# coordinates of the London row in the table (lat 51.5072, lng -0.1275), which
# is the row that shows distance_km == 0 in the recorded output.
london_lat, london_lng = 51.5072, -0.1275

# Distance of every city from London, then order the table nearest-first.
cities['distance_km'] = cities.apply(
    lambda row: haversine_distance_in_km(london_lat, london_lng, row['lat'], row['lng']),
    axis=1,
)
df = cities.sort_values('distance_km')

In [797]:
def check_population(population):
    """Return a surcharge of 2 when `population` exceeds 200000, otherwise 0."""
    is_large_city = population > 200000
    return 2 if is_large_city else 0

In [798]:
def check_country(current_city_country,destination_country):
    """Return a surcharge of 2 when the two countries differ, otherwise 0."""
    crosses_border = current_city_country != destination_country
    return 2 if crosses_border else 0
    

In [799]:
def get_travel_cost(index):
    """Map a candidate's rank (0, 1 or 2) to its travel cost (2, 4 or 8).

    Any other `index` raises KeyError.
    """
    cost_by_rank = dict(zip(range(3), (2, 4, 8)))
    return cost_by_rank[index]

In [800]:
def calculate_cost(current_city_country,current_city_id,df):
    """Attach the three cost components to a frame of candidate cities.

    Parameters
    ----------
    current_city_country : str
        Country of the city being travelled from; compared with each
        candidate's 'country' through check_country.
    current_city_id
        Not used by the computation; kept so existing calls keep working.
    df : pandas.DataFrame
        Candidate cities, ordered nearest-first, with at least the columns
        'country' and 'population'. At most three rows are supported because
        get_travel_cost only knows ranks 0, 1 and 2 (KeyError otherwise).

    Returns
    -------
    pandas.DataFrame
        A re-indexed copy of `df` with the extra columns 'cost_country',
        'cost_pop' and 'travel_cost'. The caller's frame is left untouched.

    Notes
    -----
    This function is defined a second time further down the notebook
    (cell In [805]); this version behaves the same way as that one.
    """
    # Work on a renumbered copy. The travel cost depends on a candidate's
    # position (0 = nearest); the old code used the index label instead, which
    # raised KeyError for slices such as frame[1:4] and also wrote the new
    # columns onto the caller's slice.
    candidates = df.reset_index(drop=True)

    candidates['cost_country'] = candidates['country'].map(
        lambda country: check_country(current_city_country, country)
    )
    candidates['cost_pop'] = candidates['population'].map(check_population)
    candidates['travel_cost'] = [get_travel_cost(rank) for rank in range(len(candidates))]

    return candidates
    

In [801]:
def sum_total_cost(row):
    """Add up the country, population and travel cost components of one row."""
    country_part, population_part, travel_part = (
        row[column] for column in ('cost_country', 'cost_pop', 'travel_cost')
    )
    return country_part + population_part + travel_part



In [802]:
def choose_optimum_city(df):
    """Select the row with the largest 'lng' value and return its key fields.

    The returned dict holds 'city_id', 'total_cost', 'city_name', 'lat', 'lng'
    and 'country' of that row, in this order.
    """
    wanted_fields = ('city_id', 'total_cost', 'city_name', 'lat', 'lng', 'country')

    by_longitude_desc = df.sort_values(by=['lng'], ascending=[False])
    easternmost = by_longitude_desc.reset_index().loc[0]

    return {field: easternmost[field] for field in wanted_fields}


In [803]:

# Use explicit column names for the city identifier and city name, renumber
# the rows from zero, and display the result.
df = (
    df.rename(columns={'id': 'city_id', 'city': 'city_name'})
    .reset_index()
    .drop(columns='index')
)
df

Unnamed: 0,city_name,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,city_id,distance_km
0,London,London,51.5072,-0.1275,United Kingdom,GB,GBR,"London, City of",primary,10979000.0,1826645935,0.000000
1,Holborn,Holborn,51.5172,-0.1182,United Kingdom,GB,GBR,Camden,,13023.0,1826657058,1.284766
2,Lambeth,Lambeth,51.4903,-0.1193,United Kingdom,GB,GBR,Lambeth,,9675.0,1826545364,1.963051
3,Spitalfields,Spitalfields,51.5166,-0.0750,United Kingdom,GB,GBR,Tower Hamlets,,10286.0,1826499077,3.780491
4,Kensington,Kensington,51.5000,-0.1900,United Kingdom,GB,GBR,Kensington and Chelsea,,64681.0,1826583042,4.399398
...,...,...,...,...,...,...,...,...,...,...,...,...
19382,Katsuura,Katsuura,35.1525,140.3211,Japan,JP,JPN,Chiba,,17324.0,1392044206,9636.254742
19383,Kisanuki,Kisanuki,31.3447,130.9453,Japan,JP,JPN,Kagoshima,,14495.0,1392204633,9638.538863
19384,Kushima,Kushima,31.4645,131.2284,Japan,JP,JPN,Miyazaki,,17363.0,1392989065,9639.374565
19385,Nishinoomote,Nishinoomote,30.7325,130.9975,Japan,JP,JPN,Kagoshima,,14980.0,1392981409,9700.971905


In [804]:
import random
random.seed(42)  # Seeds the stdlib generator; change or remove to vary it

# Always keep the first row (the starting city) and add 499 randomly drawn
# other cities; the draw itself is pinned by random_state=42. The sample is
# then ordered by distance_km and renumbered from zero.
sampled_df = (
    pd.concat(
        [df.iloc[0:1], df.iloc[1:].sample(n=499, random_state=42)],
        ignore_index=True,
    )
    .sort_values(by='distance_km')
    .reset_index()
    .drop(columns='index')
)
sampled_df

Unnamed: 0,city_name,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,city_id,distance_km
0,London,London,51.5072,-0.1275,United Kingdom,GB,GBR,"London, City of",primary,10979000.0,1826645935,0.000000
1,Kensington,Kensington,51.5000,-0.1900,United Kingdom,GB,GBR,Kensington and Chelsea,,64681.0,1826583042,4.399398
2,Wanstead,Wanstead,51.5778,0.0286,United Kingdom,GB,GBR,Redbridge,,11543.0,1826002020,13.347853
3,East Barnet,East Barnet,51.6430,-0.1630,United Kingdom,GB,GBR,Barnet,,16137.0,1826125584,15.298259
4,Pinner,Pinner,51.5932,-0.3894,United Kingdom,GB,GBR,Harrow,,31130.0,1826466478,20.478690
...,...,...,...,...,...,...,...,...,...,...,...,...
495,Yuasa,Yuasa,34.0331,135.1786,Japan,JP,JPN,Wakayama,,11340.0,1392420914,9552.428391
496,Kamirenjaku,Kamirenjaku,35.6833,139.5594,Japan,JP,JPN,Tōkyō,,192489.0,1392889022,9554.535603
497,Zama,Zama,35.4886,139.4075,Japan,JP,JPN,Kanagawa,,130608.0,1392313741,9568.783454
498,Makinohara,Makinohara,34.7400,138.2247,Japan,JP,JPN,Shizuoka,,44111.0,1392331994,9600.638815


In [805]:
def calculate_cost(current_city_country,current_city_id,df):
    """Return a renumbered copy of `df` with the three cost columns added.

    'cost_country' comes from check_country (against `current_city_country`),
    'cost_pop' from check_population, and 'travel_cost' from get_travel_cost
    applied to each row's position after renumbering. `current_city_id` is
    accepted but not used.
    """
    renumbered = df.reset_index().drop(columns='index')

    def border_fee(row):
        return check_country(current_city_country, row['country'])

    def size_fee(row):
        return check_population(row['population'])

    def rank_fee(row):
        # After renumbering, row.name is the row's position in the frame.
        return get_travel_cost(row.name)

    renumbered['cost_country'] = renumbered.apply(border_fee, axis=1)
    renumbered['cost_pop'] = renumbered.apply(size_fee, axis=1)
    renumbered['travel_cost'] = renumbered.apply(rank_fee, axis=1)

    return renumbered
    

In [806]:

# For every sampled city, record its three nearest neighbours (the city itself,
# at distance 0, is skipped) together with the cost of travelling to each.
city_ids = sampled_df.city_id.to_list()
info_cities = {}

for each_city_id in city_ids:
    # Look the origin city up once instead of re-filtering the frame per field.
    origin = sampled_df.loc[sampled_df['city_id'] == each_city_id].iloc[0]
    lat = origin['lat']
    lng = origin['lng']
    name = origin['city_name']
    country_name = origin['country']

    # Distances from the origin go into a derived frame. Writing them back to
    # sampled_df (as before) silently replaced its distance-from-London column
    # with the distances to whichever city was processed last.
    distances_from_origin = sampled_df.apply(
        lambda row: haversine_distance_in_km(lat, lng, row['lat'], row['lng']),
        axis=1,
    )
    df_sorted = (
        sampled_df.assign(distance_km=distances_from_origin)
        .sort_values(by='distance_km')
        .reset_index(drop=True)
    )

    # Position 0 is the origin itself; positions 1-3 are its nearest neighbours.
    nearest_three = df_sorted[1:4]
    cost_df = calculate_cost(country_name, each_city_id, nearest_three)
    sum_df_calculated = cost_df.assign(total_cost=cost_df.apply(sum_total_cost, axis=1))

    city_info = {
        'city_names': nearest_three['city_name'].to_list(),
        'city_ids': nearest_three['city_id'].to_list(),
        'city_distances': nearest_three['distance_km'].to_list(),
        'city_latitudes': nearest_three['lat'].to_list(),
        'city_longitudes': nearest_three['lng'].to_list(),
        'city_pops': nearest_three['population'].to_list(),
        'city_countries': nearest_three['country'].to_list(),
        'city_total_cost': sum_df_calculated['total_cost'].to_list(),
    }

    info_cities[each_city_id] = city_info

In [807]:
# Sanity check: number of cities for which neighbour info was collected.
len(info_cities)

500

In [808]:
# For every city, pick the neighbour with the greatest longitude (the
# easternmost of its three nearest), add its cost to the running total and
# remember its coordinates and a "Country_City" label.
total_cost = 0
coordinates_lat = []
coordinates_lng = []
city_and_country = []

for each_city, neighbours in info_cities.items():
    neighbour_lngs = neighbours['city_longitudes']
    eastest_city_index = neighbour_lngs.index(max(neighbour_lngs))

    eastest_city_id = neighbours['city_ids'][eastest_city_index]
    eastest_city_cost = neighbours['city_total_cost'][eastest_city_index]
    eastest_city_lat = neighbours['city_latitudes'][eastest_city_index]
    eastest_city_lng = neighbour_lngs[eastest_city_index]
    eastest_city_country = neighbours['city_countries'][eastest_city_index]
    eastest_city_name = neighbours['city_names'][eastest_city_index]

    total_cost += eastest_city_cost
    coordinates_lat.append(eastest_city_lat)
    coordinates_lng.append(eastest_city_lng)
    city_and_country.append(eastest_city_country+'_'+eastest_city_name)

# Materialise the pairs. A bare zip() is a one-shot iterator, so a second run
# of the map cell would otherwise find it exhausted and draw no markers.
coordinates = list(zip(coordinates_lat, coordinates_lng))
    

In [809]:
# Show the "Country_City" label of the easternmost neighbour chosen for each city.
city_and_country

['United Kingdom_Wanstead',
 'United Kingdom_London',
 'United Kingdom_London',
 'United Kingdom_Wanstead',
 'United Kingdom_London',
 'United Kingdom_London',
 'United Kingdom_Wanstead',
 'United Kingdom_East Barnet',
 'United Kingdom_Sevenoaks',
 'United Kingdom_Wheathampstead',
 'United Kingdom_Wheathampstead',
 'United Kingdom_Claygate',
 'United Kingdom_Horley',
 'United Kingdom_Birchington',
 'United Kingdom_Wheathampstead',
 'United Kingdom_Claygate',
 'United Kingdom_Bexhill',
 'United Kingdom_Sevenoaks',
 'United Kingdom_Heybridge',
 'United Kingdom_Wheathampstead',
 'United Kingdom_Heybridge',
 'United Kingdom_Wellingborough',
 'United Kingdom_Hook',
 'United Kingdom_Handsworth',
 'United Kingdom_East Retford',
 'United Kingdom_Lincoln',
 'United Kingdom_Wellington',
 'United Kingdom_Blackwood',
 'United Kingdom_East Retford',
 'France_Wasquehal',
 'United Kingdom_Mossley',
 'Belgium_Bruges',
 'United Kingdom_Blackwood',
 'United Kingdom_East Retford',
 'United Kingdom_Poynto

In [810]:


import folium

# World-level base map centred on latitude 0, longitude 0.
m = folium.Map(location=[0, 0], zoom_start=2)

# Place one marker on the map per (lat, lng) pair in `coordinates`.
for coord in coordinates:
    marker = folium.Marker(coord)
    marker.add_to(m)

# Write the map to map.html in the current working directory.
m.save("map.html")

In [568]:
# Scratch cell: one manual step of the eastward walk over sampled_df.
# NOTE(review): stale. It was executed as In [568], out of order with the cells
# above. It reads city_id_inception, nearest_three and route, which this
# notebook only initialises in the In [383] cell further down, and it calls
# calculate_cost with two arguments although both definitions in this notebook
# take three (current_city_country, current_city_id, df).
print(len(sampled_df))


# Cost the current candidates, pick the easternmost one and add its cost.
cost_df = calculate_cost(city_id_inception,nearest_three)
sum_df_calculated = cost_df.assign(total_cost=cost_df.apply(sum_total_cost, axis=1))
optimum_city = choose_optimum_city(sum_df_calculated)
city_id_inception = optimum_city['city_id']
total_cost+= optimum_city['total_cost']


# Distances from the chosen city are computed over sampled_df but stored on df.
# After a previous run of this cell, df is a filtered selection of sampled_df
# (see the last assignment to df below), which is what triggers the
# SettingWithCopyWarning recorded in the output.
df['distance_km'] = sampled_df.apply(lambda row: haversine_distance_in_km(optimum_city['lat'], optimum_city['lng'], row['lat'], row['lng']), axis=1)
nearest_three_sorted = df.sort_values(by='distance_km')
nearest_three = nearest_three_sorted[:4]

# Ids of the four nearest rows, minus the chosen city, are removed from the pool.
# The filter starts from sampled_df again, so removals made by earlier runs of
# this cell are not carried over.
set1 = set(nearest_three.city_id.to_list())
set2 = set([city_id_inception])
difference = list(set1 - set2)
df = sampled_df[~sampled_df['city_id'].isin(difference)]

print(len(df))

route.append(optimum_city)

500
497


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['distance_km'] = sampled_df.apply(lambda row: haversine_distance_in_km(optimum_city['lat'], optimum_city['lng'], row['lat'], row['lng']), axis=1)


In [569]:
# Inspect the city ids removed from the pool by the manual step (stale cell, In [569]).
difference

[1826292521, 1826815474, 1826645935]

In [535]:
# Inspect the route collected so far (stale cell, executed as In [535]).
route

[{'city_id': 1826002020,
  'total_cost': 4,
  'city_name': 'Wanstead',
  'lat': 51.5778,
  'lng': 0.0286,
  'country': 'United Kingdom'}]

In [458]:
# Inspect the current candidate rows (stale cell, executed as In [458]).
nearest_three

Unnamed: 0,city_name,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,city_id,distance_km
0,Wanstead,Wanstead,51.5778,0.0286,United Kingdom,GB,GBR,Redbridge,,11543.0,1826002020,0.0
1,Pinner,Pinner,51.5932,-0.3894,United Kingdom,GB,GBR,Harrow,,31130.0,1826466478,28.930526
4,Wheathampstead,Wheathampstead,51.812,-0.293,United Kingdom,GB,GBR,Hertfordshire,,6410.0,1826432060,34.198008
3,Sevenoaks,Sevenoaks,51.2781,0.1874,United Kingdom,GB,GBR,Kent,,29506.0,1826815474,35.096621


In [383]:
# Earlier attempt at a greedy eastward walk: repeatedly move to the easternmost
# of the nearest candidates and drop the other candidates from the pool.
# NOTE(review): stale. It was executed as In [383], and it calls calculate_cost
# with two arguments although both definitions in this notebook take three.
# The recorded run was stopped manually (KeyboardInterrupt in the output).
total_cost = 0
#df=sampled_df
city_id_inception = df['city_id'].values[0]
nearest_three = df[:4]

route=[]

# df.reset_index().iterrows() is evaluated once, before df is reassigned inside
# the loop, and index/value are never read: the loop only serves to repeat the
# body once per row of the initial df.
for index,value in df.reset_index().iterrows():
   
    # Cost the current candidates and take the one with the greatest longitude.
    cost_df = calculate_cost(city_id_inception,nearest_three)
    sum_df_calculated = cost_df.assign(total_cost=cost_df.apply(sum_total_cost, axis=1))
    optimum_city = choose_optimum_city(sum_df_calculated)
    city_id_inception = optimum_city['city_id']
    total_cost+= optimum_city['total_cost']
    
    print(optimum_city['country'])

    # Remove every candidate except the chosen city from the pool.
    set1 = set(nearest_three.city_id.to_list())
    set2 = set([city_id_inception])
    difference = list(set1 - set2)
    df = df[~df['city_id'].isin(difference)]
    df = df.drop('distance_km',axis=1)
    print(len(df))

    # Recompute distances from the chosen city and take the four nearest rows.
    # The chosen city is still in df, so it is presumably among them at distance 0.
    df['distance_km'] = df.apply(lambda row: haversine_distance_in_km(optimum_city['lat'], optimum_city['lng'], row['lat'], row['lng']), axis=1)
    nearest_ten_distance_sorted = df.sort_values(by='distance_km')
    nearest_three = nearest_ten_distance_sorted[:4]
    route.append(optimum_city)


United Kingdom
19384
United Kingdom
19381
United Kingdom
19378
United Kingdom
19375
United Kingdom
19372
United Kingdom
19369
United Kingdom
19366
United Kingdom
19363
United Kingdom
19360
United Kingdom
19357
United Kingdom
19354
United Kingdom
19351
United Kingdom
19348
United Kingdom
19345
United Kingdom
19342
United Kingdom
19339
United Kingdom
19336
United Kingdom
19333
United Kingdom
19330
United Kingdom
19327
United Kingdom
19324
United Kingdom
19321
United Kingdom
19318
United Kingdom
19315
United Kingdom
19312
United Kingdom
19309
United Kingdom
19306
United Kingdom
19303
United Kingdom
19300
United Kingdom
19297
United Kingdom
19294
United Kingdom
19291
United Kingdom
19288
United Kingdom
19285
United Kingdom
19282
United Kingdom
19279
United Kingdom
19276
United Kingdom
19273
United Kingdom
19270
United Kingdom
19267
United Kingdom
19264
United Kingdom
19261
United Kingdom
19258
United Kingdom
19255
United Kingdom
19252
United Kingdom
19249
United Kingdom
19246
United Kingdo

KeyboardInterrupt: 

In [384]:
# Inspect the stops collected by the interrupted loop above.
route

[{'city_id': 1826499077,
  'total_cost': 8,
  'city_name': 'Spitalfields',
  'lat': 51.5166,
  'lng': -0.075,
  'country': 'United Kingdom'},
 {'city_id': 1826642497,
  'total_cost': 2,
  'city_name': 'Stepney',
  'lat': 51.5152,
  'lng': -0.0462,
  'country': 'United Kingdom'},
 {'city_id': 1826369701,
  'total_cost': 8,
  'city_name': 'West Ham',
  'lat': 51.5347,
  'lng': 0.0077,
  'country': 'United Kingdom'},
 {'city_id': 1826587417,
  'total_cost': 4,
  'city_name': 'East Ham',
  'lat': 51.5323,
  'lng': 0.0554,
  'country': 'United Kingdom'},
 {'city_id': 1826397253,
  'total_cost': 8,
  'city_name': 'Goodmayes',
  'lat': 51.5584,
  'lng': 0.1119,
  'country': 'United Kingdom'},
 {'city_id': 1826813082,
  'total_cost': 8,
  'city_name': 'Hornchurch',
  'lat': 51.5565,
  'lng': 0.2128,
  'country': 'United Kingdom'},
 {'city_id': 1826422421,
  'total_cost': 8,
  'city_name': 'South Ockendon',
  'lat': 51.5207,
  'lng': 0.2956,
  'country': 'United Kingdom'},
 {'city_id': 18269049

In [385]:
# Split the route into parallel latitude / longitude lists and pair them up.
latitudes = [stop['lat'] for stop in route]
longitudes = [stop['lng'] for stop in route]

coordinates = zip(latitudes, longitudes)


import folium


# World-level base map centred on latitude 0, longitude 0.
m = folium.Map(location=[0, 0], zoom_start=2)

# Place one marker on the map per stop of the route.
for coord in coordinates:
    marker = folium.Marker(coord)
    marker.add_to(m)

# Write the map to map.html in the current working directory.
m.save("map.html")

In [386]:
# NOTE(review): 24 is an unexplained constant; presumably it converts the
# accumulated cost into days (24 hours per day). Confirm and name it.
total_cost/24

49.5