In [1]:
import json
from geopy.geocoders import GoogleV3
from geopy import distance
import pandas as pd
import numpy as np
import random
import glob
from scipy.cluster.hierarchy import dendrogram, linkage
import os

In [2]:
with open("maps.key", 'r') as f:
    maps_key = f.read()
geolocator = GoogleV3(api_key=maps_key)

In [3]:
# city_list = [d.split('/')[-1] for d in glob.glob('hub_gpx/*')]

with open(f"hub_umap_urls.json", "r") as infile:
    city_urls = json.load(infile)
    
city_list =list(city_urls.keys())

In [4]:
# city_list = """New York-Newark-Bridgeport, NY-NJ-CT-PA CSA
# Los Angeles-Long Beach-Riverside, CA CSA
# Chicago-Naperville-Michigan City, IL-IN-WI CSA
# Washington DC
# Boston-Worcester-Manchester, MA-RI-NH CSA
# San Francisco Bay Area
# Dallas-Fort Worth, TX CSA
# Philadelphia-Camden-Vineland, PA-NJ-DE-MD CSA
# Houston-Baytown-Huntsville, TX CSA
# Atlanta-Sandy Springs-Gainesville, GA-AL CSA
# Miami-Fort Lauderdale-Pompano Beach, FL MSA
# Detroit-Warren-Flint, MI CSA
# Seattle-Tacoma-Olympia, WA CSA
# Minneapolis-St. Paul-St. Cloud, MN-WI CSA
# Denver-Aurora-Boulder, CO CSA
# San Diego-Carlsbad-San Marcos, CA MSA
# St. Louis-St. Charles-Farmington, MO-IL CSA
# Cleveland-Akron-Elyria, OH CSA
# Orlando-Deltona-Daytona Beach, FL CSA
# Tampa-St. Petersburg-Clearwater, FL MSA
# Sacramento–Arden-Arcade–Yuba City, CA-NV CSA
# Pittsburgh-New Castle-Weirton, PA-WV-OH CSA
# Charlotte-Gastonia-Salisbury, NC-SC CSA
# Portland-Vancouver-Hillsboro, OR-WA MSA
# San Antonio-New Braunfels, TX MSA
# Cincinnati-Middletown-Wilmington, OH-KY-IN CSA
# Kansas City-Overland Park-Kansas City, MO-KS CSA
# Indianapolis-Anderson-Columbus, IN CSA
# Austin-Round Rock-Marble Falls, TX CSA
# Raleigh-Durham-Cary, NC CSA
# Salt Lake City-Ogden-Clearfield, UT CSA
# Milwaukee-Racine-Waukesha, WI CSA
# Virginia Beach-Norfolk-Newport News, VA-NC MSA
# Greensboro–Winston-Salem–High Point, NC CSA
# Jacksonville, FL MSA
# Oklahoma City-Shawnee, OK CSA
# Hartford-East Hartford, CT CSA
# Grand Rapids-Muskegon-Holland, MI CSA
# Memphis, TN-MS-AR MSA
# Greenville-Spartanburg-Anderson, SC CSA
# Richmond, VA MSA
# New Orleans-Metairie-Bogalusa, LA CSA
# Buffalo-Cheektowaga-Olean, NY CSA
# Birmingham-Hoover-Cullman, AL CSA
# Albany-Schenectady-Amsterdam, NY CSA
# Rochester-Batavia-Seneca Falls, NY CSA
# Fresno-Madera, CA CSA
# Tucson, AZ MSA
# Omaha-Council Bluffs-Fremont, NE-IA CSA
# Albuquerque, NM MSA
# Montreal Canada
# Vancouver Canada""".split('\n')

### places with no amtrak
# Louisville/Jefferson County–Elizabethtown–Bardstown, KY-IN CSA
# Honolulu, HI MSA
# Knoxville-Sevierville-La Follette, TN CSA
# Las Vegas-Paradise-Pahrump, NV CSA
# Nashville-Davidson–Murfreesboro–Columbia, TN CSA
# San Juan-Caguas-Fajardo, PR CSA
# Tulsa-Bartlesville, OK CSA
# Columbus-Marion-Chillicothe, OH CSA
# Dayton-Springfield-Greenville, OH CSA


In [5]:
city_loc = []
for w in city_list:
    name = w.split('-')[0]
    location = geolocator.geocode(w.split('-')[0])
    if location:
        city_loc.append({'name': name, 'location': location[1]})

In [12]:
radius = 60
feature_dict = [
    {
      "type": "Feature",
      "properties": {
        "_umap_options": {
          "opacity": 0.9,
          "fill_opacity": 0.9,
          "color": "#"+''.join([random.choice('ABCDEF0123456789') for i in range(6)])
        },
        "description": "",
        "name": f"[[https://solarpunktravel.org/hubs/{i['name']}|{i['name'].replace('_',' ').title()}]]" 
      },
      "geometry": {
        "type": "Polygon",
        "coordinates": [[
            [distance.distance(miles=radius).destination(i['location'],bearing=b)[1],
             distance.distance(miles=radius).destination(i['location'], bearing=b)[0]]
            for b in range(0,350,10)]]
      }
    } for i in city_loc]

In [13]:
with open("main_hub_map.geojson", "w") as outfile:
    json.dump({"type": "FeatureCollection", "features": feature_dict}, outfile, indent=4)

In [9]:
# utility functions
def get_bb(loc,dist):
    min_lat = distance.distance(miles=dist).destination(loc, bearing=180)[0]
    max_lat = distance.distance(miles=dist).destination(loc, bearing=0)[0]
    min_lon = distance.distance(miles=dist).destination(loc, bearing=270)[1]
    max_lon = distance.distance(miles=dist).destination(loc, bearing=90)[1]
    return [min_lat,max_lat,min_lon,max_lon]

# distance function
def mydiff(a,b):
    return distance.distance(a,b).miles


### load campgrounds for each city

In [10]:
# load all the campgrounds
camps = pd.concat([pd.read_csv(f"csv_dbs/{s}", encoding = 'unicode_escape', header=None) for s in ['WestCamp.csv','MidwestCamp.csv','SouthCamp.csv','NortheastCamp.csv','SouthwestCamp.csv','WestCamp.csv','CanadaCamp.csv']])

city_dict = []
dist = 75
min_dist = 10
for c in city_loc:
    loc = c['location']
    min_lat = distance.distance(miles=dist).destination(loc, bearing=180)[0]
    max_lat = distance.distance(miles=dist).destination(loc, bearing=0)[0]
    min_lon = distance.distance(miles=dist).destination(loc, bearing=270)[1]
    max_lon = distance.distance(miles=dist).destination(loc, bearing=90)[1]
    tst = camps.loc[(camps[0]>min_lon)&(camps[0]<max_lon)&(camps[1]>min_lat)&(camps[1]>max_lon)].reset_index(drop=True)
    loc_array = np.array(tst[[1,0]])
    meas = np.array([distance.distance(loc,l).miles for l in loc_array])
    subset = tst.loc[(meas<dist) & (meas>min_dist) & (~tst[5].isin(['MIL',' ']))].reset_index(drop=True)
    city_dict.append({'name': c['name'], 'df':subset, 'lat': c['location'][0], 'lon': c['location'][1]})


In [11]:
assert len([city_ind for city_ind in range(len(city_dict)) if city_dict[city_ind]['df'].shape[0]==0])==0

In [12]:
# colors = ["#"+''.join([random.choice('ABCDEF0123456789') for i in range(6)]) for j in range(len(clust_dict))]
from unique_color import unique_color as uc
bad_colors = ['#fff2ec', '#faebd7', '#cae7e7', '#e8def6']
colors = [col for col in uc.unique_color_hex() if col not in bad_colors]

### iterate through hubs, build camp and route clusters

In [15]:
dont_overwrite = False
for city_ind in range(len(city_dict)):
    city_pretty = city_dict[city_ind]['name']
    city_name = city_dict[city_ind]['name'].lower().replace(' ','_')
    city_df = city_dict[city_ind]['df']
    city_lat = city_dict[city_ind]['lat']
    city_lon = city_dict[city_ind]['lon']
    if not os.path.exists(f"hub_gpx/{city_name}/routes.json") or city_df.shape[0]==0 or city_name=='detroit':
        print(f"error {city_name}")
        continue
    if dont_overwrite and os.path.exists(f"hub_gpx/{city_name}/umap_routes_simple.geojson"):
        print(f"skipping {city_name}")
        continue
    print(f"running {city_name}")
    with open(f"hub_gpx/{city_name}/routes.json", "r") as infile:
        routes = json.load(infile)
    
    ###
    #  Cluster the campgrounds together
    ###
    # generate the linkage matrix
    X = city_df[[1,0]].values
    Z = linkage(X, method='complete', metric=mydiff)
    from scipy.cluster.hierarchy import fcluster
    max_d = 10      
    clusters = fcluster(Z, max_d, criterion='distance')
    city_df['cluster']=clusters
    city_clust = city_df.groupby('cluster').mean()[[0,1]]
    
    ###
    #  reduce the number of points in the route
    ###
    
    max_diff_lim = 3
    for i in range(len(routes)):
        max_diff = 100
        div = 4
        while max_diff>max_diff_lim and len(routes[i]['coord'])/div>1:
            tmp = routes[i]['coord'][::int(len(routes[i]['coord'])/div)]
            max_diff = max([mydiff(tmp[j][::-1],tmp[j+1][::-1]) for j in range(len(tmp)-1)])
            div+=5
        routes[i]['simpl']=tmp
    clust_dict = [{'name': city_clust.iloc[j].name, 
                   'loc': city_clust.iloc[j][[0,1]],
                   'routes': []}
                  for j in range(city_clust.shape[0])]
    
    ###
    #  keep routes that are close enough to the city center
    ###
    city_dist_thres=10
    to_keep = []
    for i in range(len(routes)):
        tmp2 = routes[i]['simpl']
        min_tst = min([mydiff(tmp2[k][::-1],[city_lat,city_lon]) for k in range(len(tmp2))])
        if min_tst<city_dist_thres:
            to_keep.append(routes[i])
    routes = to_keep
    
    ###
    #  add routes to clusters if they are close enough to cluster
    #  also clip the route so it doesnt extend beyond the cluster
    ###
    min_thres = 5
    cluster_routes = []
    for i in range(len(routes)):
        tmp2 = routes[i]['simpl']
        for j in range(city_clust.shape[0]):
            min_tst = min([mydiff(tmp2[k][::-1],city_clust.iloc[j][[1,0]]) for k in range(len(tmp2))])
            if min_tst<min_thres:
                c_dist = mydiff([city_lat,city_lon],city_clust.iloc[j][[1,0]])+min_thres/2
#                 bb = get_bb([city_lat,city_lon],c_dist)
#                 new_route = [l for l in routes[i]['simpl'] if (l[1]>bb[0]) & (l[1]<bb[1]) & (l[0]>bb[2]) & (l[0]<bb[3])]
                new_route = [l for l in routes[i]['simpl'] if mydiff([city_lat,city_lon],[l[1],l[0]]) < c_dist]
                clust_dict[j]['routes'].append(new_route)
                ims = [im for im in routes[i]['images'] if 'secret' in im]
                r_desc = [d for d in routes[i]['description']]
                mean_lat = (city_clust.iloc[j][1]+city_lat)/2
                mean_lon = (city_clust.iloc[j][0]+city_lon)/2
                cluster_routes.append({'cluster':j, 'name':routes[i]['name'], 'description':r_desc[0] if len(r_desc)>0 else '', 'image':ims[0] if len(ims)>0 else '', 'url':routes[i]['url'],'lat':mean_lat,'lon':mean_lon})
    rout_db = pd.DataFrame.from_records(cluster_routes)
    
#     # only pick out the good clusters
    good_clusts = {j:i for i, j in enumerate([j for j in range(len(clust_dict)) if len(clust_dict[j]['routes'])>0])}
    
    rout_db['journey'] = rout_db['cluster'].apply(lambda j: good_clusts[j])
    rout_db.to_csv(f"hub_gpx/{city_name}/rout_db.csv")
    
    # real quick save of the data in city_df for good clusters
    camp_df = city_df.loc[city_df['cluster'].isin(list(good_clusts.keys()))]
    camp_df['journey'] = camp_df['cluster'].apply(lambda j: good_clusts[j])
    camp_df.to_csv(f"hub_gpx/{city_name}/camp_db.csv")
    
    
    ###
    #  draw everything
    ###
    
    radius = 3
    tiny_inc = 0.01
    feature_dict = [
        {
          "type": "Feature",
          "properties": {
            "name": f"[[https://solarpunktravel/hubs/{city_name}?journey={good_clusts[j]}|{city_pretty} Journey {good_clusts[j]+1}]]" ,
            "description": '',
            "_umap_options": {
              "color": colors[good_clusts[j]],
              "weight": "15",
              "opacity": "0.7"
            }
          },
          "geometry": {
            "type": "LineString",
            "coordinates": (np.array(r)+
                            np.repeat([[tiny_inc*(np.random.random()-0.5),
                                        tiny_inc*(np.random.random()-0.5)]],len(r),axis=0)).tolist()
          }
        } for j in good_clusts for r in clust_dict[j]['routes'] ]

    feature_dict += [
        {
          "type": "Feature",
          "properties": {
            "_umap_options": {
              "opacity": 0.7,
              "fillOpacity": 0.7,
              "color": colors[good_clusts[j]]
            },
            "description": "",
            "name": f"[[https://solarpunktravel/hubs/{city_name}?journey={good_clusts[j]}|{city_pretty} Journey {good_clusts[j]+1}]]"
          },
          "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [distance.distance(miles=radius).destination(clust_dict[j]['loc'].values[::-1],bearing=b)[1],
                 distance.distance(miles=radius).destination(clust_dict[j]['loc'].values[::-1], bearing=b)[0]]
                for b in range(0,350,10)]]
          }
        } for j in good_clusts]
    
    feature_dict += [
        {
          "type": "Feature",
          "properties": {
            "_umap_options": {
              "iconClass": "Drop",
              "iconUrl": "/uploads/pictogram/campsite-24-white.png",
              "color": colors[good_clusts[j]]
            },
            "description": "",
            "name": f"[[https://solarpunktravel/hubs/{city_name}?journey={good_clusts[j]}|{city_pretty} Journey {good_clusts[j]+1}]]" 
          },
          "geometry": {
            "type": "Point",
            "coordinates": [
              i[1],
              i[2]
            ]
          }
        }  for j in good_clusts for i in city_df.loc[city_df['cluster']==clust_dict[j]['name']].to_records()  ]

    with open(f"hub_gpx/{city_name}/umap_routes_simple.geojson", "w") as outfile:
        json.dump({"type": "FeatureCollection", "features": feature_dict}, outfile, indent=4)

running vancouver


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


running austin
running los_angeles
running memphis
running seattle
running madison
running chicago
running montreal
running whitefish
running toronto
running raleigh
running oklahoma_city
running minneapolis
running tampa
running miami
running philadelphia
running cincinnati
running portland
running atlanta
running savannah
running pittsburgh
running new_orleans
running new_york
running denver
running tucson
running boston
running charleston
error detroit
running washington_dc
running san_francisco
running st_louis
running virginia_beach
running albuquerque
running salt_lake_city


In [16]:
# load all the other resources
food = pd.read_csv('csv_dbs/food_coops.csv')
bike = pd.read_csv('csv_dbs/bike_coops.csv')
gear = pd.read_csv('csv_dbs/used_gear.csv')
food_dict = {}
bike_dict = {}
gear_dict = {}
dist = 50
for c in city_loc:
    city_name = c['name']
    loc = c['location']
    min_lat = distance.distance(miles=dist).destination(loc, bearing=180)[0]
    max_lat = distance.distance(miles=dist).destination(loc, bearing=0)[0]
    min_lon = distance.distance(miles=dist).destination(loc, bearing=270)[1]
    max_lon = distance.distance(miles=dist).destination(loc, bearing=90)[1]
    # food
    tst = food.loc[(food['lon']>min_lon)&(food['lon']<max_lon)&(food['lat']>min_lat)&(food['lat']>max_lon)].reset_index(drop=True)
    loc_array = np.array(tst[['lat','lon']])
    meas = np.array([distance.distance(loc,l).miles for l in loc_array])
    subset = tst.loc[(meas<dist)].reset_index(drop=True)
    subset.to_csv(f"hub_gpx/{city_name}/food_db.csv",index=False)
    food_dict[c['name']]=[{'name':r[3],'url':r[6],'lat':r[9],'lon':r[10]} for r in subset.to_records()]
    # bike
    tst = bike.loc[(food['lon']>min_lon)&(food['lon']<max_lon)&(food['lat']>min_lat)&(food['lat']>max_lon)].reset_index(drop=True)
    loc_array = np.array(tst[['lat','lon']])
    meas = np.array([distance.distance(loc,l).miles for l in loc_array])
    subset = tst.loc[(meas<dist)].reset_index(drop=True)
    subset.to_csv(f"hub_gpx/{city_name}/bike_db.csv",index=False)
    bike_dict[c['name']]=[{'name':r[2],'url':r[6],'lat':r[8],'lon':r[9]} for r in subset.to_records()]
    # gear
    tst = gear.dropna(subset=['lat','lon']).loc[(food['lon']>min_lon)&(food['lon']<max_lon)&(food['lat']>min_lat)&(food['lat']>max_lon)].reset_index(drop=True)
    loc_array = np.array(tst[['lat','lon']])
    meas = np.array([distance.distance(loc,l).miles for l in loc_array])
    subset = tst.loc[(meas<dist)].reset_index(drop=True)
    subset.to_csv(f"hub_gpx/{city_name}/gear_db.csv",index=False)
    gear_dict[c['name']]=[{'name':r[2],'url':r[6],'lat':r[8],'lon':r[9]} for r in subset.to_records()]


In [17]:
dont_overwrite = False
for city_ind in range(len(city_dict)):
    city_pretty = city_dict[city_ind]['name']
    city_name = city_dict[city_ind]['name'].lower().replace(' ','_')
    city_lat = city_dict[city_ind]['lat']
    city_lon = city_dict[city_ind]['lon']
    if dont_overwrite and os.path.exists(f"hub_gpx/{city_name}/umap_resources.geojson"):
        print(f"skipping {city_name}")
        continue
    print(f"running {city_name}")
    
    feature_dict = [
        {
          "type": "Feature",
          "properties": {
            "_umap_options": {
              "iconClass": "Drop",
              "iconUrl": "/uploads/pictogram/grocery-24_1.png",
              "color": "Black"
            },
            "description": "",
            "name": f"[[{i['url']}|{i['name']}]]" 
          },
          "geometry": {
            "type": "Point",
            "coordinates": [
              i['lon'],
              i['lat']
            ]
          }
        }  for i in food_dict[city_name]]
    
    feature_dict += [
        {
          "type": "Feature",
          "properties": {
            "_umap_options": {
              "iconClass": "Drop",
              "iconUrl": "/uploads/pictogram/bicycle-24-white.png",
              "color": "Black"
            },
            "description": "",
            "name": f"[[{i['url']}|{i['name']}]]" 
          },
          "geometry": {
            "type": "Point",
            "coordinates": [
              i['lon'],
              i['lat']
            ]
          }
        }  for i in bike_dict[city_name]]
    
    feature_dict += [
        {
          "type": "Feature",
          "properties": {
            "_umap_options": {
              "iconClass": "Drop",
              "iconUrl": "/uploads/pictogram/shop-24_1.png",
              "color": "Black"
            },
            "description": "",
            "name": f"[[{i['url']}|{i['name']}]]" 
          },
          "geometry": {
            "type": "Point",
            "coordinates": [
              i['lon'],
              i['lat']
            ]
          }
        }  for i in gear_dict[city_name]]

    with open(f"hub_gpx/{city_name}/umap_resources.geojson", "w") as outfile:
        json.dump({"type": "FeatureCollection", "features": feature_dict}, outfile, indent=4)

running vancouver
running austin
running los_angeles
running memphis
running seattle
running madison
running chicago
running montreal
running whitefish
running toronto
running raleigh
running oklahoma_city
running minneapolis
running tampa
running miami
running philadelphia
running cincinnati
running portland
running atlanta
running savannah
running pittsburgh
running new_orleans
running new_york
running denver
running tucson
running boston
running charleston
running detroit
running washington_dc
running san_francisco
running st_louis
running virginia_beach
running albuquerque
running salt_lake_city


In [18]:
city_ims = {}
for city_ind in range(len(city_dict)):
    city_name = city_dict[city_ind]['name'].lower().replace(' ','_')
    with open(f"hub_gpx/{city_name}/routes.json", "r") as infile:
        routes = json.load(infile)
    ims = [r['images'][1] for r in routes if len(r['images'])>1]
    city_ims[city_name]=ims
with open(f"hub_gpx/city_ims.json", "w") as outfile:
    json.dump(city_ims, outfile, indent=4)