In [1]:
import geopandas as gpd
import os
from glob import glob
import pandas as pd
import requests
import zipfile
import time
import json
import shapely

In [2]:
project_folder_path = r"E:\Geofabrik"

In [3]:
download_folder_path = os.path.join(project_folder_path, "download")

In [4]:
unzipped_folder_path = os.path.join(project_folder_path, "unzipped")

In [5]:
organized_data_folder_path = os.path.join(project_folder_path, "organized_landmarks")

In [6]:
combined_data_folder_path = os.path.join(project_folder_path, "organized_landmarks_combined")

In [7]:
geo_map_path = r"C:\Users\jixin\Documents\Bitbucket\geofabrik_landmark_map\map\Amenity List - combined_GF_cleaned.csv"

In [8]:
base_url = "http://download.geofabrik.de/north-america/us/"

In [9]:
update = False

In [10]:
# create root folder
def create_folder(p):
    if not os.path.exists(p):
        os.makedirs(p)

### 1. Download, unzip, and store Geofabrik data from all states

In [11]:
# download
def get_shp_file_link(state):
    if state == 'united states virgin islands':
        state_name = "us-virgin-islands"
    elif state == 'norcal' or state == 'socal':
        state_name = "california/"+state
    else:
        state_name = state.replace(" ", "-")
    return base_url + state_name + "-latest-free.shp.zip"


In [12]:
def download_state_zipfile(state, url):
    r = requests.get(url, allow_redirects=True)
    save_path = os.path.join(download_folder_path, state+'.shp.zip')
    open(save_path, 'wb').write(r.content)
    return save_path

In [13]:
def unzip_file(zipfile_path, zip_to_path):
    with zipfile.ZipFile(zipfile_path, 'r') as zip_ref:
        zip_ref.extractall(zip_to_path)

In [14]:
l = "alabama, alaska, arizona, arkansas, norcal, socal, colorado, connecticut, delaware, district of columbia, florida, georgia, hawaii, idaho, illinois, indiana, iowa, kansas, kentucky, louisiana, maine, maryland, massachusetts, michigan, minnesota, mississippi, missouri, montana, nebraska, nevada, new hampshire, new jersey, new mexico, new york, north carolina, north dakota, ohio, oklahoma, oregon, pennsylvania, puerto rico, rhode island, south carolina, south dakota, tennessee, texas, united states virgin islands, utah, vermont, virginia, washington, west virginia, wisconsin, wyoming"
state_list = l.split(", ")

In [15]:
start = time.time()

create_folder(download_folder_path)
create_folder(unzipped_folder_path)
for state in state_list:
    print(state)
    # update or not
    zip_to_path = os.path.join(unzipped_folder_path, state)
    if not update: # skip state if unzipped file exists
        if os.path.exists(zip_to_path):
            print("skip because unzipped file exists")
            continue
        
    
    # download
    url = get_shp_file_link(state)
    download_save_path = download_state_zipfile(state, url)
    print("    "+url)
    print("    "+"Downloaded to : " + download_save_path)
    
    # unzip
    zipfile_path = os.path.join(download_folder_path, state + '.shp.zip')
    unzip_file(zipfile_path, zip_to_path)
    print("    "+"Unzipped to : " + zip_to_path)
    
end = time.time()
print(f"Runtime of the program is {end - start}")

alabama
skip because unzipped file exists
alaska
skip because unzipped file exists
arizona
skip because unzipped file exists
arkansas
skip because unzipped file exists
norcal
skip because unzipped file exists
socal
skip because unzipped file exists
colorado
skip because unzipped file exists
connecticut
skip because unzipped file exists
delaware
skip because unzipped file exists
district of columbia
skip because unzipped file exists
florida
skip because unzipped file exists
georgia
skip because unzipped file exists
hawaii
skip because unzipped file exists
idaho
skip because unzipped file exists
illinois
skip because unzipped file exists
indiana
skip because unzipped file exists
iowa
skip because unzipped file exists
kansas
skip because unzipped file exists
kentucky
skip because unzipped file exists
louisiana
skip because unzipped file exists
maine
skip because unzipped file exists
maryland
skip because unzipped file exists
massachusetts
skip because unzipped file exists
michigan
skip be

In [16]:
923/60

15.383333333333333

### 2. Parse Geofabrik map

In [17]:
# first run "clean_csv_map" and use "Amenity List - combined_GF_clean.csv"

In [18]:
# convert csv map to json map
map_dict = {"unique_list": [], "map": {}}

with open(geo_map_path) as fp:
    Lines = fp.readlines()
    for line in Lines:  # skip head line
        line_list = line.split(",")
        label_1st = line_list[0]
        label_2nd = line_list[1]
        label_3rd = line_list[2]
        if len(label_1st) > 0:
            level1 = label_1st
            map_dict['map'][level1] = dict()
            map_dict['unique_list'].append(label_1st) 
        if len(label_2nd) > 0:
            level2 = label_2nd
            map_dict['map'][level1][level2] = dict()
            map_dict['unique_list'].append(label_2nd) 
        if len(label_3rd) > 0:
            line_list_lvl3 = [x for x in line_list if (len(x) > 0) and (x != '\n')]
            map_dict['map'][level1][level2][label_3rd] = line_list_lvl3[1:]
            map_dict['unique_list']+= line_list_lvl3
        

In [19]:
map_dict

{'unique_list': ['residential',
  'residential',
  'apartment',
  'studio',
  'dormitory',
  'dorm',
  'fraternity_house',
  'fraternity',
  'house',
  'carriage_house',
  'deckhouse',
  'semidetached_house',
  'townhouse',
  'condominium',
  'condo',
  'condominum',
  'affordable_housing',
  'pavillion',
  'residential',
  'bungalow',
  'cabin',
  'hut',
  'lodge',
  'mansion',
  'mobile_home',
  'residence',
  'static_caravan',
  'group_housing',
  'housing',
  'home',
  'building',
  'building',
  'historic_building',
  'county_building',
  'department_building',
  'music_building',
  'skyscraper',
  'cultural_center',
  'engineering_building',
  'organization',
  'maintenance_building',
  'service',
  'education',
  'college',
  'college_building',
  'childcare',
  'childcare_facility',
  'day_care',
  'preschool',
  'kindergarten',
  'school',
  'high_school',
  'music_school',
  'classroom',
  'library',
  'university_library',
  'university',
  'education',
  'academic',
  'acad

### 3. Extract and store Geofabrik data based on label map

In [20]:
layers = ["buildings", "landuse", "natural", "places", "pofw", "pois", "railways", "roads", "traffic", "transport", "water", "waterways"]

In [21]:

# def extract_data_for_label(labels, lvl3_path, category):
#     if category == "point":
#         suffix = "_free_*.shp"
#         filename_suffix = "_point"
#     elif category == "polygon":
#         suffix = "_a_free_*.shp"
#         filename_suffix = "_polygon"
#     else:
#         print("wrong type of file requested")
#         return
    
#     layer_df = pd.DataFrame()
#     for layer in layers:
#         for state in state_list:
#             state_folder_path = os.path.join(unzipped_folder_path, state)
#             state_shapefile_path = os.path.join(state_folder_path, "gis_osm_" + layer + suffix)
#             shapefile_finding_list = list(glob(state_shapefile_path))
#             if len(shapefile_finding_list) > 1:
#                 print("{} shapefiles found for {} for state {}".format(len(shapefile_finding_list), labels, state))

#             for p_shapefile in shapefile_finding_list:
#                 shpfile_df = gpd.read_file(p_shapefile)
#                 column_to_check = ["fclass", "type"]
#                 column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))

                
#                 for label in labels:
#                     for colname in column_to_check:
#                         shpfile_df_label = shpfile_df[shpfile_df[colname] == label]
#                         layer_df = layer_df.append(shpfile_df_label)

#     # save extraction to shapefile
#     if layer_df.shape[0] > 0:
#         layer_df.to_file(lvl3_path + os.sep + labels[0] + filename_suffix + ".shp")
#     else:
#         print("                  No {} file found for {}".format(category, labels[0]))

# # extract function
# def organize_data_for_label(lvl3_label, label_map_lvl2, lvl3_path):
#     synonyms = label_map_lvl2[lvl3_label]
#     labels = [x for x in synonyms]
#     labels.insert(0, lvl3_label)
#     # point
#     extract_data_for_label(labels, lvl3_path, "point")
#     # polygon
#     extract_data_for_label(labels, lvl3_path, "polygon")

# # extract pipeline
# label_map = map_dict['map']
# create_folder(organized_data_folder_path)
# for lvl1_label in label_map: # from top level to bottom level
#     print("Level 1 : "+lvl1_label)
#     lvl1_path = organized_data_folder_path + os.sep + lvl1_label
#     create_folder(lvl1_path)
#     for lvl2_label in label_map[lvl1_label]:
#         print("  Level 2 : "+lvl2_label)
#         lvl2_path = lvl1_path + os.sep + lvl2_label
#         create_folder(lvl2_path)
#         for lvl3_label in label_map[lvl1_label][lvl2_label]:
#             print("    Level 3 : "+lvl3_label)
#             lvl3_path = lvl2_path + os.sep + lvl3_label
#             if not update:
#                 if len(os.listdir(path)) > 0:
#                     continue
#             create_folder(lvl3_path)
#             organize_data_for_label(lvl3_label, label_map[lvl1_label][lvl2_label], lvl3_path)

In [22]:
# extract 
# state - point/polygon - layer - label

In [23]:
def clean_tag_column(tag_series):
    
    cleaned_series = tag_series.str.lower()
    cleaned_series = cleaned_series.str.replace(" ", "_", case = False)
    cleaned_series = pd.Series([x[:-1] if x.endswith('s') else x for x in cleaned_series])
    
    return cleaned_series

In [24]:
def extract_all_landmarks_from_shapefile(label_map, organized_data_folder_path, shpfile_df_single_tag, shpfile_df_multi_tag, column_to_check, state, category, layer, shpfile_num):
    included_num = 0
    for lvl1_label in label_map: # from top level to bottom level
    #     print("Level 1 : "+lvl1_label)
        lvl1_path = organized_data_folder_path + os.sep + lvl1_label
        create_folder(lvl1_path)
        for lvl2_label in label_map[lvl1_label]:
    #         print("  Level 2 : "+lvl2_label)
            lvl2_path = lvl1_path + os.sep + lvl2_label
            label_map_lvl2 = label_map[lvl1_label][lvl2_label]
            create_folder(lvl2_path)
            for lvl3_label in label_map[lvl1_label][lvl2_label]:
    #             print("    Level 3 : "+lvl3_label)
                lvl3_path = lvl2_path + os.sep + lvl3_label
                lvl3_file_name = "_".join([lvl3_label, state, layer, shpfile_num, category+".shp"])
                lvl3_file_path = os.path.join(lvl3_path, lvl3_file_name)
                if not update:
                    if os.path.exists(lvl3_file_path):
                        continue
                create_folder(lvl3_path)
                synonyms = label_map_lvl2[lvl3_label]
                labels = [x for x in synonyms]
                labels.insert(0, lvl3_label)

                df_label = pd.DataFrame()
                for label in labels:
                    shpfile_df_single_tag_label = shpfile_df_single_tag[shpfile_df_single_tag[column_to_check] == label]
                    shpfile_df_multi_tag_label = shpfile_df_multi_tag[shpfile_df_multi_tag[column_to_check].str.contains(label)]
                    df_label = df_label.append(shpfile_df_single_tag_label)
                    df_label = df_label.append(shpfile_df_multi_tag_label)

                # save extraction to shapefile
                if df_label.shape[0] > 0:
                    df_label.to_file(lvl3_file_path)
                    included_num += df_label.shape[0]
    return included_num

In [25]:
state_list

['alabama',
 'alaska',
 'arizona',
 'arkansas',
 'norcal',
 'socal',
 'colorado',
 'connecticut',
 'delaware',
 'district of columbia',
 'florida',
 'georgia',
 'hawaii',
 'idaho',
 'illinois',
 'indiana',
 'iowa',
 'kansas',
 'kentucky',
 'louisiana',
 'maine',
 'maryland',
 'massachusetts',
 'michigan',
 'minnesota',
 'mississippi',
 'missouri',
 'montana',
 'nebraska',
 'nevada',
 'new hampshire',
 'new jersey',
 'new mexico',
 'new york',
 'north carolina',
 'north dakota',
 'ohio',
 'oklahoma',
 'oregon',
 'pennsylvania',
 'puerto rico',
 'rhode island',
 'south carolina',
 'south dakota',
 'tennessee',
 'texas',
 'united states virgin islands',
 'utah',
 'vermont',
 'virginia',
 'washington',
 'west virginia',
 'wisconsin',
 'wyoming']

In [38]:
start = time.time()

#
label_map = map_dict['map']
create_folder(organized_data_folder_path)

#
label_count_dict = dict()
for layer in layers:
    label_count_dict[layer] = {"none": 0, "labeled": 0, "included": 0}

#
for state in state_list:
    print(state)
    start_state = time.time()
    state_folder_path = os.path.join(unzipped_folder_path, state)
    for category in ["point", "polygon"]:
        print("  "+category)
        if category == "point":
            suffix = "_free_*.shp"
            filename_suffix = "_point"
        elif category == "polygon":
            suffix = "_a_free_*.shp"
            filename_suffix = "_polygon"
        else:
            pass
        
        for layer in layers:          
            print("    "+layer)
            shapefile_pattern = os.path.join(state_folder_path, "gis_osm_" + layer + suffix)
            shapefile_finding_list = list(glob(shapefile_pattern))
            if len(shapefile_finding_list) > 1:
                print("{} shapefiles found for {} for state {}".format(len(shapefile_finding_list), labels, state))

            for p_shapefile in shapefile_finding_list:
                shpfile_df = gpd.read_file(p_shapefile)
                label_count_dict[layer]["none"] += shpfile_df.shape[0]
                column_to_check = ["fclass", "type"]
                column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))
                if len(column_to_check) > 1:
                    if len(shpfile_df['fclass'].unique()) > len(shpfile_df['type'].unique()):
                        column_to_check = "fclass"
                    else:
                        column_to_check = "type"
                else:
                    column_to_check = column_to_check[0]
                print("      column to check : "+column_to_check)
#                 column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))
                shpfile_num = p_shapefile.split("_free_")[1].strip(".shp")
    
                # clean label columns: lower case, replace space with underscore, remove ending s, split by ";"
                shpfile_df["tag_col_type"] = ["none" if x is None else "str" for x in shpfile_df[column_to_check]]
                shpfile_df = shpfile_df[shpfile_df["tag_col_type"]!="none"]
                label_count_dict[layer]["labeled"] += shpfile_df.shape[0]
                shpfile_df.reset_index(inplace=True, drop=True)
                shpfile_df[column_to_check] = clean_tag_column(shpfile_df[column_to_check])
                shpfile_df[column_to_check] = shpfile_df[column_to_check].astype(str)
                shpfile_df_single_tag = shpfile_df[~shpfile_df[column_to_check].str.contains(";")]
                shpfile_df_single_tag.reset_index(inplace=True, drop=True)
                shpfile_df_multi_tag = shpfile_df[shpfile_df[column_to_check].str.contains(";")]
                shpfile_df_multi_tag.reset_index(inplace=True, drop=True)
                
                included_num = extract_all_landmarks_from_shapefile(label_map, organized_data_folder_path, shpfile_df_single_tag, shpfile_df_multi_tag, column_to_check, state, category, layer, shpfile_num)
                label_count_dict[layer]["included"] += included_num
    end = time.time()
    print(f"Runtime for {state} is {end - start_state}")
                
end = time.time()
print(f"Total runtime of the program is {end - start}")

alabama
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for alabama is 351.070111989975
alaska
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for alaska is 349.9743502140045
arizona
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for arizona is 806.4459872245789
arkansas
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for arkansas is 219.23210310935974
norcal
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type






    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for norcal is 1198.057992219925
socal
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for socal is 1958.9410653114319
colorado
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for colorado is 749.5618016719818
connecticut
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for connecticut is 349.02028155326843
delaware
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for delaware is 96.22400140762329
district of columbia
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass


  """


    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for district of columbia is 73.53095483779907
florida
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for florida is 1337.3786442279816
georgia
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for georgia is 828.4684479236603
hawaii
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for hawaii is 81.7249186038971
idaho
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for idaho is 212.11324405670166
illinois
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for illinois is 835.6649358272552
indiana
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for indiana is 520.6965026855469
iowa
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for iowa is 353.3661346435547
kansas
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for kansas is 318.1258533000946
kentucky
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for kentucky is 358.76313614845276
louisiana
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for louisiana is 282.9899973869324
maine
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for maine is 173.7560007572174
maryland
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for maryland is 549.5889971256256
massachusetts
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for massachusetts is 587.5617454051971
michigan
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for michigan is 926.4600129127502
minnesota
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for minnesota is 645.3840279579163
mississippi
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for mississippi is 216.0882179737091
missouri
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for missouri is 472.8564991950989
montana
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for montana is 190.99060463905334
nebraska
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for nebraska is 244.04424023628235
nevada
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for nevada is 210.8523600101471
new hampshire
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for new hampshire is 169.18773412704468
new jersey
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for new jersey is 447.1407036781311
new mexico
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for new mexico is 268.9413504600525
new york
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type






    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for new york is 1171.3337559700012
north carolina
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for north carolina is 893.650999546051
north dakota
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for north dakota is 224.1100001335144
ohio
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for ohio is 883.1849994659424
oklahoma
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for oklahoma is 385.7540011405945
oregon
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for oregon is 477.03399991989136
pennsylvania
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for pennsylvania is 802.2789287567139
puerto rico
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for puerto rico is 231.20795392990112
rhode island
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for rhode island is 124.33527302742004
south carolina
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for south carolina is 381.53382396698
south dakota
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for south dakota is 128.11101150512695
tennessee
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for tennessee is 505.89897179603577
texas
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type






    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for texas is 1944.2571330070496
united states virgin islands
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass




    railways
      column to check : fclass


  """


    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass
  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass
    pofw
      column to check : fclass




    pois
      column to check : fclass




    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for united states virgin islands is 46.83299970626831
utah
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for utah is 306.2599995136261
vermont
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for vermont is 101.38899993896484
virginia
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for virginia is 944.336000919342
washington
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for washington is 702.3729994297028
west virginia
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for west virginia is 178.08899998664856
wisconsin
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for wisconsin is 658.613285779953
wyoming
  point
    buildings
    landuse
    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
      column to check : fclass
    roads
      column to check : fclass




    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
    waterways
      column to check : fclass




  polygon
    buildings
      column to check : type




    landuse
      column to check : fclass




    natural
      column to check : fclass




    places
      column to check : fclass




    pofw
      column to check : fclass




    pois
      column to check : fclass






    railways
    roads
    traffic
      column to check : fclass




    transport
      column to check : fclass




    water
      column to check : fclass




    waterways
Runtime for wyoming is 138.63462233543396
Total runtime of the program is 27613.426696062088


In [27]:
map_file_path = os.path.abspath(os.path.join(geo_map_path, os.pardir))

label_count_path = os.path.join(map_file_path, "label_count_dict.json")

with open(label_count_path, 'w') as fp:
    json.dump(label_count_dict, fp)

NameError: name 'label_count_dict' is not defined

In [40]:
10311.742320775986/60/60

2.8643728668822184

In [41]:
label_count_dict

{'buildings': {'none': 54022556, 'labeled': 17350545, 'included': 15497353},
 'landuse': {'none': 3653811, 'labeled': 3653811, 'included': 3636876},
 'natural': {'none': 2259112, 'labeled': 2259112, 'included': 2264993},
 'places': {'none': 205764, 'labeled': 205764, 'included': 512},
 'pofw': {'none': 237943, 'labeled': 237943, 'included': 237943},
 'pois': {'none': 2991164, 'labeled': 2991164, 'included': 2621679},
 'railways': {'none': 402013, 'labeled': 402013, 'included': 0},
 'roads': {'none': 38316529, 'labeled': 38316529, 'included': 29376717},
 'traffic': {'none': 5728897, 'labeled': 5728897, 'included': 1105575},
 'transport': {'none': 279300, 'labeled': 279300, 'included': 279300},
 'water': {'none': 2252830, 'labeled': 2252830, 'included': 2220559},
 'waterways': {'none': 4817131, 'labeled': 4817131, 'included': 286639}}

In [42]:
print("included/labeled")
for layer in label_count_dict:
    print(f"  {layer} : {round(label_count_dict[layer]['included']/label_count_dict[layer]['labeled'],3)}")

included/labeled
  buildings : 0.893
  landuse : 0.995
  natural : 1.003
  places : 0.002
  pofw : 1.0
  pois : 0.876
  railways : 0.0
  roads : 0.767
  traffic : 0.193
  transport : 1.0
  water : 0.986
  waterways : 0.06


### 4. New unclassified labels 

In [28]:
old_list = set(map_dict['unique_list'])

In [29]:
start = time.time()
new_list = set()

for state in state_list:
    print(state)
    state_folder_path = os.path.join(unzipped_folder_path, state)
    state_shapefile_path = os.path.join(state_folder_path, "gis_osm_*.shp")
    shapefile_finding_list = list(glob(state_shapefile_path))

    for p_shapefile in shapefile_finding_list:
        shpfile_df = gpd.read_file(p_shapefile)
        column_to_check = ["fclass", "type"]
        column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))
        for colname in column_to_check:
            new_labels = set(shpfile_df[colname].unique())
            new_list = new_list.union(new_labels)
            
end = time.time()
print(f"Runtime of the program is {end - start}")

alabama


KeyboardInterrupt: 

In [None]:
new_list - old_list

In [None]:
map_file_path = os.path.abspath(os.path.join(geo_map_path, os.pardir))

In [None]:
pd.DataFrame({"new_label":list(new_list - old_list)}).to_csv(os.path.join(map_file_path, "new_label_list.csv"))

#### 4.1 New unclassified labels in layer

In [None]:
layer_to_check = ["buildings","pois"]

In [None]:
label_dict = {"raw_data_labels": {}, "organized_data_labels": {}}

In [None]:
start = time.time()
for layer in layer_to_check:
    print(layer)
    raw_list = set()
    organized_list = set()
    
    # raw data label
    for state in state_list:
        print(state)
        state_folder_path = os.path.join(unzipped_folder_path, state)
        state_shapefile_path = os.path.join(state_folder_path, "gis_osm_"+ layer + "*.shp")
        shapefile_finding_list = list(glob(state_shapefile_path))

        for p_shapefile in shapefile_finding_list:
            shpfile_df = gpd.read_file(p_shapefile)
            column_to_check = ["fclass", "type"]
            column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))

            if len(column_to_check) > 1:
                if len(shpfile_df['fclass'].unique()) > len(shpfile_df['type'].unique()):
                    column_to_check = "fclass"
                else:
                    column_to_check = "type"
            else:
                column_to_check = column_to_check[0]
                
            new_labels = set(shpfile_df[column_to_check].unique())
            raw_list = raw_list.union(new_labels)
            
    # organized data label
    for lvl1 in os.listdir(organized_data_folder_path):
        print(lvl1)
        lvl1_path = os.path.join(organized_data_folder_path, lvl1)
        for lvl2 in os.listdir(lvl1_path):
            lvl2_path = os.path.join(lvl1_path, lvl2)
            for lvl3 in os.listdir(lvl2_path):
                lvl3_path = os.path.join(lvl2_path, lvl3)
                for target_file in glob(os.path.join(lvl3_path, "*"+layer+"*.shp")):
                    shpfile_df = gpd.read_file(target_file)
                    column_to_check = ["fclass", "type"]
                    column_to_check = list(set(shpfile_df.columns).intersection(column_to_check))

                    if len(column_to_check) > 1:
                        if len(shpfile_df['fclass'].unique()) > len(shpfile_df['type'].unique()):
                            column_to_check = "fclass"
                        else:
                            column_to_check = "type"
                    else:
                        column_to_check = column_to_check[0]

                    new_labels = set(shpfile_df[column_to_check].unique())
                    organized_list = organized_list.union(new_labels)
                    
    label_dict["raw_data_labels"][layer] = list(raw_list)
    label_dict["organized_data_labels"][layer] = list(organized_list)
    
end = time.time()
print(f"Runtime of the program is {end - start}")

In [None]:
label_dict.keys()

In [None]:
label_dict['raw_data_labels'].keys()

In [None]:
def compare_label_list(raw_list, organized_list):
    excluded_list = []
    raw_list_clean = clean_tag_column(pd.Series(raw_list))
    for rlabel in raw_list_clean:
        for olabel in organized_list:
            if olabel in rlabel:
                break
        else:
            excluded_list.append(rlabel)
    return excluded_list

In [None]:
label_dict['excluded_data_labels'] = dict()
for layer in layer_to_check:
    raw_list = label_dict['raw_data_labels'][layer]
    raw_list = [x for x in raw_list if x is not None]
    organized_list = label_dict['organized_data_labels'][layer]
    label_dict['excluded_data_labels'][layer] = compare_label_list(raw_list, organized_list)
    

In [None]:
map_file_path = os.path.abspath(os.path.join(geo_map_path, os.pardir))

excluded_label_dict_path = os.path.join(map_file_path, "excluded_label_dict.json")

with open(excluded_label_dict_path, 'w') as fp:
    json.dump(label_dict, fp)

In [None]:
label_dict['excluded_data_labels']['buildings']

### 5. Combine files for states and layers

In [32]:
df_point_path

'E:\\Geofabrik\\organized_landmarks_combined\\residential\\residential\\residential\\residential_point.shp'

In [33]:
lvl3_path

'E:\\Geofabrik\\organized_landmarks\\residential\\residential\\residential'

In [34]:
df_line

Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,tag_col_ty,geometry
0,6197464,5122,residential,Sugar Creek Court,,B,0,0,F,F,str,"LINESTRING (-86.43325 32.45608, -86.43314 32.4..."
1,6197466,5122,residential,Odell Street,,B,0,0,F,F,str,"LINESTRING (-86.44540 32.46633, -86.44518 32.4..."
2,6197468,5122,residential,Dodgers Drive,,B,0,0,F,F,str,"LINESTRING (-86.42030 32.43132, -86.42022 32.4..."
3,6197476,5122,residential,Shady Valley Road,,B,0,0,F,F,str,"LINESTRING (-86.78818 32.43809, -86.78826 32.4..."
4,6197480,5122,residential,Berry Lane,,B,0,0,F,F,str,"LINESTRING (-86.44990 32.52011, -86.44998 32.5..."
...,...,...,...,...,...,...,...,...,...,...,...,...
9443368,1066607301,5122,residential,,,B,0,0,F,F,str,"LINESTRING (-106.32537 42.86349, -106.32497 42..."
9443369,1066607886,5122,residential,Bryan-Evansville Road,,B,0,0,F,F,str,"LINESTRING (-106.29750 42.86081, -106.29739 42..."
9443370,1066607887,5122,residential,Bryan-Evansville Road,,B,0,0,F,F,str,"LINESTRING (-106.28212 42.86093, -106.28059 42..."
9443371,1067698912,5122,residential,,,B,0,0,F,F,str,"LINESTRING (-105.41849 44.34893, -105.41836 44..."


In [35]:
start = time.time()
for lvl1 in os.listdir(organized_data_folder_path):
    print(lvl1)
    lvl1_path = os.path.join(organized_data_folder_path, lvl1)
    for lvl2 in os.listdir(lvl1_path):
        print("  "+lvl2)
        lvl2_path = os.path.join(lvl1_path, lvl2)
        for lvl3 in os.listdir(lvl2_path):
            print("    "+lvl3)
            lvl3_path = os.path.join(lvl2_path, lvl3)
            df_point = pd.DataFrame()
            df_polygon = pd.DataFrame()
            df_folder_path = os.path.join(combined_data_folder_path, lvl1, lvl2, lvl3)
            create_folder(df_folder_path)
            df_point_path = os.path.join(df_folder_path, lvl3+"_point.shp")
            df_line_path = os.path.join(df_folder_path, lvl3+"_line.shp")
            df_polygon_path = os.path.join(df_folder_path, lvl3+"_polygon.shp")
            
            if not os.path.exists(df_point_path):
                for target_file in glob(os.path.join(lvl3_path, "*point.shp")):
                    shpfile_df = gpd.read_file(target_file)
                    df_point = df_point.append(shpfile_df)
                if df_point.shape[0] >0:
                    df_point['geo_type'] = [type(x) for x in df_point.geometry]
                    df_line = df_point[df_point['geo_type'] == shapely.geometry.linestring.LineString]
                    if df_line.shape[0] >0:
                        df_point = df_point[df_point['geo_type'] == shapely.geometry.point.Point]
                        df_line.reset_index(inplace=True, drop=True)
                        df_point.reset_index(inplace=True, drop=True)
                        df_point = df_point.drop('geo_type', 1)
                        df_line = df_line.drop('geo_type', 1)
                        df_line.to_file(df_line_path)
                        if len(df_point) > 0:
                            df_point.to_file(df_point_path)
                        
                    else:
                        df_point = df_point.drop('geo_type', 1)
                        df_point.reset_index(inplace=True, drop=True)
                        df_point.to_file(df_point_path)

            
            if not os.path.exists(df_polygon_path):
                for target_file in glob(os.path.join(lvl3_path, "*polygon.shp")):
                    shpfile_df = gpd.read_file(target_file)
                    df_polygon = df_polygon.append(shpfile_df)
                if df_polygon.shape[0] >0:
                    df_polygon.reset_index(inplace=True, drop=True)
                    df_polygon.to_file(df_polygon_path)
                
end = time.time()
print(f"Runtime of the program is {end - start}")

busines
  busines
    company
    convention_center
    factory
    industrial
    office
commercial
  food
    bakery
    beverage
    cafe
    deli
    dining_hall
    fast_food
    food
    restaurant
  leisure
    aquarium
    bar
    casino
    cinema
    museum
    theatre
    theme_park
    zoo
  lifestyle
    beauty_shop
    car_wash
    hotel
    laundry
    recycling
    self_storage
    stadium
    travel_agent
    veterinary
  other
    commercial
  shopping
    kiosk
    mall
    shop
    supermarket
recreational
  indoor
    sports_centre
    swimming_pool
  outdoor
    attraction
    bicycle_rental
    camp_site
    country_club
    dog_park
    farm
    golf_course
    graveyard
    hunting_stand
    ice_rink
    nature
    park
    picnic_site
    pitch
    playground
    shelter
    tennis_court
    tourism
    track
    viewpoint
    water
residential
  building
    building
  residential
    apartment
    dormitory
    home
    house
    residential




service
  education
    childcare
    college




    education
    kindergarten




    library




    preschool
    school




    university




  finance
    atm




    bank




    insurance
  health
    clinic




    dentist




    doctor




    health
    health_center
    hospital




    medical
    mental_health_clinic
    nursing_home




    pharmacy




  other
    community_centre




    fire_station




    government




    police




    post_office




    prison




    public_building




    telephone




  religion
    religion




  transportation
    airport




    bus_station




    car_rental




    ferry_terminal




    fuel




    parking




    parking_bicycle




    taxi




    train_station




    transportation
Runtime of the program is 11383.987137794495


In [None]:
map_dict['map']['service']

### 6. check point and polygon shpfile relationship

In [None]:
# start = time.time()
# for lvl1 in os.listdir(organized_data_folder_path):
#     print(lvl1)
#     lvl1_path = os.path.join(organized_data_folder_path, lvl1)
#     for lvl2 in os.listdir(lvl1_path):
#         print("  "+lvl2)
#         lvl2_path = os.path.join(lvl1_path, lvl2)
#         for lvl3 in os.listdir(lvl2_path):
#             print("    "+lvl3)
#             lvl3_path = os.path.join(lvl2_path, lvl3)
#             df_point = pd.DataFrame()
#             df_polygon = pd.DataFrame()
#             df_folder_path = os.path.join(combined_data_folder_path, lvl1, lvl2, lvl3)

#             df_point_path = os.path.join(df_folder_path, lvl3+"_point.shp")
#             df_line_path = os.path.join(df_folder_path, lvl3+"_line.shp")
#             df_polygon_path = os.path.join(df_folder_path, lvl3+"_polygon.shp")
            
#             if not os.path.exists(df_point_path):
#                 for target_file in glob(os.path.join(lvl3_path, "*point.shp")):
#                     shpfile_df = gpd.read_file(target_file)
#                     df_point = df_point.append(shpfile_df)
                    
#             if not os.path.exists(df_polygon_path):
#                 for target_file in glob(os.path.join(lvl3_path, "*polygon.shp")):
#                     shpfile_df = gpd.read_file(target_file)
#                     df_polygon = df_polygon.append(shpfile_df)
                    
#             break
#         break
#     break
# end = time.time()
# print(f"Runtime of the program is {end - start}")

In [None]:
amenity = "university"

In [None]:
point_file_path = r"D:\data\Geofabrik\organized_landmarks_combined\service\education\university\university_point.shp"
df_point = gpd.read_file(point_file_path)
polygon_file_path = r"D:\data\Geofabrik\organized_landmarks_combined\service\education\university\university_polygon.shp"
df_polygon = gpd.read_file(polygon_file_path)

In [None]:
df_point

In [None]:
df_polygon

In [None]:
point_set = set(df_point.osm_id.unique())

In [None]:
polygon_set = set(df_polygon.osm_id.unique())

In [None]:
point_set.intersection(polygon_set)

In [None]:
df_point[df_point['name'] == 'Laramie County Community College']