## import and install necessary things

In [1]:
import os
import shutil
import urllib.request
import zipfile

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import selenium.webdriver
import wget

## download data

All of the data below is scraped from `https://www.a2gov.org/services/data/Pages/default.aspx`. The scraper depends on that page's current layout, so it will probably need to be updated over time; for now it works.

In [4]:
def _datatable_loader(driver):
    
    driver.get('https://www.a2gov.org/services/data/Pages/default.aspx')

    rowsOdd = driver.find_elements_by_class_name('ms-rteTableOddRow-1')
    rowsEven = driver.find_elements_by_class_name('ms-rteTableEvenRow-1')
    rows = []

    for i in range(len(rowsOdd) + len(rowsEven)):
        if i%2:
            rows.append(rowsEven[i//2])
        else:
            rows.append(rowsOdd[i//2])
    
    df_out = []
    for row_raw in rows:
        row = row_raw.find_elements_by_tag_name('a')
        if len(row) > 2:
            if row[0].text == 'Police Department':
                df_out.append([row[2].text, None, row[2].get_attribute('href')])
                df_out.append([row[3].text, None, row[3].get_attribute('href')])
            else:
                if len(row) > 3:
                    df_out.append([row[1].text, row[1].get_attribute('href'), row[3].get_attribute('href')])                    
                else:
                    df_out.append([row[1].text, row[1].get_attribute('href'), row[2].get_attribute('href')])
    df_out = pd.DataFrame(df_out, columns=['name', 'metadata', 'link']).set_index('name')
    df_out['id'] = df_out.index.str.replace('\n', '_').str.replace('/','').str.lower().str.split(' ').str.join('_').values    
    df_out = df_out.reset_index().set_index('id')
    return df_out

def get_datatable(driver, reload=False, path='data/datatable_master.csv'):
    """Return the dataset table, scraping it only when no cached CSV exists.

    Parameters
    ----------
    driver : selenium WebDriver
        Passed to ``_datatable_loader``; only used when the table is (re)built.
    reload : bool, default False
        Force a fresh scrape even if ``path`` already exists.
    path : str
        Location of the cached CSV.

    Returns
    -------
    pandas.DataFrame
        The table read back from ``path``, indexed by ``id``. The result always
        goes through the CSV so fresh and cached calls look the same.
    """
    if reload or not os.path.exists(path):
        # Make sure the cache directory exists; to_csv does not create it.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        _datatable_loader(driver).to_csv(path)
    return pd.read_csv(path).set_index('id')

# Default HTTP headers sent with every download request; used by
# download_rows when the caller does not pass its own ``headers``.
_headers={'User-Agent': 'Mozilla/5.0'}

def download_rows(*rows, table, reload=False, headers=None):
    """Download the files behind the requested datatable rows.

    Each file is stored as ``data/<id>/<id>.<ext>``, where ``<ext>`` is
    whatever follows the last ``.`` in the row's link.

    Parameters
    ----------
    *rows : str
        Row ids (index labels of ``table``) to download. Ids missing from the
        table are reported and skipped.
    table : pandas.DataFrame
        Datatable indexed by id with a ``link`` column.
    reload : bool, default False
        Download again even if the target file already exists.
    headers : dict, optional
        HTTP headers for the request; defaults to the module-level ``_headers``.

    Returns
    -------
    dict
        Maps each id found in the table to its local path, or to None when the
        download failed.
    """
    if headers is None:
        headers = _headers
    outpaths = {}
    for ID in rows:
        if ID not in table.index:
            print('ERROR: requested row \'{}\' not contained in the datatable'.format(ID))
            continue
        link = table.loc[ID].link
        try:
            # The file is named after the row id (this is what `row.name`
            # evaluates to on a row Series), not after the 'name' column.
            outpath = '{}.{}'.format(ID, link.split('.')[-1])
            fullpath = 'data/{}/{}'.format(ID, outpath)
            if (not os.path.exists(fullpath)) or reload:
                print('loading ' + ID, outpath)
                os.makedirs(os.path.dirname(fullpath), exist_ok=True)
                # Download to a temporary name first so an interrupted or
                # failed transfer never looks like a finished, cached file.
                partial = fullpath + '.part'
                request = urllib.request.Request(link, headers=headers)
                try:
                    with urllib.request.urlopen(request) as response, open(partial, 'wb') as sink:
                        shutil.copyfileobj(response, sink)
                    os.replace(partial, fullpath)
                finally:
                    if os.path.exists(partial):
                        os.remove(partial)

            outpaths[ID] = fullpath

        except Exception as e:
            print('ERROR: failed to load ID \'{}\' at link <{}>'.format(ID, link))
            print(e)
            outpaths[ID] = None
    return outpaths

In [5]:


# A headless Chrome session is passed to get_datatable, which uses it to
# scrape the dataset index when no cached copy exists.
options = selenium.webdriver.chrome.options.Options()
options.add_argument('--headless')
driver = selenium.webdriver.Chrome('drivers/chromedriver', options=options)

try:
    table = get_datatable(driver,)
finally:
    # Always shut the browser down so no headless Chrome process is left behind.
    driver.quit()

table

Unnamed: 0_level_0,name,metadata,link
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aata_bus_stops,AATA Bus Stops,http://data.a2gov.org/feeds/GIS/AATA%20BusStop...,http://data.a2gov.org/feeds/GIS/AATA%20BusStop...
building_footprints,Building Footprints,http://data.a2gov.org/feeds/GIS/AA%20Building%...,http://data.a2gov.org/feeds/GIS/AA%20Building%...
catch_basins,Catch Basins,http://data.a2gov.org/feeds/GIS/Catchbasins/A2...,http://data.a2gov.org/feeds/GIS/Catchbasins/A2...
city_boundary,City Boundary,http://data.a2gov.org/feeds/GIS/AA%20City%20Bo...,http://data.a2gov.org/feeds/GIS/AA%20City%20Bo...
city_owned_land,City Owned Land,http://data.a2gov.org/feeds/GIS/AA%20City%20Ow...,http://data.a2gov.org/feeds/GIS/AA%20City%20Ow...
dda_boundary,DDA Boundary,http://data.a2gov.org/feeds/GIS/AA%20DDA%20Bou...,http://data.a2gov.org/feeds/GIS/AA%20DDA%20Bou...
floodplain,Floodplain,http://data.a2gov.org/feeds/GIS/AA%20Floodplai...,http://data.a2gov.org/feeds/GIS/AA%20Floodplai...
floodway,Floodway,http://data.a2gov.org/feeds/GIS/AA%20Floodway/...,http://data.a2gov.org/feeds/GIS/AA%20Floodway/...
future_landuse,Future LandUse,http://data.a2gov.org/feeds/GIS/AA%20Future%20...,http://data.a2gov.org/feeds/GIS/AA%20Future%20...
greenbelt_boundary,Greenbelt Boundary,http://data.a2gov.org/feeds/GIS/AA%20Greenbelt...,http://data.a2gov.org/feeds/GIS/AA%20Greenbelt...


In [8]:
# Fetch the datasets used in this notebook; files already on disk are reused.
paths = download_rows(
    'aata_bus_stops',
    'building_footprints',
    'invoice_aging_report',
    'calls_for_service_past365days',
    'construction_permits',
    'university',
    'landuse',
    'future_landuse',
    'zoning_districts',
    'group_ab_crime_past365days',
    'wards_and_precincts',
    'dda_boundary',
    table=table,
    reload=False,
)

In [5]:
def load_data(paths):
    """Unpack downloaded ``.zip`` archives into the directory they live in.

    Parameters
    ----------
    paths : dict
        Maps dataset id to a local file path. Entries whose path is None
        (which ``download_rows`` uses for failed downloads) are skipped, as are
        files that do not end in ``.zip``.

    Returns
    -------
    dict
        Currently always empty: nothing is read from the extracted files yet.
        TODO: load the extracted data (and handle KML/CSV downloads) into it.
    """
    output = {}
    for ID, path in paths.items():
        if path is None:
            continue
        if not os.path.basename(path).endswith('.zip'):
            continue
        target_dir = os.path.dirname(path) or '.'
        try:
            # zipfile overwrites existing members silently, so re-running the
            # cell never blocks on an interactive "replace?" prompt.
            with zipfile.ZipFile(path) as archive:
                archive.extractall(target_dir)
        except (zipfile.BadZipFile, OSError) as e:
            print('ERROR: failed to unzip ID \'{}\' at path <{}>'.format(ID, path))
            print(e)

    return output

# Run the unzip step over every downloaded file; the returned dict is the cell's output.
load_data(paths)

{}

In [2]:
import geopandas as gpd

# Read the AATA bus-stop shapefile into a GeoDataFrame.
df = gpd.read_file('data/aata_bus_stops/AATABusStops.shp')

In [None]:
import pathlib  # NOTE(review): not used anywhere in this cell

# Read the building-footprints shapefile; as the cell's last expression the
# resulting frame is what the notebook displays.
gpd.read_file('data/building_footprints/AA_building_footprints.shp')