In [231]:
import os
import glob
import numpy as np
import pandas as pd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import geocoder
import geopandas as gp

In [119]:
def geoencode(address, min_confidence=7):
    '''Look up lat/long for one landfill record via geocoder's Google provider.

    Written to be applied row-wise, e.g. ``df.apply(geoencode, axis=1)``.

    Parameters:
    ----------
    address: mapping (e.g. a DataFrame row),
        Must provide the keys 'Facility Address', 'Town' and 'State'.
        A null street or town value is left out of the query.
    min_confidence: number, default 7
        Lowest ``confidence`` of the geocoder result that is accepted.

    Returns:
    -------
    pandas.Series with the entries 'lat' and 'lng'. Both are None when the
    result has no confidence or its confidence is below ``min_confidence``.
    '''
    street_and_town = ' '.join(
        str(address[key]).strip()
        for key in ('Facility Address', 'Town')
        if pd.notnull(address[key])
    )
    # Query has the form "30-27 Greenpoint Avenue LIC, NY".
    landfill = '{}, {}'.format(street_and_town, address['State'])

    g = geocoder.google(landfill)

    # A failed lookup may report no confidence at all; comparing None with a
    # number raises TypeError on Python 3, so guard against it explicitly.
    confidence = getattr(g, 'confidence', None)
    if confidence is not None and confidence >= min_confidence:
        return pd.Series({'lat': g.lat, 'lng': g.lng})
    return pd.Series({'lat': None, 'lng': None})

In [120]:
# Load the Connecticut landfill list and tag every record with its state,
# which geoencode reads when it builds the lookup query.
df = pd.read_csv('data/Connecticut_Active_Landfills.csv').assign(State='CT')

# Geocode row by row, then place the resulting lat/lng columns next to the
# original ones.
data = pd.concat(
    [df, df.apply(geoencode, axis=1)],
    axis=1,
)

In [121]:
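# Optional: persist the geocoded coordinates.
# NOTE: this writes to the same path the CSV was read from, overwriting the input file.
#data.to_csv('data/Connecticut_Active_Landfills.csv', index=False)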

---

In [183]:
# Coordinate reference system (EPSG:4326) applied to every output layer.
new_crs = dict(init='epsg:4326')

# Input datasets, relative to the data/ directory: five shapefiles and one CSV.
fpath = [
    'construction_demo_debris_NYS/construction_demo_debris.shp',
    'active_msw_landfills_NYS/active_msw_landfills.shp',
    'industrial_commercial_NYS/industrial_commercial.shp',
    'Landfill_Sites_in_New_Jersey/Landfill_Sites_in_New_Jersey.shp',
    'privately_owned_landfills_NYS/privately_owned_landfills_NYS.shp',
    'Connecticut_Active_Landfills.csv',
]

In [260]:
def create_geom(coords):
    '''Build a Point from a record's 'lng' (x) and 'lat' (y) entries.'''
    longitude = coords['lng']
    latitude = coords['lat']
    return Point(longitude, latitude)

def add_lat_lng(fpath, out_dir='data/output'):
    '''Convert one landfill dataset to a shapefile in ``new_crs`` that carries
    'lat' and 'lng' columns and lower-cased column names.

    Parameters:
    ----------
    fpath: string,
        Path of the input file. A ``.csv`` input must already contain 'lat'
        and 'lng' columns; rows missing either value are dropped. Any other
        input is read with ``geopandas.read_file`` and reprojected.
    out_dir: string, default 'data/output'
        Directory the shapefile is written to; created when it is missing.

    Returns None; the result is the file written to ``out_dir``.
    '''
    stem, ext = os.path.splitext(os.path.basename(fpath))

    if ext.lower() == '.csv':
        # Drop rows without coordinates *before* building geometries so that
        # no Point is ever constructed from NaN values.
        lf = pd.read_csv(fpath).dropna(subset=['lat', 'lng']).copy()
        lf['geometry'] = [create_geom(row) for _, row in lf.iterrows()]
        lf = gp.GeoDataFrame(lf)
        # The CSV coordinates are not transformed, only declared as new_crs.
        lf.crs = new_crs
        fname = stem + '.shp'
    else:
        lf = gp.read_file(fpath).to_crs(new_crs)
        fname = stem + ext

    # Lower-case the column names first: the check below then also recognises
    # e.g. 'LAT'/'LNG' instead of adding duplicate 'lat'/'lng' columns.
    lf.columns = [n.lower() for n in lf.columns]

    if 'lat' not in lf.columns or 'lng' not in lf.columns:
        # NOTE(review): .x/.y assume every geometry is a point - confirm for new inputs.
        lf['lng'] = lf['geometry'].map(lambda point: point.x)
        lf['lat'] = lf['geometry'].map(lambda point: point.y)

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    lf.to_file(os.path.join(out_dir, fname))

In [261]:
# run
# Convert every configured dataset. add_lat_lng is called only for its side
# effect (it writes a shapefile), so a plain loop is used rather than a list
# comprehension that would collect and echo a list of None values.
for rel_path in fpath:
    add_lat_lng(os.path.join('data/', rel_path))

---

In [262]:
# Load every converted layer. glob.glob returns paths in arbitrary order, so
# sort them (case-insensitively, by file name) to give each layer a stable,
# reproducible position in `data`.
shp_paths = sorted(
    glob.glob('data/output/*.shp'),
    key=lambda path: os.path.basename(path).lower(),
)
data = [gp.read_file(path) for path in shp_paths]

In [287]:
# Clean and Standardize
def _standardize(layer, columns, desc, rename=None):
    '''Return a new frame holding `columns` of `layer` in the common schema.

    Columns are renamed according to `rename`, a 'waste_type' of 'unknown' is
    added when the layer has none, and `desc` is stored as the layer label.
    A fresh frame is built at every step, so nothing is assigned on a slice
    of `layer` (which would raise pandas' SettingWithCopyWarning).
    '''
    out = layer[columns].rename(columns=rename or {})
    if 'waste_type' not in out.columns:
        out = out.assign(waste_type='unknown')
    return out.assign(desc=desc)

# NOTE(review): the positions below assume `data` holds the six layers in
# exactly this order - verify against the cell that loads them.
d0 = _standardize(data[0], ['lat', 'lng', 'waste_type', 'name'], 'NY msw')
d1 = _standardize(data[1], ['lat', 'lng', 'waste type', 'owner'], 'CT',
                  rename={'waste type': 'waste_type', 'owner': 'name'})
d2 = _standardize(data[2], ['lat', 'lng', 'waste_type', 'company'], 'NY construction',
                  rename={'company': 'name'})
d3 = _standardize(data[3], ['lat', 'lng', 'waste_type', 'company'], 'NY industrial/commercial',
                  rename={'company': 'name'})
d4 = _standardize(data[4], ['lat', 'lng', 'lfname'], 'NJ',
                  rename={'lfname': 'name'})
d5 = _standardize(data[5], ['lat', 'lng', 'name'], 'NY private')


In [291]:
# Stack the six standardized layers into one table with a fresh 0..n-1 index
# and write it out as the master CSV (without the index column).
pd.concat(
    objs=[d0, d1, d2, d3, d4, d5],
    ignore_index=True,
).to_csv(path_or_buf='data/output/landfills_master.csv', index=False)