In [1]:
# Find the closest station to downtown

from sqlalchemy import create_engine
from shapely import wkb
import os
import requests
# Connection string for the project's PostgreSQL database. It can be
# overridden through the POSTGRESURI environment variable; the literal below
# is only kept as the historical default so existing setups keep working.
# NOTE(review): the default embeds a username and password in the notebook --
# prefer supplying POSTGRESURI from the environment and rotating this
# credential.
POSTGRESURI = os.environ.get(
    'POSTGRESURI',
    'postgresql://urbis:urbis@ontoserv:5434/urbisdata01')
engine = create_engine(POSTGRESURI)

import pickle
# Load every pickled sample place from disk and index it by its 'placeid'.
# NOTE(review): pickle.load can execute arbitrary code embedded in a file, so
# this directory must only ever contain trusted, locally generated pickles.
PICKLE_DIR = os.path.join('Dabbage', 'pickles')
sampleplaces = {}
for fname in os.listdir(PICKLE_DIR):
    fpath = os.path.join(PICKLE_DIR, fname)
    # Skip sub-directories and hidden files (e.g. .DS_Store): they are not
    # pickles and would otherwise abort the whole load with an exception.
    if fname.startswith('.') or not os.path.isfile(fpath):
        continue
    with open(fpath, 'rb') as fin:
        tempobj = pickle.load(fin)
    sampleplaces[tempobj['placeid']] = tempobj



In [2]:
print sampleplaces.values()[0]

{u'usgsplacename': u'Fort Wayne', 'earthenv': {'wgs84': <shapely.geometry.multipolygon.MultiPolygon object at 0x118d7db50>, 'ruralgeomwgs84': <shapely.geometry.multipolygon.MultiPolygon object at 0x118d7db90>, 'geom': <shapely.geometry.multipolygon.MultiPolygon object at 0x118d7d910>, 'ruralgeom': <shapely.geometry.multipolygon.MultiPolygon object at 0x118d7dc10>}, u'statefips': u'18', u'usgsplacegeomstr': '\x01\x04\x00\x00 \x11\x0f\x00\x00\x01\x00\x00\x00\x01\x01\x00\x00\x00x$\t\xaa2\x13b\xc1\x18\x06w7\xb21SA', u'placeid': 7890, 'usgsplacegeom': <shapely.geometry.multipoint.MultiPoint object at 0x118d7dc50>, u'usgspopulation': 253691.0, u'gnisid': 434689.0, u'usgsplacegeomwgs84str': '\x01\x04\x00\x00 \xe6\x10\x00\x00\x01\x00\x00\x00\x01\x01\x00\x00\x00kQ\xc1<?HU\xc0\xae\x1a\x97\xa2\xb7\x90D@', 'usgsplacegeomwgs84': <shapely.geometry.multipoint.MultiPoint object at 0x118d7dd10>, u'countryfips': u'003', 'censusurb': {'processbuffer': <shapely.geometry.multipolygon.MultiPolygon object at

In [3]:
def log_progress(sequence, every=None, size=None):
    """Iterate over ``sequence`` while displaying a progress widget.

    This is a generator: it yields the items of ``sequence`` unchanged and
    updates an ipywidgets progress bar plus a text label while doing so.

    Parameters
    ----------
    sequence : iterable
        The items to iterate over.
    every : int, optional
        Refresh the widget every ``every`` items (the first item always
        refreshes it). When omitted and the size is known it defaults to 1
        for up to 200 items and to ``size // 200`` (about 0.5%) otherwise.
        Must be given when the size cannot be determined.
    size : int, optional
        Total number of items. Defaults to ``len(sequence)`` when the
        sequence supports ``len()``.

    Yields
    ------
    The items of ``sequence`` in their original order.

    Raises
    ------
    AssertionError
        On first iteration, if the size is unknown and ``every`` is not set.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            # No len(): the total is unknown, so an indeterminate bar is shown.
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                # Floor division keeps ``every`` an integer under Python 3 as
                # well; true division would produce a fractional step there
                # and make the refresh points irregular.
                every = size // 200     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{index} / ?'.format(index=index)
                else:
                    progress.value = index
                    label.value = u'{index} / {size}'.format(
                        index=index,
                        size=size
                    )
            yield record
    except:
        # Deliberately catches everything (and re-raises) so the bar turns
        # red on any failure; this also covers the consumer abandoning the
        # loop early, which surfaces here as GeneratorExit.
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = str(index or '?')

In [4]:
from functools import partial
import pyproj
from shapely.ops import transform

#project to wgs for availabilty
projector = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:3857'),
    pyproj.Proj(init='epsg:4326'))

for s in log_progress(sampleplaces.values(), every=1):
    if not s['censusurb'].get('processbuffer', False):
        print "skipping", s['usgsplacename']
        continue
    else:
        s['censusurb']['processbufferwgs'] = transform(projector, s['censusurb']['processbuffer'])

skipping San Antonio
skipping Omaha
skipping Lincoln


In [7]:
# get urban and rural stations
from shapely.geometry import Point
import acis
import json

# Query the ACIS StnMeta web service for the stations inside the bounding box
# of each place's processing buffer, then classify every station as urban
# (inside the census urban area) or rural (inside the buffer only).
ACIS_STNMETA_URL = ("http://data.rcc-acis.org/StnMeta?bbox={0}"
                    "&sdate=2010-06-01&edate=2010-09-01&output=json")
# Fail with an exception instead of hanging forever on a stalled connection.
ACIS_TIMEOUT_SECONDS = 60

for s in log_progress(sampleplaces.values(), every=1):
    # Places handled by an earlier run keep their stations, so this cell can
    # simply be re-run to resume after a failed request.
    if s.get('allstations', False):
        continue
    if not s['censusurb'].get('processbufferwgs', False):
        print("skipping %s with no processbufferwgs" % s[u'usgsplacename'])
        continue

    bufferarea = s['censusurb']['processbufferwgs']
    bbox = ",".join([str(x) for x in bufferarea.bounds])

    res = requests.get(ACIS_STNMETA_URL.format(bbox),
                       timeout=ACIS_TIMEOUT_SECONDS)
    # Surface HTTP errors here rather than as a confusing JSON/KeyError below.
    res.raise_for_status()
    stationresults = res.json()

    urbanstations = []
    ruralstations = []
    for station in stationresults['meta']:
        # Stations without any station id cannot be queried for data later.
        if not station['sids']:
            continue
        stationpoint = Point(station['ll'])
        if stationpoint.within(s['censusurb']['wgs84']):
            urbanstations.append(station)
        elif stationpoint.within(bufferarea):
            ruralstations.append(station)
    s['allstations'] = {
        'urbanstations': urbanstations,
        'ruralstations': ruralstations
    }




skipping San Antonio with no processbufferwgs
skipping Omaha with no processbufferwgs
skipping Lincoln with no processbufferwgs


In [8]:
#get stations data
import pandas as pd
import acis
import numpy as np
# For every urban and rural station, download the daily max/avg/min
# temperatures for summer 2010 from ACIS and store the per-station seasonal
# means under station['results'] as [measure, year, mean] rows. Stations that
# returned usable data are collected under 'urbanuse' / 'ruraluse'.
for k, s in log_progress(sampleplaces.iteritems(), every=1):
    if not s['censusurb'].get('processbufferwgs', False):
        print("skipping %s with no processbufferwgs" % s[u'usgsplacename'])
        continue
    # (key of the station list to read, key that collects the usable stations)
    toprocess = (('urbanstations', 'urbanuse'), ('ruralstations', 'ruraluse'),)
    for processkey, usekey in toprocess:
        # Rebuild the list on every run: appending to the list left by a
        # previous run would add each station again and skew later averages.
        usable = []
        for station in s['allstations'][processkey]:
            try:
                request = acis.StnDataRequest()
                request.location(sid=station['sids'][0])  # first station id only
                request.dates("2010-06-01", "2010-09-01")  # sdate and edate
                request.add_element("maxt")
                request.add_element("avgt")
                request.add_element("mint")
                request.metadata("name")
                result = acis.StnDataResult(request.submit())

                df = pd.DataFrame([x for x in result], columns=['uid', 'date', 'tmax', 'tavg','tmin'])
                df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
                df = df.set_index(pd.DatetimeIndex(df['date']))
                tempresults = []
                for year in range(2010, 2011):
                    # Keep June 1 through August 31 of the given year.
                    subset = df[(df['date'] > '{0}-5-31'.format(year)) & (df['date'] <= '{0}-8-31'.format(year))]
                    for measures in ('tmin', 'tmax', 'tavg',):
                        try:
                            # Non-numeric entries become NaN and are ignored
                            # by mean().
                            values = pd.to_numeric(subset[measures].astype(str), errors='coerce')
                            tempresults.append([measures, str(year), values.mean()])
                        except (TypeError, ValueError):
                            # Best effort: record the measure as missing
                            # rather than dropping the whole station.
                            tempresults.append([measures, str(year), np.nan])

                station['results'] = tempresults
                usable.append(station)
            except Exception as e:
                # Stations without data are expected and skipped silently;
                # any other failure is reported before moving on.
                if str(e).find("no data available") == -1:
                    print(e)
                continue
        if usable:
            s['allstations'][usekey] = usable
        else:
            # Drop a stale list from an earlier run; as before, the key only
            # exists when at least one station succeeded.
            s['allstations'].pop(usekey, None)



skipping San Antonio with no processbufferwgs
skipping Omaha with no processbufferwgs
timed out
skipping Lincoln with no processbufferwgs


In [9]:
# Coerce the two '...str' geometry fields of every place to plain ``str``
# before pickling.
# (Dropping 'usgsplacegeom', 'usgsplacegeomwgs84' and 'earthenv' from each
# record used to be an option here; that pruning is currently disabled.)
GEOM_STRING_KEYS = ('usgsplacegeomstr', 'usgsplacegeomwgs84str')
for place in sampleplaces.itervalues():
    for geomkey in GEOM_STRING_KEYS:
        place[geomkey] = str(place[geomkey])

# Persist the enriched places, station lists included, as a single pickle.
import pickle
with open('Dabbage/dabbage_allstations.pickle', 'wb') as fout:
    pickle.dump(sampleplaces, fout)


In [11]:
import numpy as np
import pandas as pd

def _mean_station_results(stations):
    """Average the stations' 'results' rows into one row per (measure, year).

    Each station carries a list of [measure, year, value] rows; the returned
    DataFrame has the columns 'measure', 'year' and 'value', where 'value' is
    the mean over all given stations.
    """
    frames = [pd.DataFrame(st['results'], columns=['measure', 'year', 'value'])
              for st in stations]
    combined = pd.concat(frames)
    return pd.DataFrame(
        combined.groupby(['measure', 'year'])['value'].mean()).reset_index()


# One output row per place: the urban-minus-rural difference ('uhi',
# presumably urban heat island) of the mean tmin / tavg / tmax.
outputdict = []

for s in sampleplaces.values():
    allstations = s.get('allstations', False)
    # Without any usable urban station there is nothing to compare.
    if not allstations or not allstations.get('urbanuse', None):
        print("skipping %s" % s['usgsplacename'])
        continue
    urbanresult = _mean_station_results(allstations['urbanuse'])

    ruraluse = allstations.get('ruraluse', [])
    if len(ruraluse) == 0:
        # Keep the place in the output, flagged, and include its id so the
        # CSV row can still be joined back to the place.
        outputdict.append({
            'placeid': s['placeid'],
            'placename': s['usgsplacename'],
            'uhitmin': "no rural",
            'uhitavg': "no rural",
            'uhitmax': "no rural"
        })
        continue
    ruralresult = _mean_station_results(ruraluse)

    # Left merge: every urban (measure, year) row is kept; the rural value is
    # NaN where no rural counterpart exists. value_x is urban, value_y rural.
    mergeddf = urbanresult.merge(ruralresult, on=['measure', 'year'], how='left')
    mergeddf['uhi'] = mergeddf['value_x'] - mergeddf['value_y']
    outputdict.append({
        'placeid': s['placeid'],
        'placename': s['usgsplacename'],
        'uhitmin': mergeddf[mergeddf['measure'] == 'tmin']['uhi'].mean(),
        'uhitavg': mergeddf[mergeddf['measure'] == 'tavg']['uhi'].mean(),
        'uhitmax': mergeddf[mergeddf['measure'] == 'tmax']['uhi'].mean()
    })

skipping San Antonio
skipping Omaha
skipping Lincoln


In [12]:
import csv

# Dump the per-place results to CSV, one row per entry in outputdict.
csvcolumns = ['placeid', 'placename', 'uhitmin', 'uhitmax', 'uhitavg']
with open('Dabbage/allstations.csv', 'w') as fout:
    csvwriter = csv.DictWriter(fout, fieldnames=csvcolumns)
    csvwriter.writeheader()
    for record in outputdict:
        csvwriter.writerow(record)