Urban Definition
--------------

Select the census urban areas definitions with a spatial join of the Metro stats areas

Them:
  - Used Metro Statistical Area of 50 largest cities
  - Buffered by 50km, removed +/- 50m elevation area, removed neighboring urban areas
  
Us:
  - Use metro stats area of urban extents
  - Buffer by 50 km, remove neighboring extents, remove elevation areas +/- 50 m

Timeframe and data 
----------------
2010
PRISM, Daymet, station data, MODIS

Function
----------------
(Urban spatial average of tMin) - (Buffer spatial average of tMin)


Stats
-----------------
 - Avg of daily values for the summer months (June, July, August) in 2010
 ~~- Average across all of 2010~~
 ~~- 2006-2010 average~~

In [1]:
# Set up the database engine and load the pickled sample places

from sqlalchemy import create_engine
from shapely import wkb
import os
import requests
# NOTE(review): the connection URI embeds a user name and password in the
# notebook; consider reading it from the environment instead.
POSTGRESURI = 'postgresql://urbis:urbis@ontoserv:5434/urbisdata01'
engine = create_engine(POSTGRESURI)

import pickle

# Load every file found in Dabbage/pickles and index the unpickled records
# by their 'placeid' entry.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# this on pickles produced by a trusted source.
sampleplaces = {}
for picklename in os.listdir('Dabbage/pickles'):
    picklepath = os.path.join('Dabbage', 'pickles', picklename)
    with open(picklepath, 'rb') as picklefile:
        placerecord = pickle.load(picklefile)
    sampleplaces[placerecord['placeid']] = placerecord


In [2]:
from shapely import wkb
from pyspatial.vector import from_series
import pandas as pd
import os.path as op
import json
import numpy as np
from pyspatial.raster import read_catalog, read_raster
import traceback
from dateutil.parser import parse as dateparser
# Set up the years that we will use: for each year, the daily date range
# running from June 1 through August 31 (both inclusive).
def _summerdays(year):
    """Return the daily pandas date range from June 1 to August 31 of *year*."""
    firstday = dateparser("{0}-06-01".format(year))
    lastday = dateparser("{0}-08-31".format(year))
    # +1 so that the last day itself is part of the range.
    daycount = (lastday - firstday).days + 1
    return pd.date_range(firstday, periods=daycount, freq='D')


yearset = {}
for year in range(2010, 2011):
    yearset[year] = _summerdays(year)
  

In [5]:

# Set up the vector layers that we will use.
# Only places whose 'censusurb' record has a truthy 'processbuffer' entry are
# queried; the three sequences below share the same order, so position i in
# each of them refers to the same place.
bufferedplaces = [place for place in sampleplaces.values()
                  if place['censusurb'].get('processbuffer')]

geomsetkeys = [place['placeid'] for place in bufferedplaces]
urbangeoms = pd.Series([place['censusurb']['geom'] for place in bufferedplaces])
buffergeoms = pd.Series([place['censusurb']['processbuffer'] for place in bufferedplaces])
urbanvl = from_series(urbangeoms)
buffervl = from_series(buffergeoms)

# Every place (queried or not) gets an empty per-date result dict for each
# of the six measures.
resultnames = ['buffertmax', 'buffertmin', 'buffertavg',
               'urbantmax', 'urbantmin', 'urbantavg']
for s in sampleplaces.values():
    s['prismresults'] = dict((name, {}) for name in resultnames)



In [6]:
from ipywidgets import FloatProgress
from IPython.display import display
# Progress bar covering every day of every year in yearset (previously only
# the first year's length was used as the maximum).
f = FloatProgress(min=0, max=sum(len(daterng) for daterng in yearset.values()))
display(f)


PRISMDIR = '/Volumes/UrbisBackup/rasterstorage/prism'
PRISMFILETEMPLATE = "(PRISM_{measure}_stable_4kmD1_{year}{month}{day}_bil).tif"
PRISMRESULTNAMES = ('buffertmin', 'buffertmax', 'buffertavg',
                    'urbantmin', 'urbantmax', 'urbantavg')


def _prismfilename(measure, date):
    """Return the PRISM daily raster file name for *measure* on *date*.

    measure -- the measure tag placed in the file name ('tmin' or 'tmax').
    date    -- an object exposing .year, .month and .day.
    """
    return PRISMFILETEMPLATE.format(measure=measure,
                                    year=date.year,
                                    month=str(date.month).zfill(2),
                                    day=str(date.day).zfill(2))


def _weightedmean(values, weights):
    """Return the weight-averaged value of the entries greater than -150.

    Entries of *values* at or below -150 are left out, together with their
    weights (presumably this screens out a nodata sentinel -- TODO confirm).
    """
    indexc = np.argwhere(values > -150)
    newv = np.take(values, indexc)
    neww = np.take(weights, indexc)
    return float((newv * neww).sum() / neww.sum())


for daterng in yearset.values():
    for daymetdate in daterng:
        f.value += 1

        tminprismfilename = _prismfilename('tmin', daymetdate)
        tmaxprismfilename = _prismfilename('tmax', daymetdate)

        try:
            tminraster = read_raster(op.join(PRISMDIR, tminprismfilename))
            tmaxraster = read_raster(op.join(PRISMDIR, tmaxprismfilename))
        except Exception:
            # Best effort: a day whose rasters cannot be read is recorded as
            # NaN for every place and measure, and the run continues.
            traceback.print_exc()
            # The original message formatted an undefined name here, which
            # raised NameError and aborted the whole loop.
            print("{0} Does not exist in the file system".format(
                " / ".join([tminprismfilename, tmaxprismfilename])))
            for s in sampleplaces.values():
                for resultname in PRISMRESULTNAMES:
                    s['prismresults'][resultname][daymetdate] = np.nan
            continue

        resultset = {
            ('urban', 'tmin',): tminraster.query(urbanvl),
            ('urban', 'tmax',): tmaxraster.query(urbanvl),
            ('buffer', 'tmin',): tminraster.query(buffervl),
            ('buffer', 'tmax',): tmaxraster.query(buffervl)
        }

        for resultkey, result in resultset.items():
            # ('urban', 'tmin') -> 'urbantmin', matching the prismresults keys.
            resultname = "".join(resultkey)
            # Query results are paired with place ids by position; both were
            # built from the same filtered sequence of places.
            for r, skey in zip(result, geomsetkeys):
                s = sampleplaces[skey]
                try:
                    s['prismresults'][resultname][daymetdate] = _weightedmean(r.values, r.weights)
                except Exception:
                    # Deliberate best effort: any failure for one place is
                    # recorded as NaN rather than stopping the run.
                    s['prismresults'][resultname][daymetdate] = np.nan

        for skey in geomsetkeys:
            placeresults = sampleplaces[skey]['prismresults']
            try:
                # Daily average temperature is the midpoint of tmin and tmax.
                placeresults['buffertavg'][daymetdate] = np.mean(
                    [placeresults['buffertmin'][daymetdate], placeresults['buffertmax'][daymetdate]])
                placeresults['urbantavg'][daymetdate] = np.mean(
                    [placeresults['urbantmin'][daymetdate], placeresults['urbantmax'][daymetdate]])
            except KeyError:
                # A tmin/tmax entry is missing for this place and date.
                placeresults['buffertavg'][daymetdate] = np.nan
                placeresults['urbantavg'][daymetdate] = np.nan


In [17]:
# Coerce the two stored geometry strings of every place to plain str, then
# write the whole sampleplaces dict (now including the PRISM results) to disk.
for s in sampleplaces.values():
    for geomkey in ('usgsplacegeomstr', 'usgsplacegeomwgs84str'):
        s[geomkey] = str(s[geomkey])

import pickle
# NOTE(review): this path uses a lower-case 'dabbage' directory while the
# other cells read and write under 'Dabbage' -- confirm this is intended
# (it only works interchangeably on a case-insensitive file system).
with open('dabbage/dabbageprism.pickle', 'wb') as picklefile:
    pickle.dump(sampleplaces, picklefile)


In [16]:
import numpy as np
import pandas as pd

# One summary row per place: the urban-minus-buffer difference of the mean
# tmin, tavg and tmax over all dates in yearset.
UHIMEASURES = ['urbantmin', 'urbantmax', 'urbantavg',
               'buffertmin', 'buffertmax', 'buffertavg']

outputdict = []

for s in sampleplaces.values():
    try:
        # One row per day: [year, date, <six measures>]. A place that has no
        # value for some date raises KeyError here and is reported below.
        dailyrows = []
        for year, daterng in yearset.items():
            for daymetdate in daterng:
                dailyrows.append(
                    [year, daymetdate]
                    + [s['prismresults'][measure][daymetdate] for measure in UHIMEASURES])

        df = pd.DataFrame(dailyrows, columns=['year', 'date'] + UHIMEASURES)
        # Mean per year first, then the mean of those yearly means.
        # The columns are selected with a list; selecting with a bare tuple
        # of names is no longer accepted by current pandas.
        yearlymeans = df.groupby('year')[UHIMEASURES].mean()
        finalresult = yearlymeans.mean()

        outputdict.append({
                'placeid': s['placeid'],
                'placename': s['usgsplacename'],
                'uhitmin': float(finalresult['urbantmin']) - float(finalresult['buffertmin']),
                'uhitavg': float(finalresult['urbantavg']) - float(finalresult['buffertavg']),
                'uhitmax': float(finalresult['urbantmax']) - float(finalresult['buffertmax'])
            })
    except Exception as e:
        # Best effort: report the place and carry on with the next one.
        # .get() keeps the handler itself from raising if the name is absent.
        print("ERROR %s %s" % (s.get('usgsplacename'), e))
    
    
 

San Antonio Timestamp('2010-06-01 00:00:00', offset='D')
Omaha Timestamp('2010-06-01 00:00:00', offset='D')
Lincoln Timestamp('2010-06-01 00:00:00', offset='D')


In [18]:
import csv

# Write the per-place UHI summary rows to CSV, header line first.
csvcolumns = ['placeid', 'placename', 'uhitmin', 'uhitmax', 'uhitavg']
with open('Dabbage/dabbage_prism.csv', 'w') as csvout:
    writer = csv.DictWriter(csvout, fieldnames=csvcolumns)
    writer.writeheader()
    writer.writerows(outputdict)