Urban Definition
--------------
Them:
    - Use the closest station that has data from 1970-2013 to the urban center and select that one urban
    - For the Rural selection:
       - Between 50km - 250km away
       - Be in a population of < 10k
       - lie in a dim, dark, or unlight nighttime lights area
Us:
    - Use the closest station that has data from 2004-2013 to the urban center and select that one urban based
    - Add 250km buffer around the selected station and select the closest station that meets the following criterea:
        - Must be greater than 50km
        - Landscan population of less than a value of 193 as proxy for the census data and nighttime lights
Suggest:
    - Using elevation

Timeframe and data 
----------------
2004-2013 
Prism, Dayment, Station Data, Modis

Function
----------------
(Urban & tMin) - (Rural & tMin)

(Urban & tAvg) - (Rural & tAvg)


Stats
-----------------
 - June, July, August 2004-2013
 - Average summer (June to Aug daily temp)

In [1]:
# Find the closest station to downtown

from sqlalchemy import create_engine
from shapely import wkb
import requests
POSTGRESURI = 'postgresql://urbis:urbis@ontoserv:5434/urbisdata01'
engine = create_engine(POSTGRESURI)

SELECTPLACES = """
SELECT 
(array_agg(earthenv.placeid ORDER BY usgscities."pop_2010" DESC))[1] AS placeid,
(array_agg(usgscities.name ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacename,
(array_agg(ST_AsEWKB(ST_Transform(usgscities.geom, 4326)) ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacegeomwgs84str,
(array_agg(ST_AsEWKB(usgscities.geom) ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacegeomstr,
(array_agg(usgscities."pop_2010"  ORDER BY usgscities."pop_2010" DESC))[1] AS usgspopulation,
(array_agg(usgscities.countyfips  ORDER BY usgscities."pop_2010" DESC))[1] AS countryfips,
(array_agg(usgscities."state_fips"  ORDER BY usgscities."pop_2010" DESC))[1] AS statefips
FROM urbanclusters.usgscities as usgscities, 
urbanclusters.earthenv_urbannamed as earthenv
WHERE ST_Intersects(usgscities.geom, earthenv.geom) 
GROUP BY earthenv.placeid
ORDER BY usgspopulation DESC
LIMIT 100 """

placeresult = engine.execute(SELECTPLACES)

sampleplaces = {}

for row in placeresult:
    rowdict = dict(row)
    rowdict['usgsplacegeom'] = wkb.loads(str(rowdict["usgsplacegeomstr"]))
    rowdict['usgsplacegeomwgs84'] = wkb.loads(str(rowdict["usgsplacegeomwgs84str"]))
    sampleplaces[rowdict['placeid']] = rowdict
    


earthenvtable = 'urbanclusters.earthenv_urbannamed'

newsamples = {}

for placeid in sampleplaces.keys():

    GETGEOM = """
        SELECT ST_AsEWKB(geom), ST_AsEWKB(ST_Transform(geom, 4326)) as wgs84geom,
        ST_AsEWKB(ST_Transform(ST_Difference(
        ST_Buffer(geom, sqrt(St_Area(geom)/pi())*2)
        , geom), 4326)) AS ruralgeomwgs84,
        ST_AsEWKB(ST_Difference(
        ST_Buffer(geom, sqrt(St_Area(geom)/pi())*2)
        , geom)) AS ruralgeom
        FROM {0}
        WHERE placeid={1}
        """.format(earthenvtable, placeid)
    r = engine.execute(GETGEOM)
    firstitem = r.first()
    if firstitem:
        newsamples[placeid] = sampleplaces[placeid]
        
        newsamples[placeid]["earthenv"] = {
            'geom': wkb.loads(str(firstitem[0])),
            'wgs84': wkb.loads(str(firstitem[1])),
            'ruralgeom': wkb.loads(str(firstitem[3])),
            'ruralgeomwgs84': wkb.loads(str(firstitem[2])),
        }
sampleplaces = newsamples
print sampleplaces.values()[0]['usgsplacegeomwgs84']



MULTIPOINT (-97.33754479999999 37.69223609908695)


In [17]:
from shapely import wkb
from pyspatial.vector import from_series
import pandas as pd
import os.path as op
import json
import numpy as np
from pyspatial.raster import read_catalog, read_raster
import traceback
from dateutil.parser import parse as dateparser
#set up the years that we will use
yearset = {}
for year in range(2004,2014):
    starttime = dateparser("{0}-06-01".format(year))
    endtime = dateparser("{0}-08-31".format(year))
    numberdays = end - start
    yearset[year] = pd.date_range(starttime, periods=numberdays.days+1, freq='D')
  

In [32]:
#set up the vector layer df that we will use
urbanvl = from_series(pd.Series([x['earthenv']['geom'] for x in sampleplaces.values()]))

buffervl = from_series(pd.Series([x['earthenv']['ruralgeom'] for x in sampleplaces.values()]))


for s in sampleplaces.values():
    s['daymetresults'] = {
        'buffertmax' : {},
        'buffertmin' : {},
        'buffertavg' : {},
        'urbantmax': {},
        'urbantmin': {},
        'urbantavg': {}
    }


In [42]:
DAYMETDIR =  '/Volumes/UrbisBackup/rasterstorage/daymet'
PRISMDIR = '/Volumes/UrbisBackup/rasterstorage/prism'

for year, daterng in yearset.iteritems():
    for daymetdate in daterng:
        if not np.isnan(sampleplaces.values()[0]['daymetresults']['buffertavg'].get(daymetdate, True)):
            print type(sampleplaces.values()[0]['daymetresults']['buffertavg'].get(daymetdate))
            
            continue
        s['daymetresults']['buffertavg'][daymetdate]
        print "doing {0}".format(daymetdate)
        daymettimetuple = daymetdate.timetuple()
        year = daymettimetuple.tm_year
        yday = daymettimetuple.tm_yday

        tmindaymetfile = "daymet_v3_{0}_{1}_{2}.tif".format('tmin', year,yday)
        tmaxdaymetfile = "daymet_v3_{0}_{1}_{2}.tif".format('tmax', year,yday)
        try:
            tminraster = read_raster(op.join(DAYMETDIR, 'tmin', tmindaymetfile))
            tmaxraster = read_raster(op.join(DAYMETDIR, 'tmax', tmaxdaymetfile))
        except Exception,e:
            traceback.print_exc()
            print "{0} Does not exist in the file system".format(daymetfilename)
            for s in sampleplaces.values():
                s['daymetresults']['buffertmin'][daymetdate] = np.NaN
                s['daymetresults']['buffertmax'][daymetdate] = np.NaN
                s['daymetresults']['buffertavg'][daymetdate] = np.NaN
                s['daymetresults']['urbantmin'][daymetdate] = np.NaN
                s['daymetresults']['urbantmax'][daymetdate] = np.NaN
                s['daymetresults']['urbantavg'][daymetdate] = np.NaN
            continue

        resultset = {
            ('urban', 'tmin',): tminraster.query(urbanvl),
            ('urban', 'tmax',): tmaxraster.query(urbanvl),
            ('buffer', 'tmin',): tminraster.query(buffervl),
            ('buffer', 'tmax',): tmaxraster.query(buffervl)
        }

        for resultkey, result in resultset.iteritems():
            for r,s in zip(result, sampleplaces.values()):
                indexc = np.argwhere(r.values > -150)
                newv =  np.take(r.values, indexc)
                neww =  np.take(r.weights, indexc)
                s['daymetresults']["".join(resultkey)][daymetdate] = float((newv * neww).sum() / neww.sum())
        for s in sampleplaces.values():
            s['daymetresults']['buffertavg'][daymetdate] = np.mean([s['daymetresults']['buffertmin'][daymetdate], s['daymetresults']['buffertmax'][daymetdate]])   
            s['daymetresults']['urbantavg'][daymetdate] = np.mean([s['daymetresults']['urbantmin'][daymetdate], s['daymetresults']['urbantmax'][daymetdate]])   



<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'numpy.float64'>
<type 'nump

In [43]:
# print sampleplaces.values()[0]
for k,s in sampleplaces.iteritems():
#     if s.get('usgsplacegeom'):
#         del s['usgsplacegeom']
#     if s.get('usgsplacegeomwgs84'):
#         del s['usgsplacegeomwgs84']
#     if s.get('earthenv'):
#         del s['earthenv']
    s['usgsplacegeomstr'] = str(s['usgsplacegeomstr'])
    s['usgsplacegeomwgs84str'] = str(s['usgsplacegeomwgs84str'])
    
import pickle
with open('climatecentral/climatecentral_daymet.pickle', 'wb') as fout:
    pickle.dump(sampleplaces, fout)


In [44]:
import numpy as np
import pandas as pd

outputdict = []

for s in sampleplaces.values():
    dictholder = []
    for year, daterng in yearset.iteritems():
        for daymetdate in daterng:
            tempholder = []
            for rmeasure in ['urbantmin', 'urbantmax', 'urbantavg', 'buffertmin', 'buffertmax', 'buffertavg']:
                tempholder.append(s['daymetresults'][rmeasure][daymetdate])
            dictholder.append([year, daymetdate, rmeasure] + tempholder)
                
    df = pd.DataFrame(dictholder, columns=['year', 'date', 'rmeasure', 'urbantmin', \
                                           'urbantmax', 'urbantavg', 'buffertmin', 'buffertmax', 'buffertavg'])
    # group by year with mean
    urbanresult = pd.DataFrame(df.groupby(['year'])['urbantmin', 'urbantmax', \
                                                    'urbantavg', 'buffertmin', \
                                                    'buffertmax', 'buffertavg'].mean()).reset_index()
    finalresult = urbanresult.mean().reset_index()

    outputdict.append({
            'placeid': s['placeid'],
            'placename': s['usgsplacename'],
            'uhitmin':     float(finalresult.loc[finalresult['index'] == 'urbantmin'][0]) \
                            - float(finalresult.loc[finalresult['index'] == 'buffertmin'][0]),
            'uhitavg': float(finalresult.loc[finalresult['index'] == 'urbantavg'][0]) \
                            - float(finalresult.loc[finalresult['index'] == 'buffertavg'][0]),
            'uhitmax': float(finalresult.loc[finalresult['index'] == 'urbantmax'][0]) \
                            - float(finalresult.loc[finalresult['index'] == 'buffertmax'][0])
        })
    
    
 

In [45]:
import csv

with open('climatecentral/climatecentraldaymet.csv', 'w') as fin:
    writer = csv.DictWriter(fin, fieldnames=['placeid', 'placename', 'uhitmin', 'uhitmax', 'uhitavg'])
    writer.writeheader()
    for row in outputdict:
        writer.writerow(row)