Urban Definition
--------------
Them:
    - Use the station closest to the urban center that has data from 1970-2013 as the single urban station
    - For the Rural selection:
       - Between 50km - 250km away
       - Be in a population of < 10k
       - Lie in a dim, dark, or unlit nighttime lights area
Us:
    - Use the station closest to the urban center that has data from 2004-2013 as the single urban station
    - Add a 250km buffer around the selected station and select the closest station that meets the following criteria:
        - Must be greater than 50km
        - Landscan population of less than a value of 193 as proxy for the census data and nighttime lights
Suggest:
    - Using elevation

Timeframe and data 
----------------
2004-2013 
PRISM, Daymet, Station Data, MODIS

Function
----------------
(Urban & tMin) - (Rural & tMin)

(Urban & tAvg) - (Rural & tAvg)


Stats
-----------------
 - June, July, August 2004-2013
 - Average summer (June to Aug daily temp)

In [3]:
# Load the 100 most populous USGS cities with their EarthEnv urban extents and surrounding rural rings

from sqlalchemy import create_engine
from shapely import wkb
import requests
# Connection to the project PostGIS database; the credentials are embedded
# directly in the URI.
POSTGRESURI = 'postgresql://urbis:urbis@ontoserv:5434/urbisdata01'
engine = create_engine(POSTGRESURI)

# One row per EarthEnv urban cluster (grouped by placeid).  Each aggregate
# keeps the attributes of the most populous USGS city intersecting the
# cluster; the 100 clusters whose chosen city has the largest 2010
# population are returned.
# NOTE(review): the county FIPS column is exposed under the alias
# 'countryfips' -- looks like a typo, but renaming it would change the row
# schema that later cells pickle.
SELECTPLACES = """
SELECT 
(array_agg(earthenv.placeid ORDER BY usgscities."pop_2010" DESC))[1] AS placeid,
(array_agg(usgscities.name ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacename,
(array_agg(ST_AsEWKB(ST_Transform(usgscities.geom, 4326)) ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacegeomwgs84str,
(array_agg(ST_AsEWKB(usgscities.geom) ORDER BY usgscities."pop_2010" DESC))[1] AS usgsplacegeomstr,
(array_agg(usgscities."pop_2010"  ORDER BY usgscities."pop_2010" DESC))[1] AS usgspopulation,
(array_agg(usgscities.countyfips  ORDER BY usgscities."pop_2010" DESC))[1] AS countryfips,
(array_agg(usgscities."state_fips"  ORDER BY usgscities."pop_2010" DESC))[1] AS statefips
FROM urbanclusters.usgscities as usgscities, 
urbanclusters.earthenv_urbannamed as earthenv
WHERE ST_Intersects(usgscities.geom, earthenv.geom) 
GROUP BY earthenv.placeid
ORDER BY usgspopulation DESC
LIMIT 100 """

placeresult = engine.execute(SELECTPLACES)

# placeid -> row dict.  Besides the raw columns, each dict gains the parsed
# shapely geometries under 'usgsplacegeom' and 'usgsplacegeomwgs84'.
sampleplaces = {}

for row in placeresult:
    rowdict = dict(row)
    for geomkey in ('usgsplacegeom', 'usgsplacegeomwgs84'):
        # The raw WKB lives in the sibling '<geomkey>str' column.
        rowdict[geomkey] = wkb.loads(str(rowdict[geomkey + 'str']))
    sampleplaces[rowdict['placeid']] = rowdict
    


earthenvtable = 'urbanclusters.earthenv_urbannamed'

newsamples = {}

for placeid in sampleplaces.keys():

    GETGEOM = """
        SELECT ST_AsEWKB(geom), ST_AsEWKB(ST_Transform(geom, 4326)) as wgs84geom,
        ST_AsEWKB(ST_Transform(ST_Difference(
        ST_Buffer(geom, sqrt(St_Area(geom)/pi())*2)
        , geom), 4326)) AS ruralgeomwgs84,
        ST_AsEWKB(ST_Difference(
        ST_Buffer(geom, sqrt(St_Area(geom)/pi())*2)
        , geom)) AS ruralgeom
        FROM {0}
        WHERE placeid={1}
        """.format(earthenvtable, placeid)
    r = engine.execute(GETGEOM)
    firstitem = r.first()
    if firstitem:
        newsamples[placeid] = sampleplaces[placeid]
        
        newsamples[placeid]["earthenv"] = {
            'geom': wkb.loads(str(firstitem[0])),
            'wgs84': wkb.loads(str(firstitem[1])),
            'ruralgeom': wkb.loads(str(firstitem[3])),
            'ruralgeomwgs84': wkb.loads(str(firstitem[2])),
        }
sampleplaces = newsamples
print sampleplaces.values()[0]['usgsplacegeomwgs84']



MULTIPOINT (-97.33754479999999 37.69223609908695)


In [7]:
# get urban and rural stations
from shapely.geometry import Point
import acis
import json

# For every place, ask ACIS for the stations inside the bounding box of the
# rural ring and sort them into "urban" (inside the urban polygon) and
# "rural" (inside the ring).  Stations in the box but in neither geometry are
# dropped.
for s in sampleplaces.values():
    urbanarea = s['earthenv']['wgs84']
    ruralring = s['earthenv']['ruralgeomwgs84']

    # shapely bounds -> "minx,miny,maxx,maxy" as the bbox query parameter.
    bbox = ",".join(map(str, ruralring.bounds))

    closeststation = None  # assigned but never read in this cell
    res = requests.get("http://data.rcc-acis.org/StnMeta?bbox={0}&sdate=2004-01-01&edate=2013-12-31&output=json".format(bbox))
    stationresults = json.loads(res.text)

    urbanstations = []
    ruralstations = []
    for station in stationresults['meta']:
        # A station without any station id cannot be queried for data later.
        if len(station['sids']) > 0:
            # presumably 'll' is [lon, lat], matching the WGS84 x/y order of
            # the polygons -- TODO confirm against the ACIS docs
            stationpoint = Point(station['ll'])
            if stationpoint.within(urbanarea):
                urbanstations.append(station)
            elif stationpoint.within(ruralring):
                ruralstations.append(station)

    s['allstations'] = dict(
        urbanstations=urbanstations,
        ruralstations=ruralstations,
    )




In [21]:
#get stations data
import pandas as pd
import acis
import numpy as np
counter = 1
for k,s in sampleplaces.iteritems():
    print "Doing", counter, "out of ", len(sampleplaces.keys())
    counter +=1
    toprocess = ('urbanstations', 'ruralstations',)
    for processkey in toprocess:
        success = 0
        for station in s['allstations'][processkey]:
            try:
                request = acis.StnDataRequest()  # change Request type
                request.location(sid=station['sids'][0])  # change keyword and SID list
                request.dates("2004-01-01", "2013-12-31")  # sdate and edate
                request.add_element("maxt")
                request.add_element("avgt")
                request.add_element("mint")
                request.metadata("name")
                result = acis.StnDataResult(request.submit())  # change Result type
                
                df = pd.DataFrame([x for x in result], columns=['uid', 'date', 'tmax', 'tavg','tmin'])
                df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
                df = df.set_index(pd.DatetimeIndex(df['date']))
                tempresults = []
                for year in range(2004, 2014):
                    subset = df[(df['date'] > '{0}-5-31'.format(year)) & (df['date'] <= '{0}-8-31'.format(year))]
                    fails = 0
                    for measures in ('tmin', 'tmax', 'tavg',):
                        try:
                            tempresults.append([measures, str(year), \
                                                subset[measures].astype(str).convert_objects(convert_numeric=True).mean()])
                        except Exception,e:
                            tempresults.append([measures, str(year), np.NaN])

                station['results'] = tempresults
                if processkey == 'urbanstations':
                    if s['allstations'].get('urbanuse', False):
                        s['allstations']['urbanuse'].append(station)
                    else:
                        s['allstations']['urbanuse'] = [station]
                else:
                    if s['allstations'].get('ruraluse', False):
                        s['allstations']['ruraluse'].append(station)
                    else:
                        s['allstations']['ruraluse'] = [station]
            except Exception, e:
                if str(e).find("no data available") == -1:
                    print e
                continue                

Doing 1 out of  100




Doing 2 out of  100
Doing 3 out of  100
Doing 4 out of  100
Doing 5 out of  100
Doing 6 out of  100
Doing 7 out of  100
Doing 8 out of  100
Doing 9 out of  100
Doing 10 out of  100
Doing 11 out of  100
Doing 12 out of  100
Doing 13 out of  100
Doing 14 out of  100
Doing 15 out of  100
Doing 16 out of  100
Doing 17 out of  100
Doing 18 out of  100
Doing 19 out of  100
Doing 20 out of  100
Doing 21 out of  100
Doing 22 out of  100
Doing 23 out of  100
Doing 24 out of  100
Doing 25 out of  100
Doing 26 out of  100
Doing 27 out of  100
Doing 28 out of  100
Doing 29 out of  100
Doing 30 out of  100
Doing 31 out of  100
Doing 32 out of  100
Doing 33 out of  100
Doing 34 out of  100
Doing 35 out of  100
Doing 36 out of  100
Doing 37 out of  100
Doing 38 out of  100
Doing 39 out of  100
Doing 40 out of  100
Doing 41 out of  100
Doing 42 out of  100
Doing 43 out of  100
Doing 44 out of  100
HTTP Error 503: Service Unavailable
Doing 45 out of  100
Doing 46 out of  100
Doing 47 out of  100
Doing 

In [22]:
import pickle

# Coerce the two raw WKB columns to plain strings before pickling
# (presumably the database driver hands them back as buffer-like objects --
# TODO confirm).  The parsed geometry entries ('usgsplacegeom',
# 'usgsplacegeomwgs84', 'earthenv') stay in each dict; an earlier variant of
# this cell deleted them first.
for k, s in sampleplaces.iteritems():
    for wkbkey in ('usgsplacegeomstr', 'usgsplacegeomwgs84str'):
        s[wkbkey] = str(s[wkbkey])

# Snapshot everything gathered so far (places, geometries, stations and
# their per-summer results).
with open('climatecentral/climatecentralbase_allstations.pickle', 'wb') as fout:
    pickle.dump(sampleplaces, fout)


In [24]:
for s in sampleplaces.values():
    print len(s['allstations'].get('ruraluse', [])), ",", len(s['allstations'].get('urbanuse', []))

6 , 22
30 , 7
28 , 4
72 , 22
6 , 3
9 , 6
3 , 2
61 , 9
4 , 5
11 , 3
7 , 6
23 , 14
16 , 11
47 , 15
4 , 4
12 , 8
6 , 3
51 , 15
6 , 5
24 , 10
35 , 34
48 , 22
10 , 2
15 , 3
6 , 4
10 , 4
6 , 4
13 , 7
25 , 14
3 , 3
45 , 16
4 , 3
13 , 4
7 , 4
0 , 3
213 , 71
185 , 58
5 , 1
7 , 1
6 , 4
98 , 29
32 , 14
7 , 2
25 , 10
14 , 2
12 , 2
6 , 7
56 , 34
10 , 4
18 , 7
26 , 9
3 , 2
29 , 16
46 , 22
46 , 16
70 , 11
25 , 9
24 , 8
17 , 7
6 , 2
188 , 65
14 , 6
23 , 5
11 , 2
15 , 5
23 , 9
1 , 7
2 , 2
2 , 3
61 , 20
18 , 9
3 , 1
128 , 31
78 , 25
24 , 13
85 , 10
21 , 8
5 , 3
0 , 3
16 , 7
18 , 12
0 , 4
12 , 4
3 , 4
17 , 7
11 , 5
9 , 3
21 , 5
6 , 5
52 , 21
241 , 86
15 , 1
4 , 1
116 , 48
11 , 2
13 , 3
538 , 112
2 , 1
26 , 8
121 , 41


In [26]:
import numpy as np
import pandas as pd

outputdict = []

for s in sampleplaces.values():
    urbanuse = s['allstations'].get('urbanuse', None)
    if not urbanuse:
        print "skipping", s['usgsplacename']
        continue
    urbandfs = []
    for u in urbanuse:
        tdf = pd.DataFrame(u['results'], columns=['measure', 'year', 'value'])
        tdf.set_index(['measure', 'year'])
        urbandfs.append(tdf)
        
    udf_concat = pd.concat(urbandfs)
    urbanresult = pd.DataFrame(udf_concat.groupby(['measure','year'])['value'].mean()).reset_index()
    
    ruraluse = s['allstations'].get('ruraluse', [])
    
    if len(ruraluse) == 0:
        outputdict.append({
            'placename': s['usgsplacename'],
            'uhitmin': "no rural",
            'uhitavg': "no rural",
            'uhitmax': "no rural"
        })
        continue
    
    ruraldfs = []
    for r in ruraluse:
        tdf = pd.DataFrame(r['results'], columns=['measure', 'year', 'value'])
        tdf.set_index(['measure', 'year'])
        ruraldfs.append(tdf)
    df_concat = pd.concat(ruraldfs)
    ruralresult = pd.DataFrame(df_concat.groupby(['measure','year'])['value'].mean()).reset_index()

    
    mergeddf =  urbanresult.merge(ruralresult,on=['measure','year'],how='left')
#     print mergeddf[['value_x', 'value_y']].sub(axis=1)
    mergeddf['uhi'] = mergeddf['value_x'] - mergeddf['value_y']
    outputdict.append({
            'placeid': s['placeid'],
            'placename': s['usgsplacename'],
            'uhitmin': mergeddf[mergeddf['measure']=='tmin']['uhi'].mean(),
            'uhitavg': mergeddf[mergeddf['measure']=='tavg']['uhi'].mean(),
            'uhitmax': mergeddf[mergeddf['measure']=='tmax']['uhi'].mean()
        })
#     print mergeddf[['value_x']].sub(mergeddf['value_y'], axis=0)
#     for year in range(2004, 2014):
#         for measure in ['tmin', 'tmax', 'tavg']

In [27]:
import csv

# Dump the per-place UHI rows to CSV.  Column order is fixed by fieldnames;
# a row that lacks one of the fields gets an empty cell for it.
csvcolumns = ['placeid', 'placename', 'uhitmin', 'uhitmax', 'uhitavg']
with open('climatecentral/allstations.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csvcolumns)
    writer.writeheader()
    writer.writerows(outputdict)