In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline  
import seaborn as sns

In [102]:
import math

In [136]:
pd.set_option("display.max_rows",1000)

In [10]:
elev_types = {'FEAT_CODE': str,
             'ELEVATION': np.float64,
             'the_geom': str,
             'SOURCE_ID': str,
             'SUB_CODE': str,
             'STATUS': str}

In [25]:
elev=pd.read_csv('data/elevation.csv', dtype=elev_types)

In [28]:
elev_cols = ['feat_code','elevation','geom','source_id','sub_code','status']
elev.columns = elev_cols

In [29]:
elev.head()

Unnamed: 0,feat_code,elevation,geom,source_id,sub_code,status
0,3020,129.74,POINT (-73.98256951739029 40.70191431011495),21302000001,302000,Unchanged
1,3000,120.586263,POINT (-73.98777990215136 40.70192587201795),21300000002,300020,Unchanged
2,3020,114.74,POINT (-73.9844362285535 40.70193746867856),21302000003,302000,Unchanged
3,3020,69.13,POINT (-73.98103106115082 40.70194154237234),21302000004,302000,Unchanged
4,3020,49.235397,POINT (-73.98802335685477 40.70196086988672),21302000005,302000,Unchanged


In [30]:
elev.dtypes

feat_code     object
elevation    float64
geom          object
source_id     object
sub_code      object
status        object
dtype: object

In [31]:
def split_len(x):
    if len(str(x).replace('POINT (','').replace(')','').split(' ')) > 1:
#         return 0
        return float(str(x).replace('POINT (','').replace(')','').split(' ')[1])
    else:
        return float(0)

elev['lon']=elev.geom.apply(lambda x: str(x).replace('POINT (','').split(' ')[0])
# elev['lat']=elev.the_geom.apply(lambda x: str(x).replace('POINT (','').split(' ')[1])
elev['lat']=elev.geom.apply(lambda x: split_len(x))

In [81]:
def make_float(x):
    
    try:
        y = float(x)
        return y
    except:
        return 0

elev['lon'] = elev['lon'].apply(lambda x: make_float(x))
elev['lat'] = elev['lat'].apply(lambda x: make_float(x))

In [32]:
elev.feat_code.value_counts()

3020                              881461
3000                              324396
3010                                 944
  "error" : true                       1
  "status" : 500                       1
}                                      1
  "message" : "Internal error"         1
Name: feat_code, dtype: int64

In [82]:
elev.shape

(1206801, 8)

In [83]:
# Remove errors
elev = elev[elev['feat_code'].isin(['3020','3010','3000'])]

In [84]:
elev.shape

(1206801, 8)

In [85]:
elev.feat_code.value_counts()

3020    881461
3000    324396
3010       944
Name: feat_code, dtype: int64

In [86]:
elevation_grouped = elev.groupby('feat_code')

In [87]:
avg_elevation_by_feat = elevation_grouped['elevation'].mean()

In [88]:
avg_elevation_by_feat

feat_code
3000    56.430096
3010    49.083661
3020    83.866108
Name: elevation, dtype: float64

In [89]:
# Makes assumption that code '3010' is street_level

In [90]:
building_frame = elev[elev['feat_code'] == '3020']
street_frame = elev[elev['feat_code'] == '3010']
other_frame = elev[elev['feat_code'] == '3020']

### Outline of work to do next

1. Import weather station data
2. For each weather station, calculate average elevation of closest n street elevation points
3. For each weather station, calculate average elevation of closest n building elevation points



In [91]:
station_frame = pd.read_csv('data/stations_export.csv')

In [92]:
station_frame.drop('Unnamed: 0', axis=1, inplace=True)

In [93]:
station_frame.head()

Unnamed: 0,name,lat,lon,h
0,KNYBROOK41,40.6,-73.948,13.0
1,KNYBROOK40,40.717,-73.963,64.0
2,KNYBROOK54,40.624,-74.013,89.0
3,KNYBROOK49,40.695,-73.928,62.0
4,KNYBROOK51,40.714,-73.948,39.0


In [94]:
street_frame.head(5)

Unnamed: 0,feat_code,elevation,geom,source_id,sub_code,status,lon,lat
4419,3010,2.828947,POINT (-73.99665128785833 40.701327075072555),21301004461,301000,Unchanged,-73.996651,40.701327
4420,3010,8.748654,POINT (-73.99589227035206 40.70272217747915),21301004462,301000,Unchanged,-73.995892,40.702722
8396,3010,7.468097,POINT (-73.9963081279997 40.701926083290786),21301008673,301000,New,-73.996308,40.701926
8536,3010,2.231,POINT (-73.93761274918306 40.547189182438856),1301000004,301000,Unchanged,-73.937613,40.547189
12200,3010,2.195313,POINT (-73.90667080733019 40.56256446852484),1301003568,301000,Unchanged,-73.906671,40.562564


In [95]:
station_lat = 40.600
station_long = -73.948

In [105]:
street_frame['eu_distance'] = pow(station_lat - street_frame['lat'],2) + pow(station_long - street_frame['lon'], 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [116]:
building_frame['eu_distance'] = pow(station_lat - building_frame['lat'],2) + pow(station_long - building_frame['lon'], 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [117]:
building_frame.sort_values('eu_distance', ascending=True).iloc[0:10,:]

Unnamed: 0,feat_code,elevation,geom,source_id,sub_code,status,lon,lat,eu_distance
1100990,3020,46.89,POINT (-73.94798502528953 40.599898253226954),18302029992,302000,Unchanged,-73.947985,40.599898,1.057665e-08
1101021,3020,46.73,POINT (-73.94805678341106 40.599912394167724),18302030023,302000,Unchanged,-73.948057,40.599912,1.089914e-08
1101022,3020,46.84,POINT (-73.94791292147072 40.59991232905482),18302030024,302000,Unchanged,-73.947913,40.599912,1.526886e-08
1101032,3020,46.84,POINT (-73.9478398527347 40.59991687957364),18302030034,302000,Unchanged,-73.94784,40.599917,3.255615e-08
1101605,3020,38.34,POINT (-73.94793320140974 40.60018141074357),18302030607,302000,Unchanged,-73.947933,40.600181,3.737191e-08
1197136,3020,24.9546,POINT (-73.948118634545 40.60016299517335),18302126658,302000,Unchanged,-73.948119,40.600163,4.064158e-08
1101698,3020,20.96,POINT (-73.94806914783616 40.60021995427857),18302030700,302000,Unchanged,-73.948069,40.60022,5.316131e-08
1101424,3020,37.06,POINT (-73.94820932345235 40.600097764412375),18302030426,302000,Unchanged,-73.948209,40.600098,5.337419e-08
1101058,3020,46.84,POINT (-73.94777581556663 40.59993120571953),18302030060,302000,Unchanged,-73.947776,40.599931,5.499131e-08
1101693,3020,40.38,POINT (-73.94791228708402 40.6002181822038),18302030695,302000,Unchanged,-73.947912,40.600218,5.529703e-08


In [118]:
building_frame.sort_values('eu_distance', ascending=True).iloc[0:10,:]['elevation'].mean()

39.58346

In [125]:
CLOSEST_N = 10

In [126]:
def get_street_elevation(row):
    
    station_lat = row['lat']
    station_long = row['lon']
    
    # Get euclidian distance of each *street* point, get closest n, then average elevation        
    street_frame['eu_distance'] = pow(station_lat - street_frame['lat'],2) + pow(station_long - street_frame['lon'], 2)
    closest_street_frame = street_frame.sort_values('eu_distance', ascending=True).iloc[0:CLOSEST_N,:]
    avg_street_elevation = closest_street_frame['elevation'].mean()
    
    return avg_street_elevation


def get_building_elevation(row):
    
    station_lat = row['lat']
    station_long = row['lon']
    
    # Get euclidian distance of each *building* point, get closest n, then average elevation  
    building_frame['eu_distance'] = pow(station_lat - building_frame['lat'],2) + pow(station_long - building_frame['lon'], 2)
    closest_building_frame = building_frame.sort_values('eu_distance', ascending=True).iloc[0:CLOSEST_N,:]
    avg_building_elevation = closest_building_frame['elevation'].mean()
    
    print row['name']
    
    return avg_building_elevation

In [127]:
station_frame['street_elevation'] = station_frame.apply(get_street_elevation, axis=1)
station_frame['building_elevation'] = station_frame.apply(get_building_elevation, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [131]:
station_frame.head(100)

Unnamed: 0,name,lat,lon,h,street_elevation,building_elevation
0,KNYBROOK41,40.600,-73.948,13.0,8.792996,39.583460
1,KNYBROOK40,40.717,-73.963,64.0,32.836291,76.218000
2,KNYBROOK54,40.624,-74.013,89.0,29.978261,110.081000
3,KNYBROOK49,40.695,-73.928,62.0,54.526489,111.238000
4,KNYBROOK51,40.714,-73.948,39.0,32.836291,35.341226
5,KNYBROOK101,40.686,-73.968,118.0,32.836291,137.632996
6,KNYBROOK103,40.687,-73.986,52.0,31.447026,79.934000
7,KNYBROOK106,40.728,-73.945,16.0,50.857721,34.195976
8,KNYBROOK109,40.577,-73.958,3.0,5.312353,93.294703
9,KNYBROOK115,40.705,-74.009,-107506.0,32.836291,229.053000


In [133]:
station_frame.to_csv('station_elevation_closest_10.csv')

In [134]:
station_frame['diff'] = station_frame['h'] - station_frame['street_elevation']

In [137]:
station_frame.sort_values('diff', ascending=True).head(120)

Unnamed: 0,name,lat,lon,h,street_elevation,building_elevation,diff
72,KNYLONGI7,40.747,-73.974,-107506.0,50.857721,90.737,-107556.857721
164,KNYNEWYO382,40.764,-73.973,-107506.0,50.857721,282.587,-107556.857721
153,KNYNEWYO264,40.75,-73.977,-107506.0,50.857721,117.117736,-107556.857721
133,KNYNEWYO193,40.751,-73.978,-107506.0,50.857721,299.103556,-107556.857721
62,KNYLONGI11,40.749,-73.971,-107506.0,50.857721,191.729,-107556.857721
125,KNYNEWYO168,40.762,-73.986,-107506.0,50.857721,181.035,-107556.857721
68,KNYLONGI33,40.749,-73.974,-107506.0,50.857721,190.55897,-107556.857721
130,KNYNEWYO184,40.708,-74.018,-107506.0,32.836291,210.442382,-107538.836291
161,KNYNEWYO349,40.707,-74.008,-107506.0,32.836291,263.46509,-107538.836291
135,KNYNEWYO198,40.71,-74.015,-107506.0,32.836291,243.383848,-107538.836291
