In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns; sns.set()

In [2]:
def read_shapefile(shp_path):
    """
    Read a shapefile into a Pandas dataframe with a 'coords' column holding
    the geometry information. This uses the pyshp package.

    Parameters
    ----------
    shp_path
        Path of the shapefile; passed unchanged to ``shapefile.Reader``.

    Returns
    -------
    pandas.DataFrame
        One row per record and one column per attribute field, plus a
        'coords' column holding the ``points`` list of each shape.
    """
    import shapefile
    import pandas as pd

    # Use the reader as a context manager so the files it opened are closed
    # as soon as everything has been pulled into memory (previously the
    # reader was left open for the lifetime of the kernel).
    with shapefile.Reader(shp_path) as sf:
        # Skip the first entry of sf.fields: pyshp lists its 'DeletionFlag'
        # pseudo-field there, and it has no matching value in the records.
        fields = [x[0] for x in sf.fields][1:]
        records = sf.records()
        shps = [s.points for s in sf.shapes()]

    # Write the attribute records into a dataframe and attach the point lists.
    df = pd.DataFrame(columns=fields, data=records)
    df = df.assign(coords=shps)

    return df

# Load the bromley_40 shapefile through the pyshp-based helper and preview it.
bromley_shp_path = "C:/Users/Maan/Desktop/Georeferencing/height2017/height2017/bromley_40.shp"
df_bromley = read_shapefile(bromley_shp_path)
df_bromley.head()

Unnamed: 0,fid_,planning_a,max_height,parking_sp,one_bed_un,two_bed_un,three_bed_,four_plus_,has_underg,studio_uni,...,rejected_s,rejected_n,rejected_l,rejected_p,rejected_1,units,F_AREA,BUFF_DIST,ORIG_FID,coords
0,0,18/04199/OUT,0.0,11,0,0,0,0,0,0,...,,,,,,9,313.823828,40.0,0,"[(543075.2259, 169383.09770000022), (543075.25..."
1,0,16/04099/FULL1,8.802,8,0,0,0,0,0,0,...,False,False,False,False,True,8,318.270785,40.0,1,"[(542467.5720999996, 168850.72419999912), (542..."
2,0,16/05875/FULL1,9.19,13,0,0,0,0,0,0,...,,,,,,9,356.178421,40.0,2,"[(542746.0261000004, 168918.10290000078), (542..."
3,0,16/03597/FULL1,9.73,13,0,0,0,0,0,0,...,True,False,False,False,False,9,361.438515,40.0,3,"[(542744.8821, 168925.1301000015), (542744.967..."
4,0,15/04152/FULL1,9.71,9,0,0,0,0,0,0,...,,,,,,8,325.165649,40.0,4,"[(542919.8344, 169031.75550000105), (542919.92..."


In [24]:
import geopandas

# Read the same shapefile with geopandas; the preview below shows the shapes
# arriving in a 'geometry' column instead of raw point lists.
bromley_shp_path = "C:/Users/Maan/Desktop/Georeferencing/height2017/height2017/bromley_40.shp"
gdf = geopandas.read_file(bromley_shp_path)
gdf.head()

Unnamed: 0,fid_,planning_a,max_height,parking_sp,one_bed_un,two_bed_un,three_bed_,four_plus_,has_underg,studio_uni,...,rejected_s,rejected_n,rejected_l,rejected_p,rejected_1,units,F_AREA,BUFF_DIST,ORIG_FID,geometry
0,0,18/04199/OUT,0.0,11,0,0,0,0,0,0,...,,,,,,9,313.823828,40.0,0,"POLYGON ((543075.226 169383.098, 543075.255 16..."
1,0,16/04099/FULL1,8.802,8,0,0,0,0,0,0,...,False,False,False,False,True,8,318.270785,40.0,1,"POLYGON ((542467.572 168850.724, 542464.956 16..."
2,0,16/05875/FULL1,9.19,13,0,0,0,0,0,0,...,,,,,,9,356.178421,40.0,2,"POLYGON ((542746.026 168918.103, 542746.028 16..."
3,0,16/03597/FULL1,9.73,13,0,0,0,0,0,0,...,True,False,False,False,False,9,361.438515,40.0,3,"POLYGON ((542744.882 168925.130, 542744.968 16..."
4,0,15/04152/FULL1,9.71,9,0,0,0,0,0,0,...,,,,,,8,325.165649,40.0,4,"POLYGON ((542919.834 169031.756, 542919.920 16..."


In [25]:
# Converting status values from strings to 0 and 1:
# 'approved' / 'appeal_allowed' -> 1, 'refused' / 'appeal_dismissed' -> 0.
# Any other status string is not in the mapping and becomes NaN.
status_codes = {'approved': 1, 'appeal_allowed': 1, 'refused': 0, 'appeal_dismissed': 0}
# 0 and 1 map onto themselves so that re-running this cell on an already
# converted column is a no-op instead of silently turning every value into NaN.
status_codes.update({0: 0, 1: 1})
gdf['status'] = gdf['status'].map(status_codes)

In [26]:
# Preview the first recoded status values.
gdf.loc[:, 'status'].head()

0    1
1    0
2    1
3    0
4    1
Name: status, dtype: int64

In [29]:
# Print the on_corner column (pandas elides the middle rows).
# NOTE: the recorded output carries execution count 29, i.e. it was produced
# after the TRUE/FALSE -> 1/0 conversion cell (28) that appears further down.
on_corner_flags = gdf['on_corner']
print(on_corner_flags)


0      0
1      0
2      1
3      1
4      0
      ..
108    0
109    0
110    0
111    1
112    1
Name: on_corner, Length: 113, dtype: int64


In [28]:
# Converting on_corner values from TRUE/FALSE to 1/0 (missing values count as 0).
# 0 and 1 map onto themselves so that re-running this cell on an already
# converted column is a no-op instead of silently turning every value into NaN.
on_corner_codes = {'TRUE': 1, 'FALSE': 0, None: 0, 1: 1, 0: 0}
gdf['on_corner'] = gdf['on_corner'].map(on_corner_codes)

In [34]:
# Converting reasons-for-rejection values from TRUE/FALSE to 1/0
# (missing values count as 0).
# 0 and 1 map onto themselves so that re-running this cell on already
# converted columns is a no-op instead of silently turning them into NaN.
rejection_codes = {'TRUE': 1, 'FALSE': 0, None: 0, 1: 1, 0: 0}
rejection_columns = ['rejected_s', 'rejected_n', 'rejected_l', 'rejected_p', 'rejected_1']
# One loop instead of five copy-pasted lines: every column gets the same mapping.
for rejection_column in rejection_columns:
    gdf[rejection_column] = gdf[rejection_column].map(rejection_codes)

In [35]:
# Preview the first recoded on_corner flags.
gdf.loc[:, 'on_corner'].head()

0    0
1    0
2    1
3    1
4    0
Name: on_corner, dtype: int64

In [36]:
# Check which Python/NumPy type the 'units' values are stored as
# (looks up the row labelled 0).
first_units_value = gdf['units'][0]
print(type(first_units_value))




<class 'numpy.int64'>


In [37]:
# Make sure the unit counts are 64-bit integers before using them as a divisor.
units_as_int = gdf['units'].astype('int64')
gdf['units'] = units_as_int

# Parking spaces per unit, rounded to two decimals.
parking_ratio = gdf['parking_sp'].div(gdf['units'])
gdf['parking_per_unit'] = parking_ratio.round(2)

In [38]:
# Preview the cleaned frame, including the new parking_per_unit column.
gdf.head(n=5)

Unnamed: 0,fid_,planning_a,max_height,parking_sp,one_bed_un,two_bed_un,three_bed_,four_plus_,has_underg,studio_uni,...,rejected_n,rejected_l,rejected_p,rejected_1,units,F_AREA,BUFF_DIST,ORIG_FID,geometry,parking_per_unit
0,0,18/04199/OUT,0.0,11,0,0,0,0,0,0,...,0,0,0,0,9,313.823828,40.0,0,"POLYGON ((543075.226 169383.098, 543075.255 16...",1.22
1,0,16/04099/FULL1,8.802,8,0,0,0,0,0,0,...,0,0,0,1,8,318.270785,40.0,1,"POLYGON ((542467.572 168850.724, 542464.956 16...",1.0
2,0,16/05875/FULL1,9.19,13,0,0,0,0,0,0,...,0,0,0,0,9,356.178421,40.0,2,"POLYGON ((542746.026 168918.103, 542746.028 16...",1.44
3,0,16/03597/FULL1,9.73,13,0,0,0,0,0,0,...,0,0,0,0,9,361.438515,40.0,3,"POLYGON ((542744.882 168925.130, 542744.968 16...",1.44
4,0,15/04152/FULL1,9.71,9,0,0,0,0,0,0,...,0,0,0,0,8,325.165649,40.0,4,"POLYGON ((542919.834 169031.756, 542919.920 16...",1.12


In [39]:
# Write the cleaned table to CSV without the row index.
cleaned_csv_path = 'C:/Users/Maan/Desktop/Georeferencing/height2017/height2017/bromley_cleaned.csv'
gdf.to_csv(cleaned_csv_path, index=False)