In [1]:
import numpy as npf
import pandas as pd
import geopandas as gpd

In [2]:
# Raise pandas' display limits so wide and long frames are not truncated.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 999)

# Green Spaces Dataset Wrangling

In [3]:
# Path of the raw green-spaces layer, relative to the notebook's working directory.
fname = "./Data/espacesVerts.geojson"
# Read the GeoJSON file with geopandas; the result is used as `grass_gdf` below.
grass_gdf = gpd.read_file(fname)

In [4]:
# Preview the first rows to see which columns the raw layer provides.
grass_gdf.head()

Unnamed: 0,gml_id,nom,surf_nat,numero,nom_voie,numsite,nuquart,nusect,objectid,id_senh,materiel,geometry
0,gev_senh.64928,MAIL LOUISE BOURGEOIS,PELOUSE,0972SENH01,Saint-Hélier,972,8.0,19.0,64928,,,"MULTIPOLYGON (((-1.66297 48.10281, -1.66295 48..."
1,gev_senh.64929,CHEMIN DES LOUVRIES,PRAIRIE,1362SENH02,des Iles Chaussey,1362,5.0,14.0,64929,,,"MULTIPOLYGON (((-1.62954 48.14565, -1.62954 48..."
2,gev_senh.64930,CHEMIN DES LOUVRIES,PRAIRIE,1362SENH01,des Iles Chaussey,1362,5.0,14.0,64930,,,"MULTIPOLYGON (((-1.63062 48.14727, -1.63063 48..."
3,gev_senh.64931,MAIL LOUISE BOURGEOIS,PRAIRIE HUMIDE,0972SENH11,Saint-Hélier,972,8.0,19.0,64931,,,"MULTIPOLYGON (((-1.66363 48.10279, -1.66367 48..."
4,gev_senh.64932,MAIL LOUISE BOURGEOIS,PELOUSE,0972SENH04,Saint-Hélier,972,8.0,19.0,64932,,,"MULTIPOLYGON (((-1.66313 48.10266, -1.66300 48..."


We only need a few pieces of information:
    - the name of the place
    - the type 
    - the neighborhood ID
    - and the geometry

In [5]:
# Remove the columns that are not needed downstream; the frame is modified in place.
unused_columns = [
    'gml_id', 'numero', 'nom_voie', 'numsite',
    'nuquart', 'objectid', 'id_senh', 'materiel',
]
grass_gdf.drop(columns=unused_columns, inplace=True)

In [6]:
# 'nusect' is the neighborhood ID; list its distinct values to check for
# missing entries.
grass_gdf['nusect'].unique()

array([19., 14., 18., 13., 10., 38., 36., 29.,  9., 11., 42.,  1., 25.,
       17., 33., 16., 28., 12., 24., 35., 26.,  4., 22., 21., 34., 15.,
       39.,  6., 27., 30.,  5.,  8., nan])

In [7]:
# Print every row whose neighborhood ID ('nusect') is missing.
# The isna() mask states the intent explicitly (the previous `x != x` test relied
# on NaN being unequal to itself) and restricts the Python-level loop to the
# matching rows instead of iterating over the whole frame.
missing_sector_rows = grass_gdf[grass_gdf['nusect'].isna()]
for index, row in missing_sector_rows.iterrows():
    print(row)

nom                                                      None
surf_nat                                                 None
nusect                                                    NaN
geometry    (POLYGON ((-1.695644759804986 48.1310227430573...
Name: 11862, dtype: object
nom                                                      None
surf_nat                                                 None
nusect                                                    NaN
geometry    (POLYGON ((-1.696365864615534 48.1310983453010...
Name: 11863, dtype: object
nom                                                      None
surf_nat                                                 None
nusect                                                    NaN
geometry    (POLYGON ((-1.696723202092756 48.1312662209487...
Name: 11864, dtype: object
nom                                                      None
surf_nat                                                 None
nusect                                             

We are not dropping the rows with a NaN neighborhood ID just yet, as we may still be able to locate them from their geometry.

In [8]:
# Give the remaining attribute columns readable English names (in place),
# then preview the result.
english_names = {
    'nom': 'Name',
    'surf_nat': 'Type of Surface',
    'nusect': 'Neighborhood_ID',
}
grass_gdf.rename(columns=english_names, inplace=True)
grass_gdf.head()

Unnamed: 0,Name,Type of Surface,Neighborhood_ID,geometry
0,MAIL LOUISE BOURGEOIS,PELOUSE,19.0,"MULTIPOLYGON (((-1.66297 48.10281, -1.66295 48..."
1,CHEMIN DES LOUVRIES,PRAIRIE,14.0,"MULTIPOLYGON (((-1.62954 48.14565, -1.62954 48..."
2,CHEMIN DES LOUVRIES,PRAIRIE,14.0,"MULTIPOLYGON (((-1.63062 48.14727, -1.63063 48..."
3,MAIL LOUISE BOURGEOIS,PRAIRIE HUMIDE,19.0,"MULTIPOLYGON (((-1.66363 48.10279, -1.66367 48..."
4,MAIL LOUISE BOURGEOIS,PELOUSE,19.0,"MULTIPOLYGON (((-1.66313 48.10266, -1.66300 48..."


In [9]:
# Display the renamed frame (the notebook renders its first and last rows).
grass_gdf

Unnamed: 0,Name,Type of Surface,Neighborhood_ID,geometry
0,MAIL LOUISE BOURGEOIS,PELOUSE,19.0,"MULTIPOLYGON (((-1.66297 48.10281, -1.66295 48..."
1,CHEMIN DES LOUVRIES,PRAIRIE,14.0,"MULTIPOLYGON (((-1.62954 48.14565, -1.62954 48..."
2,CHEMIN DES LOUVRIES,PRAIRIE,14.0,"MULTIPOLYGON (((-1.63062 48.14727, -1.63063 48..."
3,MAIL LOUISE BOURGEOIS,PRAIRIE HUMIDE,19.0,"MULTIPOLYGON (((-1.66363 48.10279, -1.66367 48..."
4,MAIL LOUISE BOURGEOIS,PELOUSE,19.0,"MULTIPOLYGON (((-1.66313 48.10266, -1.66300 48..."
...,...,...,...,...
11907,SQUARE DU GRAND PLACIS,PELOUSE,36.0,"MULTIPOLYGON (((-1.66140 48.13922, -1.66141 48..."
11908,SQUARE DU GRAND PLACIS,PELOUSE,36.0,"MULTIPOLYGON (((-1.66035 48.14065, -1.66040 48..."
11909,SQUARE DU GRAND PLACIS,PELOUSE,36.0,"MULTIPOLYGON (((-1.66079 48.13953, -1.66077 48..."
11910,RUE EMILE BERNARD,PELOUSE,36.0,"MULTIPOLYGON (((-1.66740 48.13968, -1.66740 48..."


In [10]:
# List the distinct surface categories present in the data.
grass_gdf['Type of Surface'].unique()

array(['PELOUSE', 'PRAIRIE', 'PRAIRIE HUMIDE', 'PRAIRIE FLEURIE',
       'FLEURISSEMENT EXTENSIF BULBE', 'DALLAGE JOINT GAZON',
       'PRAIRIE DE FAUCHE', 'PRAIRIE RUSTIQUE', 'DALLE ALVEOLEE',
       'EVERGREEN', 'PRAIRIE SECHE', None], dtype=object)

In [11]:
# Translating 'Type of Surface' column
# NB: the translations are deliberately loose, as no detailed definition of
# each category was available.
# A single lookup table replaces one hand-written assignment per category, so
# adding or correcting a translation is a one-line change and the column is
# rewritten in one pass. Values that are not keys of the table (including
# missing ones) are left as they are.
surface_translations = {
    'PELOUSE': 'Grass',
    'PRAIRIE': 'Meadow',
    'PRAIRIE HUMIDE': 'Wet Meadow',
    'PRAIRIE FLEURIE': 'Flowery Meadow',
    'FLEURISSEMENT EXTENSIF BULBE': 'Extensive Flowering',
    'DALLAGE JOINT GAZON': 'Grass-Covered joints',
    'PRAIRIE DE FAUCHE': 'Hay field',
    'PRAIRIE RUSTIQUE': 'Hardy Meadow',
    'DALLE ALVEOLEE': 'Hollow-Core Slab',
    'EVERGREEN': 'Evergreen',
    'PRAIRIE SECHE': 'Dry Meadow',
}
grass_gdf['Type of Surface'] = grass_gdf['Type of Surface'].replace(surface_translations)

In [12]:
# Store a representative point for each geometry as Latitude / Longitude columns.
# The centroids are computed once and reused for both coordinates instead of
# being recomputed for each column.
# NOTE(review): the coordinates shown in this notebook look like degrees; if the
# layer is in a geographic CRS, geopandas warns that centroids may be slightly
# inaccurate — consider projecting before taking the centroid. TODO confirm.
centroids = grass_gdf['geometry'].centroid
grass_gdf['Latitude'] = centroids.y
grass_gdf['Longitude'] = centroids.x

In [13]:
# Rows without a neighborhood ID are removed (in place) first, because NaN
# cannot be converted to an integer; the column is then cast to int.
grass_gdf.dropna(subset=['Neighborhood_ID'], inplace=True)
neighborhood_ids = grass_gdf['Neighborhood_ID']
grass_gdf['Neighborhood_ID'] = neighborhood_ids.astype('int')

In [14]:
# Keep only the final columns, in a fixed order.
fixed_columns = [
    'Name',
    'Neighborhood_ID',
    'Type of Surface',
    'Latitude',
    'Longitude',
    'geometry',
]
grass_gdf = grass_gdf[fixed_columns]

In [15]:
# Display the cleaned frame before exporting it.
grass_gdf

Unnamed: 0,Name,Neighborhood_ID,Type of Surface,Latitude,Longitude,geometry
0,MAIL LOUISE BOURGEOIS,19,Grass,48.102762,-1.662893,"MULTIPOLYGON (((-1.66297 48.10281, -1.66295 48..."
1,CHEMIN DES LOUVRIES,14,Meadow,48.146857,-1.630007,"MULTIPOLYGON (((-1.62954 48.14565, -1.62954 48..."
2,CHEMIN DES LOUVRIES,14,Meadow,48.146189,-1.630393,"MULTIPOLYGON (((-1.63062 48.14727, -1.63063 48..."
3,MAIL LOUISE BOURGEOIS,19,Wet Meadow,48.102745,-1.663677,"MULTIPOLYGON (((-1.66363 48.10279, -1.66367 48..."
4,MAIL LOUISE BOURGEOIS,19,Grass,48.102661,-1.663064,"MULTIPOLYGON (((-1.66313 48.10266, -1.66300 48..."
...,...,...,...,...,...,...
11907,SQUARE DU GRAND PLACIS,36,Grass,48.139286,-1.661406,"MULTIPOLYGON (((-1.66140 48.13922, -1.66141 48..."
11908,SQUARE DU GRAND PLACIS,36,Grass,48.140683,-1.660336,"MULTIPOLYGON (((-1.66035 48.14065, -1.66040 48..."
11909,SQUARE DU GRAND PLACIS,36,Grass,48.139516,-1.660742,"MULTIPOLYGON (((-1.66079 48.13953, -1.66077 48..."
11910,RUE EMILE BERNARD,36,Grass,48.139718,-1.667388,"MULTIPOLYGON (((-1.66740 48.13968, -1.66740 48..."


In [16]:
# Write the cleaned layer to disk in GeoJSON format.
output_path = "./Data/cleaned_data/grasslands.geojson"
grass_gdf.to_file(output_path, driver='GeoJSON')