# Read and Write DNAinfo Data

This notebook shows a basic approach to reading and writing the DNAinfo "Draw Your Neighborhood" GeoJSON files, primarily using geopandas.

In [18]:
import os

import geopandas as gpd
import pandas as pd

## New York Data

In [20]:
# Merge every New York drawing into one GeoDataFrame and export it as GeoJSON.
# The dataset is laid out as <city dir>/<neighbourhood>/<drawing>.geojson.
ny_dir = r'C:\Users\djl543\OneDrive\Draw-Your-Neighborhood-master\Draw-Your-Neighborhood-master\New_York' # input dir location

# Get the names of all the New York neighbourhood folders.
# Only sub-directories are kept: a stray file sitting next to the
# neighbourhood folders would otherwise make the inner os.listdir() fail.
# Sorting makes the row order of the output reproducible between runs.
ny_nhoods = sorted(
    hood for hood in os.listdir(ny_dir)
    if os.path.isdir(os.path.join(ny_dir, hood))
)

# Read each drawing exactly once and collect the resulting frames.
# Concatenating a single time at the end avoids re-copying the growing table
# on every iteration, and removes the need for a throw-away "template" record
# that later has to be dropped again.
frames = []
for hood in ny_nhoods:
    hood_dir = os.path.join(ny_dir, hood)
    for drawing in sorted(os.listdir(hood_dir)):
        # Skip anything that is not a drawing (hidden/system files, etc.).
        if not drawing.lower().endswith('.geojson'):
            continue
        frames.append(gpd.read_file(os.path.join(hood_dir, drawing)))

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" when the input directory holds no drawings.
if not frames:
    raise ValueError('No .geojson drawings found under ' + ny_dir)

# pd.concat may hand back a plain DataFrame on older library versions, so the
# result is wrapped explicitly to guarantee a GeoDataFrame.
data = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))

# Set the coordinate system to WGS-84
data.crs = {'init': 'epsg:4326'}

# Export the geopandas dataframe as a single geojson. NB geojsons always use wgs84, unless otherwise specified.
data.to_file('raw_wgs84.geojson', driver='GeoJSON')

In [16]:
# Reload the merged New York GeoJSON with geopandas and reproject it in one
# step. Target CRS: EPSG:32118 - New York Long Island, NAD83-based, metres.
# To use UTM Zone 18N (metres) instead, swap the code for 'epsg:32618'.
readdata = gpd.read_file('raw_wgs84.geojson').to_crs({'init': 'epsg:32118'})

In [28]:
# Write the projected New York data out as an ESRI Shapefile.
# Tip: `import fiona; fiona.supported_drivers` lists the available drivers.
readdata.to_file(filename='NYC_raw_epsg32118.shp', driver='ESRI Shapefile')

## Chicago Data

In [25]:
# Merge every Chicago drawing into one GeoDataFrame and export it as GeoJSON.
# The dataset is laid out as <city dir>/<neighbourhood>/<drawing>.geojson.
chicago_dir = r'C:\Users\djl543\OneDrive\Draw-Your-Neighborhood-master\Draw-Your-Neighborhood-master\Chicago' # input dir location

# Get the names of all the Chicago neighbourhood folders.
# Only sub-directories are kept: a stray file sitting next to the
# neighbourhood folders would otherwise make the inner os.listdir() fail.
# Sorting makes the row order of the output reproducible between runs.
chicago_nhoods = sorted(
    hood for hood in os.listdir(chicago_dir)
    if os.path.isdir(os.path.join(chicago_dir, hood))
)

# Read each drawing exactly once and collect the resulting frames.
# Concatenating a single time at the end avoids re-copying the growing table
# on every iteration, and removes the need for a throw-away "template" record
# that later has to be dropped again.
frames = []
for hood in chicago_nhoods:
    hood_dir = os.path.join(chicago_dir, hood)
    for drawing in sorted(os.listdir(hood_dir)):
        # Skip anything that is not a drawing (hidden/system files, etc.).
        if not drawing.lower().endswith('.geojson'):
            continue
        frames.append(gpd.read_file(os.path.join(hood_dir, drawing)))

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" when the input directory holds no drawings.
if not frames:
    raise ValueError('No .geojson drawings found under ' + chicago_dir)

# pd.concat may hand back a plain DataFrame on older library versions, so the
# result is wrapped explicitly to guarantee a GeoDataFrame.
data = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))

# Set the coordinate system to WGS-84
data.crs = {'init': 'epsg:4326'}

# Export the geopandas dataframe as a single geojson. NB geojsons always use wgs84, unless otherwise specified.
data.to_file('Chicago_raw_wgs84.geojson', driver='GeoJSON')

In [30]:
# Reload the merged Chicago GeoJSON with geopandas and reproject it in one
# step. Target CRS: EPSG:3528 - NAD83(NSRS2007) / Illinois East, metres.
# To use UTM Zone 16N (metres) instead, swap the code for 'epsg:32616'.
readdata = gpd.read_file('Chicago_raw_wgs84.geojson').to_crs({'init': 'epsg:3528'})

In [31]:
# Write the projected Chicago data out as an ESRI Shapefile.
# Tip: `import fiona; fiona.supported_drivers` lists the available drivers.
readdata.to_file(filename='Chicago_raw_epsg3528.shp', driver='ESRI Shapefile')