In [1]:
# importing libraries
import json
import pandas as pd
import numpy as np
import folium
import branca
import math
from IPython.display import display

In [2]:
# Age brackets of table B01001 that make up the "65 and over" population.
# Each bracket exists once for males and once for females.
_SENIOR_AGE_BRACKETS = (
    "65 and 66 years",
    "67 to 69 years",
    "70 to 74 years",
    "75 to 79 years",
    "80 to 84 years",
    "85 years and over",
)


def _readable_column_names(columns_meta):
    """Return human-readable column names for the age CSV.

    `columns_meta` is the "columns" mapping of table B01001 from the metadata
    file. The CSV starts with "geoid" and "name", which are not listed in the
    metadata, so they are prepended here.
    """
    names = ["geoid", "name"]
    seen_brackets = set()
    for column in columns_meta.values():
        label = column["name"]
        if len(label) < 8:
            # Short labels are "Total:", "Male:" and "Female:" -> drop the colon.
            names.append(label.strip(":"))
        elif label not in seen_brackets:
            # The metadata lists all male brackets first, then the female ones
            # under the same labels: first occurrence is male, second is female.
            names.append(label + " male")
            seen_brackets.add(label)
        else:
            names.append(label + " female")
    return names


def _nan_to_none(values):
    """Convert numeric values to plain floats, mapping missing values to None."""
    return [None if pd.isna(value) else float(value) for value in values]


def add_to_map(my_map, location_path, names_path, info_path, colorscale, cap, threshold, features, pop_data):
    """Draw the blocks of one data set on the map, coloured by their 65+ share.

    Parameters
    ----------
    my_map : object the folium layers are added to (a folium map).
    location_path : path of the GeoJSON file with the block geometries; every
        feature needs "geoid" and "name" properties.
    names_path : path of the JSON metadata file describing table B01001.
    info_path : path of the CSV file with the age counts per block.
    colorscale : callable mapping a capped share to a fill colour.
    cap : upper bound applied to the share before it is coloured.
    threshold : blocks whose capped share is below this value are drawn
        transparent.
    features : list that receives one GeoJSON feature per block, with the
        uncapped share stored under the "pop%_+65" property.
    pop_data : list that receives the capped share of every CSV row.

    Returns
    -------
    (features, pop_data) : the same two list objects, extended in place.

    Raises
    ------
    AssertionError : if the GeoJSON and the CSV differ in size or the CSV
        contains duplicated geoids.

    Blocks without data (share is NaN, e.g. total population 0) get None as
    their share, are drawn transparent and show "None" in their popup. A block
    whose geoid has no CSV row is treated the same way.
    """
    # importing locations; the GeoJSON also carries the age counts, but those
    # are processed with pandas from the CSV instead
    with open(location_path) as f:
        block_geojson = json.load(f)

    # importing info about column names
    with open(names_path) as f:
        metadata = json.load(f)

    # importing info about age in csv format
    df = pd.read_csv(info_path)

    # dropping error columns, useless for plotting
    df = df.drop(columns=[col for col in df.columns if "Error" in col])

    # changing df column names to a human readable format
    df.columns = _readable_column_names(metadata["tables"]["B01001"]["columns"])

    # adding the 65+ count, its share of the block's total population and the
    # capped share; skipna=False keeps the NaN propagation of a plain "+" chain
    senior_columns = [
        "{} {}".format(bracket, sex)
        for sex in ("female", "male")
        for bracket in _SENIOR_AGE_BRACKETS
    ]
    df["65 and over"] = df[senior_columns].sum(axis=1, skipna=False)
    df["65 and over percentage"] = df["65 and over"] / df["Total"]
    df["65 and over cap percentage"] = df["65 and over percentage"].clip(upper=cap)

    # checking that both data sources have the same size and geoids are unique
    n_blocks = len(block_geojson["features"])
    assert n_blocks == len(df), \
        "GeoJSON has {} features but the CSV has {} rows".format(n_blocks, len(df))
    assert len(df) == df["geoid"].nunique(), \
        "duplicated geoid values in {}".format(info_path)

    # one lookup table per share instead of scanning the DataFrame per block;
    # missing shares become None so they can be tested with "is None" and are
    # serialised as JSON null
    capped_shares = _nan_to_none(df["65 and over cap percentage"])
    capped_by_geoid = dict(zip(df["geoid"], capped_shares))
    share_by_geoid = dict(zip(df["geoid"], _nan_to_none(df["65 and over percentage"])))

    # adding data to compute stats
    pop_data.extend(capped_shares)

    # creating style function (uses the capped share)
    def style_function(feature):
        pop_per = capped_by_geoid.get(feature["properties"]["geoid"])
        hidden = pop_per is None or pop_per < threshold
        return {"fillOpacity": 0.8,
                "weight": 0,
                "fillColor": "#ffffff00" if hidden else colorscale(pop_per)
        }

    # adding info to map
    for block in block_geojson["features"]:
        properties = block["properties"]
        pop_per = share_by_geoid.get(properties["geoid"])  # share without cap

        # popup with the block name and the rounded, formatted share
        label = None if pop_per is None else "{}%".format(round(pop_per * 100, 2))
        text = "{}: {}".format(properties["name"], label)

        # adding block with proper style and its popup (added to the map once)
        gj = folium.GeoJson(block, style_function=style_function)
        gj.add_child(folium.Popup(text))
        gj.add_to(my_map)

        # adding block to the global json; block.copy() is shallow, so the
        # properties dict is shared with the block handed to folium above
        feature_block = block.copy()
        feature_block["properties"]["pop%_+65"] = pop_per
        features.append(feature_block)

    return features, pop_data

In [19]:
# Summary statistics of the 65+ share per block; the values match the
# describe() output shown further down in this notebook.
data_stats = {
    "count": 2202.0,
    "mean": 0.14309,
    "std": 0.071672,
    "min": 0.0,
    "25%": 0.097801,
    "50%": 0.132974,
    "75%": 0.175784,
    "max": 1.0,
}

# Input files of the data set, all stored under "<data_path>_data/":
# 1. location path (coordinates file, format: geoJSON)
# 2. names path (human readable columns file, format: json)
# 3. info path (age info file, format: csv)
data_path = "ny_census_track"
l_path = '{}_data/{}.geojson'.format(data_path, data_path)
n_path = '{}_data/metadata.json'.format(data_path)
i_path = '{}_data/{}.csv'.format(data_path, data_path)

# Colour-scale cap and visibility threshold are both set to the median share.
# NOTE(review): with cap == threshold every block that is coloured at all has
# the capped value `cap`, so all visible blocks share one colour - confirm
# that this is intended.
cap = threshold = data_stats["50%"]
colorscale = branca.colormap.linear.Purples_05.scale(0, cap)

# Map centred on Central Park, plus the accumulators filled by add_to_map.
my_map = folium.Map(location=[40.785091, -73.968285], zoom_start=10)
features, pop_data = [], []

features, pop_data = add_to_map(
    my_map=my_map,
    location_path=l_path,
    names_path=n_path,
    info_path=i_path,
    colorscale=colorscale,
    cap=cap,
    threshold=threshold,
    features=features,
    pop_data=pop_data,
)

In [None]:
# Display the map inline: the bare expression is the cell's output.
my_map

In [21]:
# Save the final map as map.html (path is relative to the working directory).
my_map.save('map.html')

In [None]:
# Exploration aid: evaluate branca's linear colormap collection to inspect it
# (the map above uses its Purples_05 entry).
branca.colormap.linear

# Processing GeoJSON

In [None]:
# Exploration aid: print the built-in documentation of folium.Map.
help(folium.Map)

In [5]:
# Summary statistics of the capped 65+ shares collected by add_to_map.
# The builtin float is used instead of the np.float alias, which NumPy
# deprecated in 1.20 and removed in 1.24.
pd.Series(pop_data, dtype=float).describe()

count    2202.000000
mean        0.143090
std         0.071672
min         0.000000
25%         0.097801
50%         0.132974
75%         0.175784
max         1.000000
dtype: float64

In [6]:
# Serialise the map with to_json(); json_map is not read by any other cell
# visible in this notebook.
json_map = my_map.to_json()

In [8]:
# my_map.to_json() already returns a JSON-encoded string, so it is written
# as-is; passing it through json.dump would encode it a second time and the
# file would contain a single quoted, escaped string instead of a JSON object.
with open('map_JSON.json', 'w') as fp:
    fp.write(my_map.to_json())

In [56]:
# Load the raw GeoJSON from l_path into b_geojson for inspection; l_path is
# defined in the map set-up cell, which must have been run first.
with open(l_path) as f:
    b_geojson = json.load(f)

In [57]:
# Inspect the raw GeoJSON loaded from l_path.
b_geojson

{'type': 'FeatureCollection',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'geoid': '15000US340259900000',
    'name': 'Block Group 0, Monmouth, NJ',
    'B01001001': 0.0,
    'B01001001, Error': 12.0,
    'B01001002': 0.0,
    'B01001002, Error': 12.0,
    'B01001003': 0.0,
    'B01001003, Error': 12.0,
    'B01001004': 0.0,
    'B01001004, Error': 12.0,
    'B01001005': 0.0,
    'B01001005, Error': 12.0,
    'B01001006': 0.0,
    'B01001006, Error': 12.0,
    'B01001007': 0.0,
    'B01001007, Error': 12.0,
    'B01001008': 0.0,
    'B01001008, Error': 12.0,
    'B01001009': 0.0,
    'B01001009, Error': 12.0,
    'B01001010': 0.0,
    'B01001010, Error': 12.0,
    'B01001011': 0.0,
    'B01001011, Error': 12.0,
    'B01001012': 0.0,
    'B01001012, Error': 12.0,
    'B01001013': 0.0,
    'B01001013, Error': 12.0,
    'B01001014': 0.0,
    'B01001014, Error': 12.0,
    'B01001015': 0.0,
    'B01

In [9]:
# Inspect the first feature collected by add_to_map.
features[0]

{'type': 'Feature',
 'properties': {'geoid': '14000US34003002100',
  'name': 'Census Tract 21, Bergen, NJ',
  'B01001001': 1513.0,
  'B01001001, Error': 149.0,
  'B01001002': 709.0,
  'B01001002, Error': 80.0,
  'B01001003': 48.0,
  'B01001003, Error': 28.0,
  'B01001004': 38.0,
  'B01001004, Error': 26.0,
  'B01001005': 55.0,
  'B01001005, Error': 24.0,
  'B01001006': 31.0,
  'B01001006, Error': 20.0,
  'B01001007': 4.0,
  'B01001007, Error': 6.0,
  'B01001008': 1.0,
  'B01001008, Error': 3.0,
  'B01001009': 2.0,
  'B01001009, Error': 5.0,
  'B01001010': 25.0,
  'B01001010, Error': 20.0,
  'B01001011': 11.0,
  'B01001011, Error': 12.0,
  'B01001012': 30.0,
  'B01001012, Error': 15.0,
  'B01001013': 45.0,
  'B01001013, Error': 26.0,
  'B01001014': 35.0,
  'B01001014, Error': 23.0,
  'B01001015': 54.0,
  'B01001015, Error': 26.0,
  'B01001016': 33.0,
  'B01001016, Error': 18.0,
  'B01001017': 62.0,
  'B01001017, Error': 26.0,
  'B01001018': 11.0,
  'B01001018, Error': 9.0,
  'B01001019'

In [13]:
# Assemble the output document: a FeatureCollection holding the features
# collected by add_to_map, plus the summary statistics under an extra
# 'stats' key.
final_geoJSON = {
   'type': 'FeatureCollection',
   'crs': {'type': 'name',
           'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
   'features': features,
   'stats': data_stats
}

In [14]:
# Check the statistics stored in the output document.
final_geoJSON["stats"]

{'count': 2202.0,
 'mean': 0.14309,
 'std': 0.071672,
 'min': 0.0,
 '25%': 0.097801,
 '50%': 0.132974,
 '75%': 0.175784,
 'max': 1.0}

In [15]:
# Number of features collected by add_to_map.
len(features)

2250

In [16]:
# Write the assembled document to disk. The file name is spelled exactly as
# in the cell that reads it back, so it must stay in sync with that cell.
with open('proccessed_geoJSON.json', 'w') as fp:
    json.dump(final_geoJSON, fp)

In [17]:
# Read the written file back into block_geojson to check the saved content.
with open('proccessed_geoJSON.json') as f:
    block_geojson = json.load(f)

In [None]:
# List the property keys of the first saved feature; 'pop%_+65', the key
# written by add_to_map, is expected among them.
block_geojson["features"][0]["properties"].keys()