In [3]:
#imports and defs
import ipywidgets as widgets
from IPython.display import display
from numpy.core import shape_base
#some required libraries for the conversion.
import numpy as np
import pandas as pd
import jenkspy
import json
import sys
from pathlib import Path
import os
import copy
import math
from shapely.geometry import Polygon
import io
from ipyvuetify.extra import FileInput


enddata = {}
#converts the value to numeric (int if int, else float). 
#returns -1 if:
#   a) the value is not numeric, check for strings reflecting numeric values is implemented
#   b) if the value is nan
def makeFloat(val):
    """Convert ``val`` to a number, using -1 as the "not usable" marker.

    Parameters
    ----------
    val : any
        Value to convert. ``int`` and ``float`` instances are passed through
        unchanged; everything else is handed to ``float()``.

    Returns
    -------
    int or float
        The numeric value, or -1 when ``val`` is NaN or cannot be converted
        to a float (non-numeric strings, ``None``, containers, ...).
    """
    # Integers can never be NaN, so they are returned as-is. Skipping the NaN
    # check also avoids an OverflowError from math.isnan on very large ints.
    if isinstance(val, int):
        return val

    if isinstance(val, float):
        number = val
    else:
        try:
            number = float(val)
        except (TypeError, ValueError):
            # ValueError: a string that does not spell a number.
            # TypeError: an object float() does not accept at all (e.g. None).
            return -1

    return -1 if math.isnan(number) else number

#mediator function to call jenkspy jenks calculation
def getJenks(arr, num):
    """Return the Jenks natural breaks of ``arr`` for ``num`` classes.

    Thin wrapper around ``jenkspy.jenks_breaks``. The class count is passed
    positionally because the keyword was renamed between jenkspy releases
    (``nb_class`` in older versions, ``n_classes`` in newer ones); the
    positional form works with both.
    """
    return jenkspy.jenks_breaks(arr, num)

#get the jenks values from the jenksVals, which have been aggregated per parameters.
def makeJenks(data, jenksVals):
    """Attach Jenks breaks to ``data`` and show the result as JSON text.

    Parameters
    ----------
    data : dict
        The feature collection built by ``processfiles``. It is modified in
        place: a ``"jenks"`` entry is added.
    jenksVals : dict
        Maps each parameter name to the list of values collected for it.

    Returns
    -------
    dict
        ``data`` itself, now including the ``"jenks"`` entry.

    Side effects: displays a ``Textarea`` widget holding the serialised result
    and prints a completion message.
    """
    class_counts = [12]  # Jenks steps to be used

    def _json_fallback(obj):
        # Objects exposing tolist() (e.g. numpy scalars/arrays) are converted to
        # plain Python values; anything else falls back to its string form so
        # that serialisation never aborts the whole run.
        if hasattr(obj, "tolist"):
            return obj.tolist()
        return str(obj)

    breaks = {"all": {}}
    for key, values in jenksVals.items():
        breaks["all"][key] = {
            count: getJenks(values, count) for count in class_counts
        }

    data["jenks"] = breaks  # attach the jenks values to the data.

    # Serialise as real JSON: str(dict) would produce a Python repr (single
    # quotes), which a JSON parser cannot read.
    result = widgets.Textarea(
        value=json.dumps(data, separators=(',', ':'), default=_json_fallback),
        placeholder='Waiting for output...',
        description='Result:',
        disabled=False,
        layout=widgets.Layout(width='80%', height="500px")
        )
    display(result)
    print("Finished jenks. Result is in the result textarea.")
    return data


<h1> Preprocessing for the urban diversity web tools </h1>

The urban superdiversity web tool requires its data in a specific JSON format. You can create the format yourself or use the tools provided in this notebook. 
Below, find an example of the data used in the web app, with explanatory comments. If you only wanted to know what the data is supposed to look like, you've found it and won't need the rest of this notebook!



```
{"type":"FeatureCollection",    //geojson format
"cityYear":"Vancouver-2006",
"features":[                    //each data point is in the features array. 
  {"type":"Feature",            //each data point is its own object, always type feature
  "properties":{                //properties are copied from the shape file information
    "DAUID":"59150004",
    "CSDUID":"5915055",
    "CCSUID":"5915020",
    "CDUID":"5915",
    "ERUID":"5920",
    "PRUID":"59",
    "CTUID":"9330133.01",
    "CMAUID":"933",
    "indices":{                  //indices is what was provided in the data csv file.
      "Population":353,
      "Ethnicity-raw-count":22,
      "Ethnicity-raw-normalized":0.062323,
      "Mobility-raw-pct":36.619718,
      "Generation-raw-SI":0.732222,
      "Education-raw-SI":0.796875,
      "Income-raw-SI":0.865077}
      },
    "geometry":{               // point is the centroid of the polygon in the shapefile
      "type":"Point",
      "coordinates":[-123.25887520351195,49.38854863564009]},
    "geom_store":{
      "type":"Polygon",
      "coordinates":[[[a,b],   //left out the proper coordinates, but this is an array of many lat/lon tuples
  [c,d],
  [...]]]}},
  {...},
  {...},
  {...}]}                        //this array of objects contains all data points used on the map later.
```



<h3>Data format needed for these scripts to work</h3>



*   A shapefile, converted to geojson (use a program like ArcGIS to do this)
*   A csv file containing all the parameters/indices you want to analyse. One column of this csv file MUST identify the matching parameter to the shape file. This is what the two files will be merged on.

<h4> Step 1 </h4>
Upload the two files here, by clicking on the upload icon below "Files" in the left menu. The scripts expect one file ending in .csv and one ending in .json.

<h4> Step 2</h4>
Provide the names for the matching parameter in both files. Please type the names in the form below, which has been populated with sample names. The csv_comparator should be the column name in your csv file which contains the list of entities, while the geojson_comparator should be the property within the geojson file that matches the data in the csv column. 
You can also provide a meaningful name for your dataset.



In [4]:
def _load_geojson(json_data):
    """Parse GeoJSON given as a dict, bytes, text, an open file or a file path."""
    if isinstance(json_data, dict):
        return json_data
    if isinstance(json_data, (bytes, bytearray)):
        return json.loads(json_data)
    if hasattr(json_data, "read"):
        return json.load(json_data)
    text = str(json_data)
    if text.lstrip().startswith(("{", "[")):
        return json.loads(text)
    with open(text, encoding="utf-8") as handle:
        return json.load(handle)


def _outer_ring(geometry):
    """Return the coordinate ring used to build the polygon for the centroid."""
    coords = geometry["coordinates"]
    # If the value three levels down is already a number, coords[0] is a ring
    # of [x, y] pairs. Integers count as numbers too, so whole-number
    # coordinates are not mistaken for a deeper nesting level.
    if isinstance(coords[0][0][0], (int, float)):
        return coords[0]
    # NOTE(review): for more deeply nested coordinates (presumably
    # MultiPolygon) only the first polygon's first ring is used - confirm this
    # is intended.
    return coords[0][0]


def processfiles(json_data, csv_data):
    """Merge the csv indices into the GeoJSON features and compute Jenks breaks.

    Steps:
      1. Load the csv (``csv_data``) and the GeoJSON (``json_data``).
      2. For every feature, find the csv row whose ``csv_comparator`` column
         equals the feature's ``json_comparator`` property and copy the row's
         values into ``properties["indices"]``. Columns whose name contains
         "ndex" and the comparator column itself are skipped.
      3. Build a new feature collection in which ``geometry`` is the polygon
         centroid and the original geometry is kept as ``geom_store``.
      4. Collect all non-negative values per index and hand them to
         ``makeJenks``.

    Parameters
    ----------
    json_data : bytes, str, dict, file-like or path
        The GeoJSON feature collection.
    csv_data : anything ``pandas.read_csv`` accepts
        The csv holding the indices.

    Returns
    -------
    dict or None
        The new feature collection (``makeJenks`` adds the ``"jenks"`` entry to
        it in place), or ``None`` when one of the input files could not be
        loaded.

    Raises
    ------
    KeyError
        If the csv comparator column does not exist in the csv file, or a
        feature lacks the GeoJSON comparator property.
    """
    print("Processing started...")
    csv_comp = csv_comparator.value
    json_comp = json_comparator.value
    # `name` is the "Dataset name" text widget; store its text, not the widget
    # object. A plain string is accepted as well.
    dataset_name = getattr(name, "value", name)

    # pandas parser errors, empty-file errors and decoding errors are all
    # ValueError subclasses.
    try:
        df_indices = pd.read_csv(csv_data)
        print("Loaded csv.")
    except (OSError, ValueError) as err:
        print("Error loading csv.")
        print(err)
        return None

    # read the geojson (sometimes only json ending)
    # The standard json module is used instead of pandas.read_json, because a
    # DataFrame cannot represent feature collections that carry additional
    # top-level members.
    try:
        geojson = _load_geojson(json_data)
        print("Loaded json")
    except (OSError, ValueError) as err:
        print("Error while loading geojson.")
        print(err)
        return None

    if not isinstance(geojson, dict) or "features" not in geojson:
        print("Error while loading geojson.")
        print("The file does not contain a 'features' array.")
        return None

    if csv_comp not in df_indices.columns:
        raise KeyError(
            f"Column {csv_comp!r} not found in the csv file. "
            f"Available columns: {list(df_indices.columns)}"
        )

    features = geojson["features"]
    id_column = df_indices[csv_comp]

    # Merging code. Grabs the row from the csv that matches the
    # geojson_comparator from each entry in the geojson and puts the indices in
    # a new indices property.
    for feat in features:
        indices = {}
        feat["properties"]["indices"] = indices
        matches = df_indices.loc[id_column == makeFloat(feat["properties"][json_comp])]
        records = matches.to_dict(orient='records')
        if not records:
            continue
        # Only the first matching row is used. Reading the scalar from the
        # record (rather than the whole column of matches) keeps this working
        # when several rows share the same identifier.
        for key, raw in records[0].items():
            if "ndex" in str(key) or key == csv_comp:
                continue
            # TODO: This needs to change if we're ever going to use
            # non-numerical values in our evaluations.
            indices[key] = round(makeFloat(raw), 6)

    print("Finished merging. Starting geometry calculations.")

    # create the new resulting json.
    newJson = {"type": "FeatureCollection", "cityYear": dataset_name, "features": []}

    # write the data to the new geojson object. Also adds centroid property for
    # the geometry
    for feat in features:
        geometry = feat["geometry"]
        centroid = Polygon(_outer_ring(geometry)).centroid
        newJson["features"].append({
            "type": "Feature",
            "properties": feat["properties"],
            "geometry": {"type": "Point", "coordinates": [centroid.x, centroid.y]},
            "geom_store": geometry,
        })

    print("Finished. Starting Jenks calculations.")

    # aggregate all values for all parameters in a list per parameter, which
    # will be used for the jenks calculation. Negative values (including the -1
    # "not usable" marker from makeFloat) are left out.
    jenks_values = {}
    for feat in newJson["features"]:
        for key, val in feat['properties']['indices'].items():
            if val >= 0:
                jenks_values.setdefault(key, []).append(val)

    makeJenks(newJson, jenks_values)
    return newJson



#### Troubleshooting 


*   Code fails to execute: Please read the error message. The most likely culprits are: Could not find csv or json file.
*   Resulting file does not show any of the indices in the csv! Please make sure you provided the correct spelling (case sensitive) for the column name in the csv for the geographical entity you want to match, and the same for the parameter in the shape file. The format of both parameters must match as well (for example, not floats in one file and integers in the other): the values are compared directly, and some may be strings while others are not, so make sure your index data is prepared accordingly and matches the shape file (or vice versa).
*   Jenks calculation fails: Only numerical values work. 


#### Upload json file

In [5]:

# Upload widget for the GeoJSON file; callfunction reads the uploaded file
# from it via get_files().
file_input_json = FileInput(placeholder="Select JSON")
# Bare expression on the last line so the notebook renders the widget.
file_input_json


FileInput(events=['upload'])

#### Upload CSV file

In [6]:
# Upload widget for the csv file; callfunction reads the uploaded file from it
# via get_files().
file_input_csv = FileInput(label="Select CSV")
# Bare expression on the last line so the notebook renders the widget.
file_input_csv

FileInput(events=['upload'])

In [7]:
# Text field for the csv column to merge on; processfiles reads its .value.
csv_comparator = widgets.Text(
    description="CSVfile Comparator   ",
    style ={'description_width': 'initial'},
    disabled=False,
    layout = widgets.Layout(width='50%',height='40px')
)

# Text field for the GeoJSON property to merge on; processfiles reads its .value.
json_comparator = widgets.Text(
    description="GEOJSON Comparator",
    style ={'description_width': 'initial'},
    disabled=False,
    layout = widgets.Layout(width='50%',height='40px')
)

# Text field for the dataset name, pre-filled with "result". processfiles
# refers to this widget through the module-level name `name`.
name = widgets.Text(
    value = "result",
    style ={'description_width': 'initial'},
    description="Dataset name",
    disabled=False,
    layout = widgets.Layout(width='50%',height='40px')
)
# Render the three input fields below the cell.
display(csv_comparator)
display(json_comparator)
display(name)






#processfiles(json_data, file2[0]["file_obj"].read())

Text(value='', description='CSVfile Comparator   ', layout=Layout(height='40px', width='50%'), style=Descripti…

Text(value='', description='GEOJSON Comparator', layout=Layout(height='40px', width='50%'), style=DescriptionS…

Text(value='result', description='Dataset name', layout=Layout(height='40px', width='50%'), style=DescriptionS…

In [8]:
from io import StringIO
# Button that starts the conversion, plus an Output widget that receives
# everything the conversion prints or displays.
button = widgets.Button(description="Do Magic!")
output = widgets.Output()
display(button, output)

def callfunction(a):
    """Click handler: read both uploaded files and run ``processfiles``.

    ``a`` is the argument supplied by ``button.on_click`` and is not used.
    Everything printed or displayed while processing is captured in the
    ``output`` widget shown below the button.
    """
    with output:
        # Start from a clean slate so repeated clicks do not stack results.
        output.clear_output()
        json_files = file_input_json.get_files()
        csv_files = file_input_csv.get_files()
        if not json_files or not csv_files:
            print("Please upload both a json file and a csv file first.")
            return
        # "utf-8-sig" decodes plain UTF-8 as well, but drops a leading byte
        # order mark that would otherwise end up in the first column name.
        csvString = StringIO(csv_files[0]["file_obj"].read().decode("utf-8-sig"))
        processfiles(json_files[0]["file_obj"].read(), csvString)

button.on_click(callfunction)




Button(description='Do Magic!', style=ButtonStyle())

Output()