## This program creates Neighborhood Delineation across different time periods


In [4]:
#!/usr/bin/env python
# coding: utf-8
import json, math, copy, sys
from geosnap.io import store_ltdb
from geosnap import Community, datasets
from geosnap.io import store_census

import pandas as pd
import shapely.wkt
import shapely.geometry
from datetime import datetime
from datetime import timedelta
from pathlib import Path
import urllib.parse
import webbrowser
import os
import pprint
from sklearn.preprocessing import minmax_scale
import numpy as np
from scipy import stats
from notebook import notebookapp
from IPython.core.display import display, HTML
import geopandas as gpd

### Variable names in "variable" column are needed to use built-in data from geosnap.

In [18]:
# Preview the first rows of geosnap's codebook; its "variable" column holds the
# names that can be listed under 'variables' in the param cells below.
datasets.codebook().head()

Unnamed: 0,variable,label,formula,ltdb,ncdb,census_1990_form,census_1990_table_column,census_2000_form,census_2000_table_column,acs,category,notes
0,geoid,FIPS code,,geoid,GEO2010,,,,,,,
1,n_mexican_pop,persons of Mexican parentage or ancestry,,mex,MEXIC,SF1,P0090001,SF1,PCT011004,B03001_004E,Ethnicity & Immigration,
2,n_cuban_pop,persons of Cuban parentage or ancestry,,cuban,CUBAN,SF1,P0090004,SF1,PCT011006,B03001_006E,Ethnicity & Immigration,
3,n_puerto_rican_pop,persons of Puerto Rican parentage or ancestry,,pr,PRICAN,SF1,P0090003,SF1,PCT011005,B03001_005E,Ethnicity & Immigration,
4,n_russian_pop,persons of Russian/USSR parentage or ancestry,,ruanc,,SF3,P0330022,SF3,PCT016064+PCT016053+PCT016052+PCT016037,B04004_064E,Ethnicity & Immigration,ruancXX (page 17 of LTDB codebook) suggests th...


### Run clustering using built-in data from geosnap (Sequence Analysis is included)

In [35]:
# Run configuration: built-in LTDB data, Cook County, 1980-2010, with sequence analysis.
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Longitudinal Neighborhood Change, Cook County (tract level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_Temporal_1980_1990_2000_2010_Sequence",   # used in the output folder name (QUAL_<suffix>) and the generated data file names
    'state_fips': None,    # FIPS codes for every US state are available at https://github.com/suhanmappingideas/geosnap-viz/blob/master/state_id.csv
    'msa_fips': None,      # For more options: http://su-gis.iptime.org/LNE/pick_POI.html
    'county_fips': "17031",# County codes are also available at the link right above.
    'years': [1980,1990,2000,2010], # Available years: 1970, 1980, 1990, 2000 and 2010
    'method': "kmeans",                # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                    # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                 # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                  # columns passed to the clustering call
            "p_nonhisp_white_persons",
            "p_nonhisp_black_persons",
            "p_hispanic_persons",
            "p_asian_persons",
            "p_foreign_born_pop",
            "p_edu_college_greater",
            "p_unemployment_rate",
            "p_employed_manufacturing",
            "p_poverty_rate",
            "p_vacant_housing_units",
            "p_owner_occupied_units",
            "p_housing_units_multiunit_structures",
            "median_home_value",
            "p_structures_30_old",
            "p_household_recent_move",
            "p_persons_under_18",
            "p_persons_over_60",
                 ],
    # Sequence-analysis settings. When this key is absent or falsy, the 'Sequence'
    # column is dropped from the table written to the VARIABLES file.
    'Sequence': {'seq_clusters': 5, 'dist_type': 'tran'},
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': True,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': True,   # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': False                  # chord diagram
}

### Run clustering using built-in data from geosnap (four periods)

In [91]:
# Run configuration: built-in LTDB data, Cook County, four periods (1980-2010), no sequence column.
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Longitudinal Neighborhood Change, Cook County (tract level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_Temporal_1980_1990_2000_2010",   # used in the output folder name (QUAL_<suffix>) and the generated data file names
    'state_fips': None,    # FIPS codes for every US state are available at https://github.com/suhanmappingideas/geosnap-viz/blob/master/state_id.csv
    'msa_fips': None,      # For more options: http://su-gis.iptime.org/LNE/pick_POI.html
    'county_fips': "17031",# County codes are also available at the link right above.
    'years': [1980,1990,2000,2010],              # Available years: 1970, 1980, 1990, 2000 and 2010
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                              # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                               # columns passed to the clustering call
            "p_nonhisp_white_persons",
            "p_nonhisp_black_persons",
            "p_hispanic_persons",
            "p_asian_persons",
            "p_foreign_born_pop",
            "p_edu_college_greater",
            "p_unemployment_rate",
            "p_employed_manufacturing",
            "p_poverty_rate",
            "p_vacant_housing_units",
            "p_owner_occupied_units",
            "p_housing_units_multiunit_structures",
            "median_home_value",
            "p_structures_30_old",
            "p_household_recent_move",
            "p_persons_under_18",
            "p_persons_over_60",
                 ],
    # Uncomment to keep the 'Sequence' column in the output table:
    #'Sequence': {'seq_clusters': 5, 'dist_type': 'tran'},
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': True,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': False,  # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': True                   # chord diagram
}

### Run clustering using built-in data from geosnap (two periods)

In [390]:
# Run configuration: built-in LTDB data, Cook County, two periods (1980 and 2010).
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Longitudinal Neighborhood Change, Cook County (tract level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_Temporal_1980_2010",              # "Albertville"
    'state_fips': None,    # FIPS codes for every US state are available at https://github.com/suhanmappingideas/geosnap-viz/blob/master/state_id.csv
    'msa_fips': None,      # For more options: http://su-gis.iptime.org/LNE/pick_POI.html
    'county_fips': "17031",# County codes are also available at the link right above.
    'years': [1980, 2010],           # Available years: 1970, 1980, 1990, 2000 and 2010
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                              # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                               # columns passed to the clustering call
            "p_nonhisp_white_persons",
            "p_nonhisp_black_persons",
            "p_hispanic_persons",
            "p_asian_persons",
            "p_foreign_born_pop",
            "p_edu_college_greater",
            "p_unemployment_rate",
            "p_employed_manufacturing",
            "p_poverty_rate",
            "p_vacant_housing_units",
            "p_owner_occupied_units",
            "p_housing_units_multiunit_structures",
            "median_home_value",
            "p_structures_30_old",
            "p_household_recent_move",
            "p_persons_under_18",
            "p_persons_over_60",
                 ],
    # Uncomment to keep the 'Sequence' column in the output table:
    #'Sequence': {'seq_clusters': 5, 'dist_type': 'tran'},
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': True,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': True,   # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': False                  # chord diagram
}

### Run clustering using user-supplied data (LTDB attributes, tract level). In this example the user provides both the CSV and the shapefile

In [5]:
# Run configuration: user-supplied CSV + shapefile (no FIPS keys), Cook County tracts, 1980-2010.
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Longitudinal Neighborhood Change, Cook County (tract level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_Temporal_1980_1990_2000_2010_from_file",   # used in the output folder name (QUAL_<suffix>) and the generated data file names
    'inputCSV': "attributes/LTDB_2018_1990_2000_2010__tract_Cook_byTract_normalized.csv",   # attribute table; its first column is used as the unit ID
    'shapefile': "shp/Cook_County_Tract.shp",    # geometry source; matched to the CSV on the shapefile's GEOID10 field
    'years': [1980, 1990, 2000, 2010],           # Available years: 1970, 1980, 1990, 2000 and 2010
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                              # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                               # columns passed to the clustering call
            "p_nonhisp_white_persons",
            "p_nonhisp_black_persons",
            "p_hispanic_persons",
            "p_asian_persons",
            "p_foreign_born_pop",
            "p_edu_college_greater",
            "p_unemployment_rate",
            "p_employed_manufacturing",
            "p_poverty_rate",
            "p_vacant_housing_units",
            "p_owner_occupied_units",
            "p_housing_units_multiunit_structures",
            "median_home_value",
            "p_structures_30_old",
            "p_household_recent_move",
            "p_persons_under_18",
            "p_persons_over_60",
                 ],
    # Uncomment to keep the 'Sequence' column in the output table:
    #'Sequence': {'seq_clusters': 5, 'dist_type': 'tran'},
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': True,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': True,   # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': False                  # chord diagram
}

### Run clustering using user-supplied data (ACS 2018, tract level). In this example the user provides both the CSV and the shapefile

In [407]:
# Run configuration: user-supplied ACS 2018 CSV + tract shapefile, single year.
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Neighborhood, Cook County (tract level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_2018_from_ACS",     # used in the output folder name (QUAL_<suffix>) and the generated data file names
    'inputCSV': "attributes/ACS_2018_5year__tract_Cook_byTract_normalized.csv",   # attribute table; its first column is used as the unit ID
    'shapefile': "shp/Cook_County_Tract.shp",    # geometry source; matched to the CSV on the shapefile's GEOID10 field
    'years': [2018],
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                              # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                               # columns passed to the clustering call
            "Median monthly housing costs",
            "% below poverty",
            "% unemployed",
            "% with 4year college degree",
            "% manufacturing",
            "% service industry",
            "% structures more than 30 years old",
            "% households moved <10 years ago",
            "% multiunit structures",
            "% owner occupied housing",
            "% vacant housing",
            "% > 60 years old",
            "% < 18 years old",
            "% white",
            "% Asian",
            "% Hispanic",
            "% black",
            "% foreign born"
                 ],
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': False,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': False,  # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': False                  # chord diagram
}

### Run clustering using user-supplied data (ACS 2018, zipcode level). In this example the user provides both the CSV and the shapefile

In [38]:
# Run configuration: user-supplied ACS 2018 CSV + zipcode shapefile, single year.
# NOTE: several alternative `param` cells exist in this notebook; the one executed
# last is the one the following cells use.
param = {
    'title': "Neighborhood, Cook County (zipcode level)",   # replaces the page title in the HTML template
    'subject': "NEIGHBORHOOD",
    'filename_suffix': "Cook_2018_from_ACS_zipcode",   # used in the output folder name (QUAL_<suffix>) and the generated data file names
    'inputCSV': "attributes/ACS_2018_5year__zipcode_Cook_byZipcode_normalized.csv",   # attribute table; its first column is used as the unit ID
    'shapefile': "shp/zipcode_Cook_County.shp",  # geometry source; matched to the CSV on the shapefile's GEOID10 field
    'years': [2018],
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial
    'nClusters': 6,                              # Number of clusters. This option should be commented out for affinity_propagation, hdbscan and max_p
    'variables': [                               # columns passed to the clustering call
            "Median monthly housing costs",
            "% below poverty",
            "% unemployed",
            "% with 4year college degree",
            "% manufacturing",
            "% service industry",
            "% structures more than 30 years old",
            "% households moved <10 years ago",
            "% multiunit structures",
            "% owner occupied housing",
            "% vacant housing",
            "% > 60 years old",
            "% < 18 years old",
            "% white",
            "% Asian",
            "% Hispanic",
            "% black",
            "% foreign born"
                 ],
    'Maps_of_neighborhood': True,                # choropleth map: maps representing the clustering result
    'Temporal_change_in_neighborhoods': False,    # stacked chart: temporal change in neighborhoods over years
    'Parallel_Categories_Diagram_in_neighborhoods': False,  # parallel categories diagram
    'Chord_Diagram_in_neighborhoods': False                  # chord diagram
}

In [6]:
# Build the geosnap Community used by every following cell.
# Data source priority: msa_fips, then county_fips, then state_fips (built-in
# LTDB data); if none of those is set, fall back to a user-supplied CSV
# (attributes) plus shapefile (geometry).
community = None
if param.get('msa_fips'):
    community = Community.from_ltdb(years=param['years'], msa_fips=param['msa_fips'])
elif param.get('county_fips'):
    community = Community.from_ltdb(years=param['years'], county_fips=param['county_fips'])
elif param.get('state_fips'):
    community = Community.from_ltdb(years=param['years'], state_fips=param['state_fips'])

# User-supplied data: attributes come from the CSV, geometry from the shapefile.
if community is None and param.get('inputCSV'):
    community = Community()
    community.gdf = pd.read_csv(param['inputCSV'], dtype={'geoid': str})
    geoid = community.gdf.columns[0]          # first CSV column is the join key
    community.gdf['geoid'] = community.gdf['geoid'].astype(str)

    # Map each shapefile ID (GEOID10, as text) to its geometry object. Taking the
    # geometries directly avoids the string/WKT round trip, which failed on null
    # geometries, and a per-row .loc lookup, which failed on duplicate IDs.
    df_shape = gpd.read_file(param['shapefile'])
    geometry_by_id = dict(zip(df_shape["GEOID10"].astype(str), df_shape.geometry))

    # IDs missing from the shapefile get None and are filtered out below.
    geometry = [geometry_by_id.get(tractid) for tractid in community.gdf[geoid]]
    community.gdf.insert(len(community.gdf.columns), "geometry", geometry)

# Fail with an actionable message instead of an AttributeError on None.
if community is None:
    raise ValueError(
        "No data source configured: set one of 'msa_fips', 'county_fips', "
        "'state_fips', or provide 'inputCSV' together with 'shapefile' in param."
    )

# Infinite values cannot be clustered; treat them as missing.
community.gdf = community.gdf.replace([np.inf, -np.inf], np.nan)

# Keep only rows that have a geometry (needed for mapping and spatial clustering).
community.gdf = community.gdf[pd.notnull(community.gdf['geometry'])]

community.gdf

Unnamed: 0,geoid,n_asian_under_15,n_black_under_15,n_hispanic_under_15,n_native_under_15,n_white_under_15,n_persons_under_18,n_asian_over_60,n_black_over_60,n_hispanic_over_60,...,n_white_persons,year,n_total_housing_units_sample,p_white_over_60,p_black_over_60,p_hispanic_over_60,p_native_over_60,p_asian_over_60,p_disabled,geometry
0,17031010100,199.260498,642.615112,482.210419,0.0,347.709564,1646.888062,0.000000,59.778149,11.955630,...,,1980,2964.000000,6.403551,0.951022,0.190204,0.0,0.000000,3.407830,"POLYGON ((-87.67719899983393 42.0229420001732,..."
1,17031010201,86.901924,146.249573,170.977768,0.0,519.291992,987.008057,16.956472,8.478236,12.010835,...,,1980,2804.883301,20.415481,0.148386,0.210214,0.0,0.296773,3.956968,POLYGON ((-87.68465300003589 42.01948499993284...
2,17031010202,36.098076,60.750423,71.022232,0.0,215.708023,409.991974,7.043527,3.521764,4.989165,...,,1980,1165.116821,20.415482,0.148386,0.210214,0.0,0.296773,3.956968,"POLYGON ((-87.67682999990456 42.0194110001176,..."
3,17031010300,79.000000,81.000000,157.000000,0.0,389.000000,698.000000,27.000000,50.000000,56.000000,...,,1980,3359.000000,26.352395,0.772798,0.865533,0.0,0.417311,2.998454,POLYGON ((-87.67132999987956 42.01937400021001...
4,17031010400,7.000000,10.000000,10.000000,0.0,234.000000,358.000000,14.000000,0.000000,23.000000,...,,1980,2363.000000,17.813015,0.000000,0.473641,0.0,0.288303,3.088962,POLYGON ((-87.66345299995595 42.01282999970476...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5267,17031843700,20.000000,0.000000,64.000000,0.0,320.000000,513.000000,,,,...,,2010,992.000000,,,,,,,POLYGON ((-87.69682900035804 41.94966999997058...
5268,17031843800,46.000000,298.000000,12.000000,0.0,104.000000,583.000000,,,,...,,2010,892.000000,,,,,,,POLYGON ((-87.64553800027846 41.80886399977385...
5269,17031843900,0.000000,623.000000,2.000000,0.0,0.000000,735.000000,,,,...,,2010,2321.000000,,,,,,,"POLYGON ((-87.59294999974264 41.7750790002957,..."
5270,17031980000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,,,,...,,2010,0.000000,,,,,,,POLYGON ((-87.94024700029877 42.00717199958814...


## Write index.html

In [7]:
# Create the output folder QUAL_<filename_suffix>/data. index.html is written to
# QUAL_<filename_suffix>; the generated .js data files go into its data/ folder.
oDir = 'QUAL_' + param['filename_suffix']
path = Path(oDir + '/data')
path.mkdir(parents=True, exist_ok=True)

# Read the HTML template of the visualization. The context manager closes the
# handle even if reading fails.
with open("template/Qualitative_Analysis_Mapper.html", "r", encoding="utf-8") as ifile:
    contents = ifile.read()

# Insert the user's title and point the three <script> data references at the
# suffix-specific files produced by the cells below.
contents = contents.replace("Neighborhood Analysis Mapper", param['title'])
for stem in ("CONFIG", "GEO_JSON", "VARIABLES"):
    contents = contents.replace(
        "data/" + stem + ".js",
        "data/" + stem + "_" + param['filename_suffix'] + ".js",
    )

# Write the customized page.
with open(oDir + "/index.html", "w", encoding="utf-8") as ofile:
    ofile.write(contents)

print (contents)

<!DOCTYPE html>
<html>
<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	<title>Longitudinal Neighborhood Change, Cook County (tract level)</title>

	<script src="data/CONFIG_Cook_Temporal_1980_1990_2000_2010_from_file.js"></script>
	<script src="data/GEO_JSON_Cook_Temporal_1980_1990_2000_2010_from_file.js"></script>
	<script src="data/VARIABLES_Cook_Temporal_1980_1990_2000_2010_from_file.js"></script>

	<script src="../template/Neighborhood_Analysis_Mapper/lib/jQuery/jquery-3.3.1.js"></script>
	<script src="../template/Neighborhood_Analysis_Mapper/lib/d3/d3.min.js"></script>
	<!--script src="lib/d3-chord-diagrams-master/lib/d3.js"></script-->
	<!--script src="lib/d3-chord-diagrams-master/lib/underscore.js"></script>
	<script src="lib/d3-chord-diagrams-master/js/mapper.js"></script-->
	<script src="../template/Neighborhood_Analysis_Mapper/lib/moment/moment.min.js"></script>
	<script src="../template/Neighborhood_Analysis_Mapper/lib/geostats/geostats.min.js">

## Write CONFIG_XXX.js

In [8]:
# Read the configuration template (template/QUAL_CONFIG.js); the placeholders in
# it are filled in by the next cell. The context manager closes the handle.
with open("template/QUAL_CONFIG.js", "r", encoding="utf-8") as ifile:
    contents = ifile.read()

# Visualization switches: take the user's value when present, otherwise fall
# back to the defaults ("" for the subject, True for every chart).
SubjectName = param.get('subject', "")
Maps_of_neighborhood = param.get('Maps_of_neighborhood', True)
Temporal_change_in_neighborhoods = param.get('Temporal_change_in_neighborhoods', True)
Parallel_Categories_Diagram_in_neighborhoods = param.get('Parallel_Categories_Diagram_in_neighborhoods', True)
Chord_Diagram_in_neighborhoods = param.get('Chord_Diagram_in_neighborhoods', True)

# One map per requested year; the layer names are the years as strings.
NumOfMaps = len(param['years'])
InitialLayers = [str(year) for year in param['years']]
InitialLayers

['1980', '1990', '2000', '2010']

In [9]:
# Choose the size of each map from the number of maps shown side by side:
# 1 map -> 800px, 2 or fewer -> 450px, 3-4 -> 400px, 5 -> 350px, otherwise 300px.
if NumOfMaps == 1:
    map_size = "800px"
elif NumOfMaps <= 2:
    map_size = "450px"
elif NumOfMaps <= 4:
    map_size = "400px"
elif NumOfMaps <= 5:
    map_size = "350px"
else:
    map_size = "300px"
Map_width = Map_height = map_size

# Template placeholder -> JavaScript assignment that replaces it.
# The Python variables are deliberately NOT overwritten with these strings, so
# the cell can be re-run without a TypeError or nested strings. json.dumps also
# quotes/escapes SubjectName so that quotes in it cannot break the JS.
# (No NumOfMaps entry: the earlier code built such a string but never applied it.)
replacements = {
    "var InitialLayers = [];":
        "var InitialLayers = " + json.dumps(InitialLayers) + ";",
    'var SubjectName = "";':
        "var SubjectName = " + json.dumps(SubjectName) + ";",
    "var Maps_of_neighborhood = true;":
        "var Maps_of_neighborhood = " + json.dumps(Maps_of_neighborhood) + ";",
    "var Temporal_change_in_neighborhoods = true;":
        "var Temporal_change_in_neighborhoods = " + json.dumps(Temporal_change_in_neighborhoods) + ";",
    "var Parallel_Categories_Diagram_in_neighborhoods = true;":
        "var Parallel_Categories_Diagram_in_neighborhoods = " + json.dumps(Parallel_Categories_Diagram_in_neighborhoods) + ";",
    "var Chord_Diagram_in_neighborhoods = true;":
        "var Chord_Diagram_in_neighborhoods = " + json.dumps(Chord_Diagram_in_neighborhoods) + ";",
    'var Map_width  = "400px";':
        'var Map_width  = "' + Map_width + '";',
    'var Map_height = "400px";':
        'var Map_height = "' + Map_height + '";',
}
for placeholder, assignment in replacements.items():
    contents = contents.replace(placeholder, assignment)

# Write the filled-in configuration next to the other data files.
filename_GEO_CONFIG = "QUAL_" + param['filename_suffix'] + "/data/CONFIG_"+param['filename_suffix']+".js"
with open(filename_GEO_CONFIG, 'w', encoding="utf-8") as ofile:
    ofile.write(contents)

print (contents)

// Define the number of maps and some configuration parameters that you want to visualize.
var SubjectName = "NEIGHBORHOOD";
var InitialLayers = ["1980", "1990", "2000", "2010"];

/* Map Extent and Zoom level will be automatically adjusted when you do not define map center and zoom level */
//var Initial_map_center = [34.0522, -117.9];  
//var Initial_map_zoom_level = 8;   

var Maps_of_neighborhood = true;							//choropleth map: Maps representing categorical data  
var Temporal_change_in_neighborhoods = true;				//stacked chart: Temporal Change
var Parallel_Categories_Diagram_in_neighborhoods = true;	//parallel categories diagram
var Chord_Diagram_in_neighborhoods = false;					//chord diagram
  

var Num_Of_Decimal_Places = 2;                             // default = 2

var Map_width  = "400px";                                  // min 350px
var Map_height = "400px";                                  // min 300px


## Write GEO_JSON_XXX.js

In [10]:
# The first column of community.gdf is treated as the unit identifier.
geoid = community.gdf.columns[0]

# Reduce the table to one (identifier, geometry) row per unit by dropping
# repeated identifiers.
id_and_shape = community.gdf[[geoid, 'geometry']]
tracts = id_and_shape.drop_duplicates(subset=geoid)
tracts

Unnamed: 0,geoid,geometry
0,17031010100,"POLYGON ((-87.67719899983393 42.0229420001732,..."
1,17031010201,POLYGON ((-87.68465300003589 42.01948499993284...
2,17031010202,"POLYGON ((-87.67682999990456 42.0194110001176,..."
3,17031010300,POLYGON ((-87.67132999987956 42.01937400021001...
4,17031010400,POLYGON ((-87.66345299995595 42.01282999970476...
...,...,...
1313,17031843700,POLYGON ((-87.69682900035804 41.94966999997058...
1314,17031843800,POLYGON ((-87.64553800027846 41.80886399977385...
1315,17031843900,"POLYGON ((-87.59294999974264 41.7750790002957,..."
1316,17031980000,POLYGON ((-87.94024700029877 42.00717199958814...


In [11]:
# Write the unit geometries as a JavaScript file that defines GEO_JSON, a
# GeoJSON FeatureCollection with one feature per row of `tracts`.
filename_GEO_JSON = "QUAL_" + param['filename_suffix'] + "/data/GEO_JSON_"+param['filename_suffix']+".js"

# The context manager closes the file even if a geometry fails to convert.
with open(filename_GEO_JSON, 'w', encoding="utf-8") as ofile:
    ofile.write('var GEO_JSON =\n')
    ofile.write('{"type":"FeatureCollection", "features": [\n')

    for tract in tracts.itertuples():
        shape = tract.geometry
        # Skip units without a geometry: NaN shows up as a float, and None
        # would make shapely.geometry.mapping() raise.
        if shape is None or isinstance(shape, float):
            continue
        feature = {
            "type": "Feature",
            "geometry": shapely.geometry.mapping(shape),
            "properties": {geoid: getattr(tract, geoid)},
        }
        # The trailing comma after the last feature is fine here: the file is a
        # JavaScript array literal, not strict JSON.
        ofile.write(json.dumps(feature)+',\n')

    # Close the features array and the FeatureCollection object.
    ofile.write(']}\n')

## Write VARIABLES_XXX.js

### The clustering algorithm runs in the cell below. It might take a while, depending on which clustering algorithm you picked

In [12]:
# Cluster the units and derive their neighborhood sequences.
geoid       = community.gdf.columns[0]
method      = param['method']
# Optional: the param comments tell users to comment 'nClusters' out for some
# methods, so a missing key must not raise KeyError.
nClusters   = param.get('nClusters')
years       = param['years']
variables   = param['variables']

# Keep only the requested years.
community.gdf = community.gdf[community.gdf.year.isin(years)]

ASPATIAL_METHODS = {'kmeans', 'ward', 'affinity_propagation', 'spectral', 'gaussian_mixture', 'hdbscan'}
SPATIAL_METHODS = {'ward_spatial', 'spenc', 'skater', 'azp', 'max_p'}

# Pass n_clusters only when the user supplied it; otherwise the library default applies.
cluster_kwargs = {'columns': variables, 'method': method}
if nClusters is not None:
    cluster_kwargs['n_clusters'] = nClusters

if method in ASPATIAL_METHODS:
    clusters = community.cluster(**cluster_kwargs)
elif method in SPATIAL_METHODS:
    clusters = community.cluster_spatial(**cluster_kwargs)
else:
    # Previously an unknown method surfaced later as a NameError on `clusters`.
    raise ValueError(
        "Unknown clustering method {!r}; expected one of {}".format(
            method, sorted(ASPATIAL_METHODS | SPATIAL_METHODS)
        )
    )

# Use the sequence method to obtain the distance matrix of neighborhood sequences.
# Honor the user's 'Sequence' settings; without them fall back to the values
# this cell always used before (dist_type='tran', 5 sequence clusters).
sequence_options = param.get('Sequence') or {}
gdf_new, df_wide, seq_dis_mat = clusters.sequence(
    cluster_col=method,
    seq_clusters=sequence_options.get('seq_clusters', 5),
    dist_type=sequence_options.get('dist_type', 'tran'),
)
df_wide

  return linkage(y, method='ward', metric='euclidean')


year,1980,1990,2000,2010,tran_5
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
17031010100,5,2,1,5,1
17031010201,5,5,5,5,0
17031010202,5,5,5,5,0
17031010300,5,5,5,5,0
17031010400,4,4,5,4,1
...,...,...,...,...,...
17031843500,2,2,2,0,3
17031843600,1,1,1,1,0
17031843700,5,5,5,4,3
17031843800,1,1,1,1,0


In [13]:
# df_pivot is a second name for the same table as df_wide (no copy is made),
# so the in-place rename below is visible through both names.
df_pivot = df_wide

# The last column holds the sequence cluster (named like 'tran_5');
# give it the fixed name 'Sequence'.
lastColumn = df_pivot.columns[-1]
df_pivot.rename(columns={lastColumn: 'Sequence'}, inplace=True)
df_pivot

year,1980,1990,2000,2010,Sequence
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
17031010100,5,2,1,5,1
17031010201,5,5,5,5,0
17031010202,5,5,5,5,0
17031010300,5,5,5,5,0
17031010400,4,4,5,4,1
...,...,...,...,...,...
17031843500,2,2,2,0,3
17031843600,1,1,1,1,0
17031843700,5,5,5,4,3
17031843800,1,1,1,1,0


In [14]:
# Keep the 'Sequence' column only when the user configured sequence analysis
# (a truthy 'Sequence' entry in param); otherwise remove it in place.
sequence_requested = bool(param.get('Sequence'))
if not sequence_requested:
    df_pivot.drop(columns=['Sequence'], inplace=True)
df_pivot

year,1980,1990,2000,2010
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17031010100,5,2,1,5
17031010201,5,5,5,5
17031010202,5,5,5,5
17031010300,5,5,5,5
17031010400,4,4,5,4
...,...,...,...,...
17031843500,2,2,2,0
17031843600,1,1,1,1
17031843700,5,5,5,4
17031843800,1,1,1,1


In [15]:
# Write the cluster table as a JavaScript file that defines GEO_VARIABLES:
# a header row [<id column>, <year>, ...] followed by one row per unit.
filename_GEO_VARIABLES = "QUAL_" + param['filename_suffix'] + "/data/VARIABLES_"+param['filename_suffix']+".js"


def _to_cluster_label(value):
    """Return value as an int, or -9999 when it is missing (NaN or None)."""
    try:
        return int(value)
    except (ValueError, TypeError):        # int(NaN) -> ValueError, int(None) -> TypeError
        return -9999


heading = [geoid] + [str(column) for column in df_pivot.columns]

with open(filename_GEO_VARIABLES, 'w', encoding="utf-8") as ofile:
    ofile.write('var GEO_VARIABLES =\n')
    ofile.write('[\n')
    ofile.write('  '+json.dumps(heading)+',\n')
    for _, row in df_pivot.reset_index().iterrows():
        aLine = row.tolist()
        # Position 0 is the unit id; every later position is a cluster label.
        # Conversion starts at 1: starting at 2 left the first year's column
        # unconverted, so missing values there were written as NaN, not -9999.
        aLine[1:] = [_to_cluster_label(value) for value in aLine[1:]]
        ofile.write('  '+json.dumps(aLine)+',\n')
    ofile.write(']\n')

#Print what's inside GEO_VARIABLES.js
with open(filename_GEO_VARIABLES, 'r', encoding="utf-8") as f2:
    data = f2.read()
    print(data)

var GEO_VARIABLES =
[
  ["geoid", "1980", "1990", "2000", "2010"],
  ["17031010100", 5, 2, 1, 5],
  ["17031010201", 5, 5, 5, 5],
  ["17031010202", 5, 5, 5, 5],
  ["17031010300", 5, 5, 5, 5],
  ["17031010400", 4, 4, 5, 4],
  ["17031010501", 5, 5, 5, 4],
  ["17031010502", 5, 5, 5, 5],
  ["17031010503", 5, 5, 5, 4],
  ["17031010600", 5, 5, 5, 5],
  ["17031010701", 5, 5, 5, 5],
  ["17031010702", 5, 5, 5, 2],
  ["17031020100", 3, 5, 5, 5],
  ["17031020200", 3, 3, 3, 5],
  ["17031020301", 3, 3, 3, 3],
  ["17031020302", 3, 3, 3, 5],
  ["17031020400", 4, 5, 5, 5],
  ["17031020500", 5, 5, 5, 5],
  ["17031020601", 5, 5, 5, 5],
  ["17031020602", 5, 5, 5, 5],
  ["17031020701", 5, 5, 5, 3],
  ["17031020702", 5, 5, 5, 5],
  ["17031020801", 5, 5, 5, 5],
  ["17031020802", 5, 5, 5, 5],
  ["17031020901", 5, 5, 5, 5],
  ["17031020902", 5, 5, 5, 5],
  ["17031030101", 4, 5, 5, 5],
  ["17031030102", 4, 5, 5, 5],
  ["17031030103", 4, 5, 5, 5],
  ["17031030104", 4, 5, 5, 5],
  ["17031030200", 5, 5, 4, 4],
  [

### Creating the URL to view the visualization result

In [16]:
# Build the URLs under which the generated visualization and its configuration
# file can be opened on the CyberGISX Jupyter server.
servers = list(notebookapp.list_running_servers())
if not servers:
    # Previously this surfaced as a bare IndexError on servers[0].
    raise RuntimeError("No running Jupyter notebook server was found; cannot build the visualization URL.")
base_url = servers[0]["base_url"]
servers1 = 'https://cybergisx.cigi.illinois.edu' + base_url + 'view'
servers2 = 'https://cybergisx.cigi.illinois.edu' + base_url + 'edit'

# Express the working directory relative to the server's work folder.
# Only a leading prefix is stripped (str.replace would also remove the same
# text if it appeared deeper in the path).
cwd = os.getcwd()
prefix_cwd = "/home/jovyan/work"
if cwd.startswith(prefix_cwd):
    cwd = cwd[len(prefix_cwd):]
local_dir1 = servers1 + cwd
local_dir2 = servers2 + cwd

# URLs always use '/', so join with it explicitly instead of os.path.join
# (which would use the operating system's path separator).
out_dir = 'QUAL_' + param['filename_suffix']
fname = urllib.parse.quote('index.html')
template_dir = local_dir1 + '/' + urllib.parse.quote(out_dir)
url = template_dir + '/' + fname
webbrowser.open(url)
print('To see visualization of your analysis, click the URL below:')
print(url)
print('Advanced options are available in ')
# The configuration file is written as data/CONFIG_<suffix>.js (not GEO_CONFIG_).
print(local_dir2 + '/' + out_dir + '/data/CONFIG_' + param['filename_suffix'] + '.js')

To see visualization of your analysis, click the URL below:
https://cybergisx.cigi.illinois.edu/user/suhanmappingideas/view/geosnap-viz/PYTHON_Categorical_Data_VIZ/QUAL_Cook_Temporal_1980_1990_2000_2010_from_file/index.html
Advanced options are available in 
https://cybergisx.cigi.illinois.edu/user/suhanmappingideas/edit/geosnap-viz/PYTHON_Categorical_Data_VIZ/QUAL_Cook_Temporal_1980_1990_2000_2010_from_file/data/GEO_CONFIG_Cook_Temporal_1980_1990_2000_2010_from_file.js
