In [32]:
#!/usr/bin/env python
# coding: utf-8
import json, math, copy, sys
from geosnap.io import store_ltdb
from geosnap import Community, datasets
from geosnap.io import store_census

import pandas as pd
import shapely.wkt
import shapely.geometry
from datetime import datetime
from datetime import timedelta
from pathlib import Path
from INCS import linc
import urllib.parse
import webbrowser
import os
import pprint
from sklearn.preprocessing import minmax_scale
import numpy as np
from scipy import stats
from notebook import notebookapp

def write_INDEX_html(param):
	#Create a new folder where GEO_CONFIG.js GEO_JSON.js VARIABLES.js will be saved
	oDir = 'NAM_' + param['filename_suffix']
	path = Path(oDir + '/data')
	path.mkdir(parents=True, exist_ok=True)
	
	contents = []
	#open Neighborhood_Analysis_Mapper.html (the excutable file for the visualization)
	ifile = open("template/Neighborhood_Analysis_Mapper.html", "r", encoding="utf-8")
	contents = ifile.read()
	
	#Replace variables based on the user's selection in each of four files below.
	contents = contents.replace("Neighborhood Analysis Mapper", param['title'])
	contents = contents.replace("data/GEO_CONFIG.js", "data/GEO_CONFIG_"+param['filename_suffix']+".js")
	contents = contents.replace("data/GEO_JSON.js", "data/GEO_JSON_"+param['filename_suffix']+".js")
	contents = contents.replace("data/GEO_VARIABLES.js", "data/GEO_VARIABLES_"+param['filename_suffix']+".js")
	
	#write new outfiles: GEO_CONFIG.js GEO_JSON.js VARIABLES.js
	ofile = open(oDir+"/index.html", "w", encoding="utf-8")
	ofile.write(contents)
	ofile.close()

def write_GEO_CONFIG_js(param):
	# read ACM_GEO_CONFIG.js
	ifile = open("template/NAM_GEO_CONFIG.js", "r", encoding="utf-8")
	contents = ifile.read()
	
	Maps_of_neighborhood = True;               
	Temporal_change_in_neighborhoods = True;
	Parallel_Categories_Diagram_in_neighborhoods = True;
	Chord_Diagram_in_neighborhoods = True;
	
	if ('Maps_of_neighborhood' in param): Maps_of_neighborhood =  param['Maps_of_neighborhood']
	if ('Temporal_change_in_neighborhoods' in param): Temporal_change_in_neighborhoods =  param['Temporal_change_in_neighborhoods']
	if ('Parallel_Categories_Diagram_in_neighborhoods' in param): Parallel_Categories_Diagram_in_neighborhoods =  param['Parallel_Categories_Diagram_in_neighborhoods']
	if ('Chord_Diagram_in_neighborhoods' in param): Chord_Diagram_in_neighborhoods =  param['Chord_Diagram_in_neighborhoods']

	# Automatically set Map_width, Map_height. 
	Map_width = "300px"
	Map_height = "300px"
	if (NumOfMaps <= 6):
		Map_width = "300px"
		Map_height = "300px"	
	if (NumOfMaps <= 5):
		Map_width = "350px"
		Map_height = "350px"
	if (NumOfMaps <= 4):
		Map_width = "400px"
		Map_height = "400px"
	if (NumOfMaps <= 3):
		Map_width = "400px"
		Map_height = "400px"
	if (NumOfMaps <= 2):
		Map_width = "450px"
		Map_height = "450px"
	if (NumOfMaps ==	1):
		Map_width = "800px"
		Map_height = "800px"	
	# replace newly computed "NumOfMaps", "InitialLayers", "Map_width", "Map_height" in CONFIG.js. See the example replacement below
	'''
		'years': [1980, 1990, 2000, 2010]            ->    'var InitialLayers = ["INC", "1980", "1990", "2000", "2010"];'
	'''
	Maps_of_neighborhood = "var Maps_of_neighborhood = " + json.dumps(Maps_of_neighborhood)+ ";"
	Temporal_change_in_neighborhoods = "var Temporal_change_in_neighborhoods = " + json.dumps(Temporal_change_in_neighborhoods)+ ";"
	Parallel_Categories_Diagram_in_neighborhoods = "var Parallel_Categories_Diagram_in_neighborhoods = " + json.dumps(Parallel_Categories_Diagram_in_neighborhoods)+ ";"
	Chord_Diagram_in_neighborhoods = "var Chord_Diagram_in_neighborhoods = " + json.dumps(Chord_Diagram_in_neighborhoods)+ ";"
	Map_width = 'var Map_width  = "' + Map_width + '";'
	Map_height = 'var Map_height = "' + Map_height + '";'
   
	contents = contents.replace("var Maps_of_neighborhood = true;", Maps_of_neighborhood)
	contents = contents.replace("var Temporal_change_in_neighborhoods = true;", Temporal_change_in_neighborhoods)
	contents = contents.replace("var Parallel_Categories_Diagram_in_neighborhoods = true;", Parallel_Categories_Diagram_in_neighborhoods)
	contents = contents.replace("var Chord_Diagram_in_neighborhoods = true;", Chord_Diagram_in_neighborhoods)
	contents = contents.replace('var Map_width  = "400px";', Map_width)
	contents = contents.replace('var Map_height = "400px";', Map_height)

	#Write output including the replacement above
	filename_GEO_CONFIG = "NAM_" + param['filename_suffix'] + "/data/GEO_CONFIG_"+param['filename_suffix']+".js"
	ofile = open(filename_GEO_CONFIG, 'w', encoding="utf-8")
	ofile.write(contents)
	ofile.close()

def write_GEO_JSON_js(community, param):
	# query geometry for each tract
	geoid = community.gdf.columns[0]
	tracts = community.gdf[[geoid, 'geometry']].copy()
	tracts.drop_duplicates(subset=geoid, inplace=True)					# get unique geoid
	#print(tracts)
	
	# open GEO_JSON.js write heading for geojson format
	filename_GEO_JSON = "NAM_" + param['filename_suffix'] + "/data/GEO_JSON_"+param['filename_suffix']+".js"
	ofile = open(filename_GEO_JSON, 'w')
	ofile.write('var GEO_JSON =\n')
	ofile.write('{"type":"FeatureCollection", "features": [\n')
	
	#Convert geometry in GEOJSONP to geojson format
	wCount = 0
	for tract in tracts.itertuples():
		feature = {"type":"Feature"}
		if (type(tract.geometry) is float):								# check is NaN?
			#print(tract.geometry)
			continue
		feature["geometry"] = shapely.geometry.mapping(tract.geometry)
		#feature["properties"] = {geoid: tract.__getattribute__(geoid), "tractID": tract.__getattribute__(geoid)}
		feature["properties"] = {geoid: tract.__getattribute__(geoid)}
		wCount += 1
		ofile.write(json.dumps(feature)+',\n')
	#print("GEO_JSON.js write count:", wCount)
	# complete the geojosn format by adding parenthesis at the end.	
	ofile.write(']}\n')
	ofile.close()

def write_GEO_VARIABLES_js(community, param):
	#print(param)
	geoid       = community.gdf.columns[0]
	method      = param['method']
	nClusters   = param['nClusters']
	years       = param['years']
	variables   = param['variables']
	labels      = param['labels']
	
	seqClusters = 5
	distType    = 'tran'
	if ('Sequence' in param and type(param['Sequence']) is dict):
		if ('seq_clusters' in param['Sequence']): seqClusters = param['Sequence']['seq_clusters']
		if ('dist_type' in param['Sequence']): distType = param['Sequence']['dist_type']
	
	# filtering by years
	community.gdf = community.gdf[community.gdf.year.isin(years)]
	#print(community.gdf)
	community.gdf.to_csv(r'output.csv')    

	if (method == 'kmeans' or method == 'ward' or method == 'affinity_propagation' or method == 'spectral' or method == 'gaussian_mixture' or method == 'hdbscan'):
		clusters = community.cluster(columns=variables, method=method, n_clusters=nClusters)
	if (method == 'ward_spatial' or method == 'spenc' or method == 'skater' or method == 'azp' or method == 'max_p'):
		clusters = community.cluster_spatial(columns=variables, method=method, n_clusters=nClusters)		

	# Use the sequence method to obtain the distance matrix of neighborhood sequences
	gdf_new, df_wide, seq_dis_mat = clusters.sequence(seq_clusters=seqClusters, dist_type=distType, cluster_col=method)
	#print(df_wide)
	
	# pivot by year column
	#df_pivot = df.reset_index().pivot(geoid, "year", method)
	df_pivot = df_wide
	lastColumn = df_pivot.columns[df_pivot.shape[1]-1]					# get the last column name as like 'tran-5'
	df_pivot.rename(columns={lastColumn: 'Sequence'}, inplace=True)		# change the last column name to 'Sequence'
	#print(df_pivot)

	if ('Sequence' not in param or not param['Sequence']): df_pivot.drop(columns=['Sequence'], inplace=True)
	#if ('Sequence' not in param or type(param['Sequence']) is not dict): df_pivot.drop(columns=['Sequence'], inplace=True)
	#print(df_pivot)
	
	# calculate zscore
	clusters_flattened = pd.DataFrame(df_pivot.to_records())                   # convert pivot to data frame
	geoids = clusters_flattened["geoid"].tolist()                              # get list of geoids from pivot
	valid_gdf = community.gdf[community.gdf.geoid.isin(geoids)]                # get all rows of valid geoids from community.gdf
	#print("clusters_flattened:", clusters_flattened)
	#print("geoids: ", len(geoids))
	#print("geoids:", geoids)
	#print("valid_gdf:", valid_gdf)

	# write df_pivot to GEO_VARIABLES.js
	filename_GEO_VARIABLES = "NAM_" + param['filename_suffix'] + "/data/GEO_VARIABLES_"+param['filename_suffix']+".js"
	ofile = open(filename_GEO_VARIABLES, 'w')
	ofile.write('var GEO_VARIABLES =\n')
	ofile.write('[\n')
	#heading = [geoid, 'INC']
	#if (len(years) <= 1): heading = [geoid]
	#heading.extend(list(map(str, years)))
	heading = [geoid]
	heading.extend(list(map(str, df_pivot.columns.tolist())))
	ofile.write('  '+json.dumps(heading)+',\n')
	wCount = 0
	for i, row in df_pivot.reset_index().iterrows():
		aLine = row.tolist()
		for j, col in enumerate(aLine[2:], 2):
			try:
				aLine[j] = int(col)                                  # convert float to int
			except ValueError:
				aLine[j] = -9999                                     # if Nan, set -9999
		wCount += 1 
		ofile.write('  '+json.dumps(aLine)+',\n')
	#print("GEO_VARIABLES.js write count:", wCount)
	ofile.write(']\n')

	ofile.close()

def Clustering_viz(param):
	write_LOG(param)
	
	# select community by state_fips, msa_fips, county_fips
	metros = None
	community = None
	if ('allMetros' in param and param['allMetros']):
		#metros = data_store.msa_definitions
		#print(metros)
		#metros = data_store.msas()
		metros = datasets.msas()
	elif ('msa_fips' in param and param['msa_fips']):
		community = Community.from_ltdb(years=param['years'], msa_fips=param['msa_fips'])
		#community = Community.from_ltdb(msa_fips=param['msa_fips'])
	elif ('county_fips' in param and param['county_fips']):
		community = Community.from_ltdb(years=param['years'], county_fips=param['county_fips'])
	elif ('state_fips' in param and param['state_fips']):
		community = Community.from_ltdb(years=param['years'], state_fips=param['state_fips'])
	
	community.gdf = community.gdf.replace([np.inf, -np.inf], np.nan)
	
	# check if geometry is not null for Spatial Clustering
	community.gdf = community.gdf[pd.notnull(community.gdf['geometry'])]
	#print(community.gdf)
    
	codebook = pd.read_csv('template/conversion_table_codebook.csv')
	codebook.set_index(keys='variable', inplace=True)
	labels = copy.deepcopy(param['variables'])
	label = 'short_name'                                             # default
	if ('label' in param and param['label'] == 'variable'): label = 'variable'
	if ('label' in param and param['label'] == 'full_name'): label = 'full_name'
	if ('label' in param and param['label'] == 'short_name'): label = 'short_name'
	if (label != 'variable'):
		for idx, variable in enumerate(param['variables']):
			try:
				codeRec = codebook.loc[variable]
				labels[idx] = codeRec[label]
			except:
				print("variable not found in codebook.  variable:", variable)
	param['labels'] = labels
    
	write_INDEX_html(param)
	write_GEO_CONFIG_js(param)
	write_GEO_VARIABLES_js(community, param)
	write_GEO_JSON_js(community, param)

	#local_dir = os.path.dirname(os.path.realpath(__file__))
	servers = list(notebookapp.list_running_servers())
	servers1 = 'https://cybergisx.cigi.illinois.edu'+servers[0]["base_url"]+ 'view'
	servers2 = 'https://cybergisx.cigi.illinois.edu'+servers[0]["base_url"]+ 'edit'      
	cwd = os.getcwd()
	prefix_cwd = "/home/jovyan/work"
	cwd = cwd.replace(prefix_cwd, "")
	local_dir1 = servers1 + cwd
	local_dir2 = servers2 + cwd    
	#print(local_dir)
	fname =urllib.parse.quote('index.html')
	template_dir = os.path.join(local_dir1, 'NAM_' + param['filename_suffix'])
	#url = 'file:' + os.path.join(template_dir, fname)
	url = os.path.join(template_dir, fname)    
	webbrowser.open(url)
	print('To see visualization of your analysis, click the URL below:')
	print(url)    
	print('Advanced options are available in ')  
	print(local_dir2 + '/'+ 'NAM_' + param['filename_suffix']+'/data/GEO_CONFIG_' + param['filename_suffix']+'.js')  


In [33]:
param = {
    'title': "Neighborhood Analysis: Kmeans, San Diego",
    'filename_suffix': "SD_everything_8",              # "Albertville"
    'state_fips': None,
    'msa_fips': "41740",                         # "10700"
    'county_fips': None,
    'years': [1980, 1990, 2000, 2010],           # Available years: 1970, 1980, 1990, 2000 and 2010
    'method': "kmeans",                          # Aspatial Clustering: affinity_propagation, gaussian_mixture, hdbscan, kmeans, spectral, ward
                                                 # Spatial Clustering: azp, max_p, skater, spenc, ward_spatial   
    'nClusters': 8,                              # This option should be commented out for affinity_propagation, hdbscan and max_p 
    'variables': ["p_nonhisp_white_persons", 
                  "p_nonhisp_black_persons", 
                  "p_hispanic_persons", 
                  "p_native_persons", 
                  "p_asian_persons",
                 ],
    'Maps_of_neighborhood': True,                #choropleth map: Maps representing clustering result		
    'Temporal_change_in_neighborhoods': True,    #stacked chart: Temporal Change in Neighborhoods over years		
    'Parallel_Categories_Diagram_in_neighborhoods': True,
}

Clustering_viz(param)

  return _prepare_from_string(" ".join(pjargs))


NameError: name 'NumOfMaps' is not defined