## SAMPLE CODE FOR CALLING FCA METHODS FROM ACCESS

## Load in your demand, supply, and costs files

In [None]:
#import the access class and pandas (for dataframe cleanup later)
import sys 
sys.path.append("..") #this is included so that you can run this notebook from this folder

from access import *
import pandas as pd

# Load the travel-time, population and physician tables for Illinois.
# The CSVs are read as headerless (header=None), so column names are given here.
times = pd.read_csv(
    "~/Downloads/timesfinal.csv",
    header=None,
    names=['origin', 'dest', 'cost'],
)
# Only the id and value columns are kept; 'region' is dropped right away.
pops = pd.read_csv(
    "~/Downloads/popfinal.csv",
    header=None,
    names=['geoid', 'demand', 'region'],
)[['geoid', 'demand']]
docs = pd.read_csv(
    "~/Downloads/docfinal.csv",
    header=None,
    names=['geoid', 'supply', 'region'],
)[['geoid', 'supply']]

## Pre-process your costs dataframe (times) and your supply dataframe (docs)

In [None]:
#@postcondition:  adds self times and removes duplicates from costs
def cleanUpCosts(costs, maxCost = 30, demand = None):
    """Return a cleaned copy of a cost table; the input frame is never modified.

    Parameters
    ----------
    costs : pandas.DataFrame
        Table with 'origin', 'dest' and 'cost' columns.
    maxCost : number, default 30
        Exclusive upper bound; rows with cost >= maxCost are dropped.
    demand : pandas.DataFrame, optional
        If given, must have a 'geoid' column. A zero-cost row
        (origin == dest == geoid) is appended for every geoid before
        filtering, so each demand location can reach itself.

    Returns
    -------
    pandas.DataFrame
        New frame with rows below maxCost, sorted by 'dest', with exact
        duplicate rows removed.
    """
    if demand is not None:
        origins = list(demand['geoid'])
        self_times = pd.DataFrame({'origin': origins, 'dest': origins, 'cost': 0})
        # concat builds a new frame, so the caller's table is left untouched
        costs = pd.concat([costs, self_times], ignore_index = True)

    # Filter once, then sort/dedupe without inplace=True: the in-place
    # variants either reorder the caller's frame (demand is None) or operate
    # on a filtered slice and trigger SettingWithCopyWarning.
    costs = costs[costs['cost'] < maxCost]
    return costs.sort_values('dest').drop_duplicates()

#miscellaneous clean-up
max_cost = 30

# cleanUpCosts already appends a zero-cost self time for every geoid in pops,
# drops rows with cost >= max_cost, sorts by 'dest' and removes duplicate rows,
# so none of that is repeated by hand here. It must run while 'geoid' is still
# a column of pops (i.e. before the set_index below).
times = cleanUpCosts(times, max_cost, pops)

# Drop locations with no supply, then index both tables by geoid.
# set_index is used without inplace=True so a filtered slice is never modified in place.
docs = docs[docs['supply'] != 0].set_index('geoid')
pops = pops.set_index('geoid')

## Call fca, two-stage, enhanced two-stage, three-stage, using normalization/weighting features

In [None]:
# Build the access object from the demand (pops), supply (docs) and cost (times) tables.
# pops and docs are indexed by 'geoid'; demand_index / supply_index = True presumably
# tell access to take ids from the index -- confirm against the access class.
tester = access(
    demand_df=pops, demand_value='demand',
    supply_df=docs, supply_value='supply',
    demand_index=True, supply_index=True,
    cost_df=times, cost_origin='origin', cost_dest='dest', cost_name='cost',
    # NOTE(review): origin/dest are swapped here ('dest' -> origin, 'origin' -> dest),
    # unlike the euclidean example further down, which passes them unswapped -- confirm intent.
    neighbor_cost_df=times, neighbor_cost_origin='dest',
    neighbor_cost_dest='origin', neighbor_cost_name='cost',
)

# Stepwise weight dictionaries, keyed by cost threshold.
# NOTE: d_3Stage is defined for reference only; nothing below uses it.
d = {10: 1, 20: 0.68, 30: 0.22}
d_3Stage = {10: 0.962, 20: 0.704, 30: 0.377, 60: 0.042}

# Step weighting function built from d.
# NOTE: fn is not passed to any call below; supply it as weight_fn=fn to use it.
fn = weights.step_fn(d)

# Optional: base FCA (uncomment to run)
# fca_ratio_series = tester.fca_ratio(max_cost = 30)

# Optional: two-stage FCA (uncomment to run)
# two_stage_fca_series = tester.two_stage_fca(max_cost = 30)

# E2SFCA with normalize=True; no weight_fn is given, so the method's default applies
print(tester.enhanced_two_stage_fca(max_cost=30, normalize=True))

# Base FCA ratio, raw values
print(tester.fca_ratio(max_cost=30))

# 3SFCA
print(tester.three_stage_fca(max_cost=30))

# The norm_access_df attribute (described in the original notes as all three access columns, normalized)
print(tester.norm_access_df)

## Euclidean distances tests

In [None]:

# Read the tract shapefile and reproject it to EPSG:3528.
# NOTE(review): gpd is not imported in the visible cells; presumably it arrives via
# `from access import *` -- confirm before running this cell on its own.
geofile = "~/Downloads/cb_2018_17_tract_500k/cb_2018_17_tract_500k.shp"
gdf = gpd.read_file(geofile).to_crs(epsg=3528)

# Index the tracts by an integer id built from state + county + tract codes,
# so the geometries can be joined to pops and docs (both indexed by geoid).
gdf = gdf.set_index((gdf.STATEFP + gdf.COUNTYFP + gdf.TRACTCE).astype(int))

# Attach geometries to the demand and supply tables.
# WARNING: the join starts from gdf, so eucl_pops and eucl_docs contain the tracts
#          present in the loaded geofile. This geofile has 3121 tracts, so
#          eucl_pops has 3121 rows.
eucl_pops = gdf[['geometry']].join(pops)
eucl_pops.index.name = 'geoid'
eucl_docs = gdf[['geometry']].join(docs)
eucl_docs.index.name = 'geoid'

# Build a second access object with NO cost_df; costs come from geometry instead.
tester_eucl = access(
    demand_df=eucl_pops, demand_value='demand',
    supply_df=eucl_docs, supply_value='supply',
    demand_index=True, supply_index=True,
    #cost_df = times, cost_origin = 'origin', cost_dest = 'dest', cost_name = 'cost',
    neighbor_cost_df=times, neighbor_cost_origin='origin',
    neighbor_cost_dest='dest', neighbor_cost_name='cost',
)

# Euclidean distances between POLYGONS (centroids disabled for origins and destinations)
tester_eucl.euclidean_distance(threshold=20000, centroid_o=False, centroid_d=False)
# Optional: the same for neighbors (uncomment to run)
#tester_eucl.euclidean_distance_neighbors(threshold = 20000 , centroid = False)

# Optional: base FCA on the euclidean distances (uncomment to run)
#print (tester_eucl.fca_ratio(max_cost = 20000))

# Correlate travel cost with euclidean distance over matching origin/dest pairs.
correlation_df = tester.cost_df.merge(
    tester_eucl.cost_df,
    how='outer',
    on=['origin', 'dest'],
)
print(correlation_df[['cost', 'euclidean']].corr())

# WARNING
# Do not call enhanced_two_stage_fca on euclidean distances with the default weights:
# the default weights dictionary is designed for times in minutes, not distances in
# meters. It only goes up to 30, so applied to distances (values in the thousands)
# it restricts the calculation to distances under 30 meters, i.e. basically zero.
#print (tester_eucl.enhanced_two_stage_fca(max_cost = 5000))

# Whenever you call enhanced, max_cost must be less than or equal to the largest key
# of the weighting dictionary. A larger max_cost is misleading and leads to a
# nonsensical two_stage or three_stage method call.
# With euclidean distances this goes one step further: the threshold parameter of
# euclidean_distance() acts the same as the largest key of the weighting dictionary.
print(
    tester_eucl.enhanced_two_stage_fca(
        weight_fn=weights.step_fn({5000: 1, 10000: 0.68, 20000: 0.22}),
        max_cost=5000,
    )
)

# Optional extras (uncomment as desired)
#print (tester_eucl.three_stage_fca(weight_fn = weights.step_fn({5000 : 1, 10000 : 0.68, 20000 : 0.22}), max_cost = 20000))

#print (tester_eucl.norm_access_df)