# Surface adjusted distance project

Start by loading up some packages. 

## Note

Before running this code, use the Clusters tab to provision some resources (multiple CPUs!).

In [31]:
import sys
print (sys.version)
# Extend the module search path before the project import further down.
# NOTE(review): presumably this is where surface_adjusted_benchmark lives;
# the path is absolute and machine-specific -- adjust for your environment.
sys.path.append("C:/Users/yi/git/TerrainMetrics_conda2/Update")
import time
import itertools
import cProfile, pstats
import os.path
import numpy as np
import pandas as pd
from ipyparallel import Client
import surface_adjusted_benchmark

# next line loads packages installed for my user account
# NOTE(review): this append runs after the imports above, so it can only
# affect imports made later in the session -- confirm that is intended.
sys.path.append("C:/ProgramData/Anaconda3/lib/site-packages")

3.7.1 (default, Dec 10 2018, 22:54:23) [MSC v.1915 64 bit (AMD64)]


Verify that the cluster is up and running

In [34]:
# check cluster status
# Connect to the ipyparallel cluster and print the ids of the available engines.
rc = Client()
print(rc.ids)

[0, 1, 2, 3, 4, 5, 6, 7]


In [None]:
# Create a load-balanced view over the cluster's engines.
# block=True is set so that calls made through the view wait for their results.
lview = rc.load_balanced_view()
lview.block = True
print(lview)

In [None]:
# test it: raise each of 0..7 to the 10th power through the load-balanced view
lview.map(lambda x:x**10, range(8))

Now use `%px` to import packages on each of the processors.

In [None]:
# Each %px line below is executed on the cluster engines rather than locally.
# NOTE(review): these are Linux paths, unlike the Windows paths used by the
# local imports cell -- confirm they match the machine the engines run on.
%px import sys
%px sys.path.append("/home/majo3748/.local/lib/python3.4/site-packages")
%px sys.path.append("/projects/majo3748/TerrainMetrics_conda2")

# load surfaceAdjusted module on each worker
# NOTE(review): the local kernel imports `surface_adjusted_benchmark`, not
# `surfaceAdjusted` -- confirm which module the engines actually need.
%px import surfaceAdjusted

## Determining the cases 

We need to generate a data frame containing a row for each distance calculation we want to do. 
The following code blocks assign the cases of interest, and generate all relevant combinations, producing a data frame at the end.

In [3]:
# resolutions to evaluate; cases are generated once per value
# NOTE(review): an earlier comment said "for testing, just use the larger
# resolutions" -- confirm whether this is the full list.
resolution_L = [3,10,30,100,1000]

# interpolation method names
# NN interpolation does not work, leave commented out
# NOTE(review): the case-generation cell hard-codes "clos" and does not read
# this list.
methods = ['clos', 'wavg', 'biLin', 'biQua', 'biQub', 'TIN', 'p2p']#, 'NN']

# study areas to process (six are active); each one needs a
# <data_dir><area>/simulation/tran_sim_pts.csv file
study_areas = ["Colorado", "Nebraska", "Louisiana", "Washington", "NC", "Texas"]

# root data directory; keep the trailing "/" because paths are built by
# plain string concatenation
data_dir = r"D:/SAD/Modified_2/"

In [4]:
def expandgrid(*itrs):
    """
    Build the Cartesian product of the inputs and return it column-wise.

    Args:
        - *itrs: any number of iterables, passed as separate arguments

    Example:
        >>> expandgrid([0, 1], [2, 3, 4])
        [[0, 0, 0, 1, 1, 1], [2, 3, 4, 2, 3, 4]]

    Returns:
        - a list with one inner list per input iterable; position k of every
          inner list together describes the k-th combination, with the last
          input varying fastest
    """
    combos = tuple(itertools.product(*itrs))
    columns = []
    for position, _ in enumerate(itrs):
        # gather the value this input contributes to every combination
        columns.append([combo[position] for combo in combos])
    return columns

In [5]:
# Determine all of the cases to compute (area X resolution X transect combinations).
# The method is fixed to "clos" for this benchmark; the `methods` list is not used here.
cases_list = []
for area in study_areas:
    area_path = data_dir + area + '/simulation/'
    area_transects = np.genfromtxt(area_path + 'tran_sim_pts.csv', delimiter=",")

    # The transect count depends only on the area, so compute it once per area
    # instead of once per resolution.
    # NOTE(review): two rows of the points file are counted per transect --
    # presumably a start and an end point; confirm against the file format.
    n_transects = int(area_transects.shape[0] / 2)
    transect_indices = list(range(n_transects))

    for resolution in resolution_L:
        cases = expandgrid(transect_indices, ["clos"], [resolution], [area_path], [area])

        # expandgrid returns one list per column, so transpose to get one row per case
        df = pd.DataFrame(cases).transpose()
        df.columns = ["transect", "method", "resolution", "path", "area"]
        cases_list.append(df)

# Each per-(area, resolution) frame keeps its own 0..n-1 index, so index
# labels repeat in the concatenated frame.
cases_df = pd.concat(cases_list)
cases_df.describe()

Unnamed: 0,transect,method,resolution,path,area
count,30000,30000,30000,30000,30000
unique,1000,1,5,6,6
top,999,clos,1000,D:/SAD/Modified_2/Washington/simulation/,NC
freq,30,30000,6000,5000,5000


For testing purposes, we'll just use the first few transects of each area and resolution (here, only the first one).

In [6]:
# subset transects: keep only rows whose transect index is below n_transects
n_transects = 1
# .copy() makes the subset an independent frame, so the result columns added
# to cases_df later do not trigger pandas' SettingWithCopyWarning.
cases_df = cases_df.loc[lambda frame: frame.transect < n_transects, :].copy()
cases_df

Unnamed: 0,transect,method,resolution,path,area
0,0,clos,3,D:/SAD/Modified_2/Colorado/simulation/,Colorado
0,0,clos,10,D:/SAD/Modified_2/Colorado/simulation/,Colorado
0,0,clos,30,D:/SAD/Modified_2/Colorado/simulation/,Colorado
0,0,clos,100,D:/SAD/Modified_2/Colorado/simulation/,Colorado
0,0,clos,1000,D:/SAD/Modified_2/Colorado/simulation/,Colorado
0,0,clos,3,D:/SAD/Modified_2/Nebraska/simulation/,Nebraska
0,0,clos,10,D:/SAD/Modified_2/Nebraska/simulation/,Nebraska
0,0,clos,30,D:/SAD/Modified_2/Nebraska/simulation/,Nebraska
0,0,clos,100,D:/SAD/Modified_2/Nebraska/simulation/,Nebraska
0,0,clos,1000,D:/SAD/Modified_2/Nebraska/simulation/,Nebraska


In [7]:
# try just one case: call the distance function with the values from the
# first row of cases_df
first_case_args = [
    cases_df[column].tolist()[0]
    for column in ('transect', 'method', 'resolution', 'path')
]
surface_adjusted_benchmark.distance(*first_case_args)

pnts.shape[0]: 2364


(7121.0443490596235, 0.9939961433410645)

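The next cell maps our distance function over all of the cases. Note that it uses Python's built-in `map`, which is lazy and runs serially in this kernel: nothing is computed until the result is consumed in the following cell. Mapping through the load-balanced view (`lview.map`) instead would distribute the cases across the cluster with automatic load balancing.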

In [8]:
# Set up the mapping of the distance function over every case.
# The built-in map is lazy: nothing is computed in this cell. The calls run
# one after another in this kernel once the iterator is consumed.
print("Processing")
argument_columns = [
    cases_df[column].tolist()
    for column in ('transect', 'method', 'resolution', 'path')
]
res = map(surface_adjusted_benchmark.distance, *argument_columns)

Processing


In [9]:
# Consuming the lazy map iterator is what actually runs the distance
# calculations, one case after another.
res_list = list(res)

pnts.shape[0]: 2364
pnts.shape[0]: 2325
pnts.shape[0]: 774
pnts.shape[0]: 258
pnts.shape[0]: 21
pnts.shape[0]: 14105
pnts.shape[0]: 13965
pnts.shape[0]: 4653
pnts.shape[0]: 1551
pnts.shape[0]: 192
pnts.shape[0]: 18503
pnts.shape[0]: 17163
pnts.shape[0]: 5721
pnts.shape[0]: 1959
pnts.shape[0]: 180
pnts.shape[0]: 16096
pnts.shape[0]: 14484
pnts.shape[0]: 4827
pnts.shape[0]: 1446
pnts.shape[0]: 216
pnts.shape[0]: 9458
pnts.shape[0]: 9189
pnts.shape[0]: 3057
pnts.shape[0]: 1008
pnts.shape[0]: 87
pnts.shape[0]: 6634
pnts.shape[0]: 6246
pnts.shape[0]: 2082
pnts.shape[0]: 693
pnts.shape[0]: 63


In [20]:
# Collect the per-case results into a two-column float frame.
# NOTE(review): assumes each result is a (distance, elapsed time) pair, as the
# single-case output suggests -- confirm against surface_adjusted_benchmark.distance.
res_df=pd.DataFrame(res_list,columns=['distance','time'],dtype=float)

In [28]:
# Attach the results to the cases by position (plain lists are assigned, so
# the repeated index labels of cases_df play no part in the alignment)
for result_column in ('distance', 'time'):
    cases_df[result_column] = res_df[result_column].tolist()

In [None]:
# Write the benchmark table (cases plus distance and time columns) to disk.
# NOTE(review): absolute, machine-specific output path; the frame's index is
# written as the first CSV column.
cases_df.to_csv(r'D:/SAD/SAD_result/figures/result/raw_table/benchmark.csv')

In [None]:
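# the next lines are for interactive progress tracking (which is off by default);
# they need `res` to be an ipyparallel async result (e.g. from a non-blocking view),
# not the iterator returned by the built-in map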
#frac_done = 1.0 * res.progress / len(res)
#print("Progress: " + str(100 * frac_done) + "% done")

In [None]:
# once every task has finished, add the results to the data frame (also off by default)
#is_done = frac_done == 1.0
#if is_done:
    # add result to data frame
#    cases_df['distance'] = res.get()
#cases_df