# Using AWS Lambda Functions
  
This notebook provides the script to run the Counterterrorism Model as Lambda functions, taking advantage of the ability to scale out the number of functions and run these extremely parallel simulations simultaneously.

In [25]:
import matplotlib.pyplot as plt
import numpy as np
import time
import model_lambda
import itertools
import pandas as pd
import seaborn as sns
import boto3

In [36]:
# create the parameter options to cycle through 
def create_param_list():
    # Define parameter value combinations to test
    prob_violence = [0.0001, 0.0005, 0.001, 0.003, 0.005, 0.008]
    govt_policy = ['NONE','CONC','REPR']
    reactive_lvl = ['high','mid-high','mid-low','low','none']
    discontent = ['high','mid','low']
    starting_population = [200, 300, 400, 600, 800]
    steps = [200, 300, 500, 700]
    
    full_params = [prob_violence, govt_policy, reactive_lvl, discontent, \
                    starting_population,steps]
    param_names = ['prob_violence','govt_policy','reactive_lvl','discontent','starting_population','steps']
    param_options = list(itertools.product(*full_params))
    param_options = [dict(zip(param_names, param)) for param in param_options]
    print(param_options[0])
    #param_dist = [param_options[i:i + n] for i in range(0, len(param_options), n)]
    print(len(param_options), len(param_options[0]))
    return param_options

In [37]:
param_dist = create_param_list()

{'prob_violence': 0.0001, 'govt_policy': 'NONE', 'reactive_lvl': 'high', 'discontent': 'high', 'starting_population': 200, 'steps': 200}
5400 6


In [41]:
# a copy of the lambda function run on AWS
import boto3
import json
import numpy as np
import time
import model_lambda


def lambda_handler(event, context):
    t0 = time.time()
    return_dict = {'params':event}
    prob_violence = float(event['prob_violence'])
    govt_policy = event['govt_policy']
    reactive_lvl = event['reactive_lvl']
    discontent = event['discontent']
    starting_population = int(event['starting_population'])
    steps = int(event['steps'])

    grid_size = {200:(2,3),300:(3,3),400:(3,4),500:(4,4),600:(4,5),\
                800:(5,5)}
    height, width = grid_size[starting_population]
    model = model_lambda.CounterterrorismModel(N=starting_population,height=height,\
                                            width=width,prob_violence=prob_violence,\
                                            policy=govt_policy,reactive_lvl=reactive_lvl,\
                                            discontent=discontent)
    for i in range(steps):
        model.step()

    model_df = model.datacollector.get_model_vars_dataframe()
    agents_df = model.datacollector.get_agent_vars_dataframe()
    deaths_df = model.datacollector.get_table_dataframe('Deaths')

    # get values from model dataframe
    return_dict['final_pop'] = int(model_df[model_df.index == steps-1]\
        ['num_agents'].values[0])
    return_dict['total_num_attacks'] = int(model_df[model_df.index == steps-1]\
        ['num_attacks'].values[0])

    # process agents dataframe
    agents_df = agents_df.reset_index()
    agents_df['step_cat'] = agents_df.Step.astype('str')
    agent_stati_gb = agents_df.groupby(by=['step_cat','status'])
    agent_stati = agent_stati_gb['AgentID'].nunique().reset_index()
    agent_stati = agent_stati.rename(columns={'AgentID':'num_agents'})
    agent_stati['step'] = agent_stati.step_cat.astype('int')

    # get Palestinian actors dataframe
    palestinian_stati = agent_stati[agent_stati.status.isin(['anti-violence','combatant','neutral','sympathetic'])]
    #return_dict['palestinian_stati'] = palestinian_stati

    # get dominant sentiments and proportion of sentiments at end 
    dominant_sentiments = np.zeros((model.grid.width, model.grid.height))
    dominant_sentiments = dominant_sentiments.astype(str)
    dominant_sentiments[dominant_sentiments=='0.0'] = 'none'
    percent_dominant_sentiments = np.zeros((model.grid.width, model.grid.height))

    for cell in model.grid.coord_iter():
        cell_content, x, y = cell
        status_dict = {'anti-violence':0,'NONE':0,'neutral':0,'sympathetic':0,\
            'combatant':0,'TARG-CONC':0,'TARG-REPR':0,'INDISC-CONC':0,'INDISC-REPR':0}
        for agent in cell_content:
            status_dict[agent.status] += 1
            #print(agent.status) 
        dominant_sentiment = max(status_dict)
        if len(cell_content) > 0:
            perc_dominant_sentiment = max(status_dict.values())/len(cell_content)
        else:
            perc_dominant_sentiment = 0

        dominant_sentiments[x][y] = dominant_sentiment 
        percent_dominant_sentiments[x][y] = perc_dominant_sentiment  

    return_dict['dominant_sentiments'] = dominant_sentiments.tolist()
    return_dict['percent_dominant_sentiments'] = percent_dominant_sentiments.tolist()

    # get Israeli government actions dataframes
    govt_status = agent_stati[agent_stati.status.isin(['NONE','INDISC-REPR',\
                                                        'INDISC-CONC','TARG-CONC',\
                                                        'TARG-REPR'])]
    govt_status.drop('num_agents', axis=1,inplace=True)
    govt_status = govt_status.sort_values('step',ascending=True)
    govt_status_cum = govt_status.groupby(['status']).cumcount()
    govt_status_cum = govt_status_cum.rename('cumulative_actions')
    govt_status_cum = govt_status.join(govt_status_cum)

    #return_dict['govt_status'] = govt_status
    #return_dict['govt_status_cum'] = govt_status_cum 
    return_dict['num_targ_conc'] = int(govt_status_cum[govt_status_cum.status == \
                                                    'TARG-CONC']['cumulative_actions'].max())
    return_dict['num_indisc_conc'] = int(govt_status_cum[govt_status_cum.status == \
                                                    'INDISC-CONC']['cumulative_actions'].max())
    return_dict['num_targ_repr'] = int(govt_status_cum[govt_status_cum.status == \
                                                    'TARG-REPR']['cumulative_actions'].max())
    return_dict['num_indisc_repr'] = int(govt_status_cum[govt_status_cum.status == \
                                                    'INDISC-REPR']['cumulative_actions'].max())
    
    # add raw dataframes to dicts
    #return_dict['deaths_df'] = deaths_df
    #return_dict['model_df'] = model_df

    
    model_time = time.time() - t0
    return_dict['time_elapsed'] = float(model_time)
    print('model with params',event,'took',model_time,\
            '\n \t resulting in',return_dict['total_num_attacks'],\
            'total attacks and a final population of',return_dict['final_pop'])

    return_dict_json = json.dumps(return_dict)
    
    return return_dict_json

First, we'll test that our function works when we switch to using json inputs and outpus, as required by the Lambda function. We'll test with a single set of parameters:

In [2]:
event_test = {'prob_violence': 0.001,
   'govt_policy': 'CONC',
   'reactive_lvl': 'mid-high',
   'discontent': 'low',
   'starting_population': 300,
   'steps': 300}

In [42]:
# testing the above function, just removing 'context' input for test purposes before
# re-adding to match lambda function in AWS exactly
lambda_handler(event_test)

current id pre-IsraeliGovt 300
model with params {'prob_violence': 0.001, 'govt_policy': 'CONC', 'reactive_lvl': 'mid-high', 'discontent': 'low', 'starting_population': 300, 'steps': 300} took 0.0325167179107666 
 	 resulting in 324 total attacks and a final population of 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


'{"params": {"prob_violence": 0.001, "govt_policy": "CONC", "reactive_lvl": "mid-high", "discontent": "low", "starting_population": 300, "steps": 300}, "final_pop": 0, "total_num_attacks": 324, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"]], "percent_dominant_sentiments": [[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], "num_targ_conc": 51, "num_indisc_conc": 60, "num_targ_repr": 18, "num_indisc_repr": 21, "time_elapsed": 0.0325167179107666}'

Since this test was successful, we'll now attempt running this code on AWS. We'll invoke the CounterterrorismModel Lambda function, feeding in the parameters in json format, and return the results of the model. 

In [59]:
aws_lambda = boto3.client('lambda')

def invoke_function(params):
    try:
        r = aws_lambda.invoke(FunctionName='CounterterrorismModel',
                            InvocationType='RequestResponse',
                            Payload=json.dumps(params))
        return json.loads(r['Payload'].read())
    except:
        timeout_result = {'params':params, 'run_error':1}
        return json.dumps(timeout_result)

In [60]:
test_return = invoke_function(event_test)
test_return

'{"params": {"prob_violence": 0.001, "govt_policy": "CONC", "reactive_lvl": "mid-high", "discontent": "low", "starting_population": 300, "steps": 300}, "final_pop": 0, "total_num_attacks": 337, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"]], "percent_dominant_sentiments": [[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], "num_targ_conc": 42, "num_indisc_conc": 59, "num_targ_repr": 27, "num_indisc_repr": 24, "time_elapsed": 1.2963359355926514}'

In [32]:
multi_event_test = param_dist[1030:1034]
multi_event_test

[{'prob_violence': 0.0005,
  'govt_policy': 'NONE',
  'reactive_lvl': 'mid-low',
  'discontent': 'high',
  'starting_population': 400,
  'steps': 500},
 {'prob_violence': 0.0005,
  'govt_policy': 'NONE',
  'reactive_lvl': 'mid-low',
  'discontent': 'high',
  'starting_population': 400,
  'steps': 700},
 {'prob_violence': 0.0005,
  'govt_policy': 'NONE',
  'reactive_lvl': 'mid-low',
  'discontent': 'high',
  'starting_population': 600,
  'steps': 200},
 {'prob_violence': 0.0005,
  'govt_policy': 'NONE',
  'reactive_lvl': 'mid-low',
  'discontent': 'high',
  'starting_population': 600,
  'steps': 300}]

In [46]:
from concurrent.futures import ThreadPoolExecutor

start = time.time()

with ThreadPoolExecutor(max_workers=4) as executor:
    results = executor.map(invoke_function, multi_event_test)

time_elapsed = time.time() - start

print('Time elapsed to run all',len(multi_event_test),'models:',time_elapsed)

Time elapsed to run all 4 models: 4.760071754455566


In [47]:
[result for result in results]

['{"params": {"prob_violence": 0.0005, "govt_policy": "NONE", "reactive_lvl": "mid-low", "discontent": "high", "starting_population": 400, "steps": 500}, "final_pop": 0, "total_num_attacks": 416, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic", "sympathetic"]], "percent_dominant_sentiments": [[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], "num_targ_conc": 49, "num_indisc_conc": 52, "num_targ_repr": 57, "num_indisc_repr": 55, "time_elapsed": 0.6461374759674072}',
 '{"params": {"prob_violence": 0.0005, "govt_policy": "NONE", "reactive_lvl": "mid-low", "discontent": "high", "starting_population": 400, "steps": 700}, "final_pop": 0, "total_num_attacks": 419, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympath

Now, we can run our full set of models as groups fed to the lambda function. Note that, contrary to my understanding, the actual max concurrency for my account seems to be 50, which caused the runs I attempted in batches of 3000 to produce errors and fail. I've thus split the models into groups of 1000 instead, first testing on a single group of 1000 to ensure success, and then running the remainder as a for loop below.

In [61]:
first_params = param_dist[:1000]
len(first_params)

1000

In [62]:
start = time.time()

with ThreadPoolExecutor(max_workers=1000) as executor:
    results = executor.map(invoke_function, first_params)

time_elapsed = time.time() - start

print('Time elapsed to run all',len(first_params),'models:',time_elapsed)

Time elapsed to run all 1000 models: 135.54111313819885


In [63]:
first_param_results = [result for result in results]

In [65]:
len(first_param_results)

1000

In [73]:
blocks = [1000, 2000, 3000, 4000, 5000]
full_results = []
for block in blocks:
    params_list = param_dist[block: block + 1000]
    print(len(params_list))
    start = time.time()

    with ThreadPoolExecutor(max_workers=1000) as executor:
        results = executor.map(invoke_function, params_list)

    time_elapsed = time.time() - start

    print('Time elapsed to run all',len(params_list),'models:',time_elapsed)

    results = [result for result in results]
    full_results.extend(results)

1000
Time elapsed to run all 1000 models: 312.9171073436737
1000
Time elapsed to run all 1000 models: 303.06569051742554
1000
Time elapsed to run all 1000 models: 251.86970019340515
1000
Time elapsed to run all 1000 models: 308.6716411113739
400
Time elapsed to run all 400 models: 192.4643406867981


In [74]:
len(full_results)

4400

In [75]:
full_results.extend(first_param_results)
full_results[0:5]

['{"params": {"prob_violence": 0.0005, "govt_policy": "NONE", "reactive_lvl": "mid-high", "discontent": "low", "starting_population": 200, "steps": 200}, "final_pop": 0, "total_num_attacks": 209, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"]], "percent_dominant_sentiments": [[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]], "num_targ_conc": 16, "num_indisc_conc": 28, "num_targ_repr": 22, "num_indisc_repr": 23, "time_elapsed": 0.47115182876586914}',
 '{"params": {"prob_violence": 0.0005, "govt_policy": "NONE", "reactive_lvl": "mid-high", "discontent": "low", "starting_population": 200, "steps": 300}, "run_error": 1}',
 '{"params": {"prob_violence": 0.0005, "govt_policy": "NONE", "reactive_lvl": "mid-high", "discontent": "low", "starting_population": 200, "steps": 500}, "final_pop": 0, "total_num_attacks": 225, "dominant_sentiments": [["sympathetic", "sympathetic", "sympathetic"], ["sympathetic", "sympathetic", "sympathetic"]], "per

In [76]:
len(full_results)

5400

In [77]:
import pickle

# store results for analysis
pickle.dump(full_results, open("full_fiftyfourhundredd_results_lambda.p","wb"))

We'll now create a pandas dataframe for the analysis of these results, which we can use to determine which models we should run in our final set of Midway runs. 

In [83]:
pickled_results = pickle.load(open("full_fiftyfourhundredd_results_lambda.p", "rb"))

In [109]:
# convert to dictionary type
pickled_dicts = [json.loads(result) for result in pickled_results if not isinstance(result,dict)]
len(pickled_dicts)

5399

In [111]:
pickled_dicts_orig = [result for result in pickled_results if isinstance(result,dict)]
pickled_dicts_orig

[{'errorMessage': 'RequestId: 5b4bccf6-6088-46cd-b945-b1ea5c74fd4a Error: Runtime exited with error: signal: killed',
  'errorType': 'Runtime.ExitError'}]

In [112]:
results_df = pd.DataFrame(pickled_dicts)
results_df.head()

Unnamed: 0,params,final_pop,total_num_attacks,dominant_sentiments,percent_dominant_sentiments,num_targ_conc,num_indisc_conc,num_targ_repr,num_indisc_repr,time_elapsed,run_error
0,"{'prob_violence': 0.0005, 'govt_policy': 'NONE...",0.0,209.0,"[[sympathetic, sympathetic, sympathetic], [sym...","[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]",16.0,28.0,22.0,23.0,0.471152,
1,"{'prob_violence': 0.0005, 'govt_policy': 'NONE...",,,,,,,,,,1.0
2,"{'prob_violence': 0.0005, 'govt_policy': 'NONE...",0.0,225.0,"[[sympathetic, sympathetic, sympathetic], [sym...","[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]",51.0,53.0,54.0,42.0,1.194057,
3,"{'prob_violence': 0.0005, 'govt_policy': 'NONE...",0.0,223.0,"[[sympathetic, sympathetic, sympathetic], [sym...","[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]",60.0,63.0,78.0,69.0,1.395717,
4,"{'prob_violence': 0.0005, 'govt_policy': 'NONE...",0.0,357.0,"[[sympathetic, sympathetic, sympathetic], [sym...","[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, ...",31.0,19.0,22.0,21.0,1.288822,


First, let's see how many of our runs resulted in memory errors or had non-zero remaining population (both promising signs for the model complexity):

In [122]:
print("""
Model runs which had memory time-outs: {}
Model runs which had non-zero final populations: {}
"""
.format(results_df[results_df.final_pop.isna()].shape,
results_df[results_df.final_pop > 0.0].shape))


Model runs which had memory time-outs: (4323, 11)
Model runs which had non-zero final populations: (222, 11)



It appears a significant number of our models had memory issues running on Lambda instances, which means that the Lambda functions were more useful to weed out the unrealistic models than they were to significantly narrow down the list of models which I might want to study in more depth. If were not concerned about Midway resources, I would run all of these models with MPI (assigning more than the 10 cores I initially assigned), but since I am concerned about my Midway usage, I'll filter this list of memory-problem parameters further.  
  
My hypothesis is that a relatively small number of specific combinations of parameters which determine actions (prob_violence, govt_policy, reactive_lvl, and discontent) are likely responsible for the majority of these memory issues, since the starting population and number of steps act more to scale out the model than to adjust the behavior of the actors. Thus, I'll group by these action-determinants to determine which parameter combinations to use in our final Midway runs. 

In [125]:
memory_params = pd.DataFrame(results_df[results_df.final_pop.isna()].params.tolist())
memory_params.head()

Unnamed: 0,prob_violence,govt_policy,reactive_lvl,discontent,starting_population,steps
0,0.0005,NONE,mid-high,low,200,300
1,0.0005,NONE,mid-high,low,400,700
2,0.0005,NONE,mid-high,low,800,300
3,0.0005,NONE,mid-high,low,800,500
4,0.0005,NONE,mid-high,low,800,700


In [138]:
# collect unique combinations of prob_violence, govt_policy, reactive_lvl, and discontent
# to use for our runs
memory_params_gb = memory_params.groupby(['prob_violence','govt_policy','reactive_lvl','discontent']).count().reset_index()
memory_params_gb = memory_params_gb.rename(columns={'starting_population':'num_runs'})
memory_params_gb.drop(columns=['steps'],inplace=True)
memory_params_sorted = memory_params_gb.sort_values('num_runs',ascending=False)
memory_params_sorted

Unnamed: 0,prob_violence,govt_policy,reactive_lvl,discontent,num_runs
91,0.0010,CONC,high,low,20
233,0.0080,CONC,mid-high,mid,20
123,0.0010,REPR,low,high,20
249,0.0080,NONE,mid-low,high,20
103,0.0010,CONC,none,low,20
...,...,...,...,...,...
110,0.0010,NONE,low,mid,6
67,0.0005,NONE,mid-high,low,5
187,0.0050,CONC,mid-high,low,5
135,0.0030,CONC,high,high,4


Now, I'll collect these combinations of parameters using a starting population of 400 and 400 steps to run on Midway... 

In [153]:
final_params = memory_params_sorted.assign(starting_population = 400, steps = 400)
final_params.drop(columns=['num_runs'],inplace=True)
final_params

Unnamed: 0,prob_violence,govt_policy,reactive_lvl,discontent,starting_population,steps
91,0.0010,CONC,high,low,400,400
233,0.0080,CONC,mid-high,mid,400,400
123,0.0010,REPR,low,high,400,400
249,0.0080,NONE,mid-low,high,400,400
103,0.0010,CONC,none,low,400,400
...,...,...,...,...,...,...
110,0.0010,NONE,low,mid,400,400
67,0.0005,NONE,mid-high,low,400,400
187,0.0050,CONC,mid-high,low,400,400
135,0.0030,CONC,high,high,400,400


.. and join with the models which completed on Lambda with non-zero final populations to produce a final list.

In [154]:
pop_params = pd.DataFrame(results_df[results_df.final_pop > 0.0].params.tolist())
final_params = final_params.append(pop_params)
final_params.shape
# in case any of our newly assigned parameters from the memory-problem list matches the 
# nonzero-population params
final_params = final_params.drop_duplicates()
final_params.shape

(492, 6)

In [156]:
promising_params = final_params.to_dict('records')
promising_params

[{'prob_violence': 0.001,
  'govt_policy': 'CONC',
  'reactive_lvl': 'high',
  'discontent': 'low',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.008,
  'govt_policy': 'CONC',
  'reactive_lvl': 'mid-high',
  'discontent': 'mid',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.001,
  'govt_policy': 'REPR',
  'reactive_lvl': 'low',
  'discontent': 'high',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.008,
  'govt_policy': 'NONE',
  'reactive_lvl': 'mid-low',
  'discontent': 'high',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.001,
  'govt_policy': 'CONC',
  'reactive_lvl': 'none',
  'discontent': 'low',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.005,
  'govt_policy': 'CONC',
  'reactive_lvl': 'none',
  'discontent': 'high',
  'starting_population': 400,
  'steps': 400},
 {'prob_violence': 0.001,
  'govt_policy': 'REPR',
  'reactive_lvl': 'mid-high',
  'discontent': 'mid',
  'st

We now have 492 models to attempt running on Midway using MPI once again: this should be a much more manageable amount.

In [157]:
pickle.dump(promising_params, open("promising_params.p","wb"))