In [3]:
import boto3

In [2]:
region = 'us-east-1'

In [7]:
client = boto3.client('stepfunctions', region_name = region)

In [38]:
failed_execution_name = 'ce001005-a92a-44c2-abd6-a968d222f78e_152106'
state_machine_name = 'prod-incoming-leads-business-process'

In [39]:
failed_execution_arn = f'arn:aws:states:us-east-1:133240237239:execution:{state_machine_name}:{failed_execution_name}'

In [33]:
failed_response = client.get_execution_history(
            executionArn=failed_execution_arn,
            reverseOrder=True
        )

In [None]:
failed_response

In [15]:
failedEvents = [] 

In [55]:
failedEvents.extend(failed_response['events'])

In [56]:
failedEvents[0]['executionFailedEventDetails']

{'error': 'States.Timeout'}

In [21]:
currentEventId = failedEvents[0]['id']
while currentEventId != 0:
    # multiply event id by -1 for indexing because we're looking at the reversed history
    currentEvent = failedEvents[-1 * currentEventId]
    '''
    We can determine if the failed state was a parallel state because it an event
    with 'type'='ParallelStateFailed' will appear in the execution history before
    the name of the failed state
    '''
    if currentEvent['type'] == 'ParallelStateFailed':
        failedAtParallelState = True
    '''
    If the failed state is not a parallel state, then the name of failed state to return
    will be the name of the state in the first 'TaskStateEntered' event type we run into 
    when tracing back the execution history
    '''
    if currentEvent['type'] == 'TaskStateEntered' and failedAtParallelState == False:
        failedState = currentEvent['stateEnteredEventDetails']['name']
        failedInput = currentEvent['stateEnteredEventDetails']['input']
#         print (failedState, failedInput)
        print (failedState)
    '''
    If the failed state was a paralell state, then we need to trace execution back to 
    the first event with 'type'='ParallelStateEntered', and return the name of the state
    '''
    if currentEvent['type'] == 'ParallelStateEntered' and failedAtParallelState:
        failedState = failedState = currentEvent['stateEnteredEventDetails']['name']
        failedInput = currentEvent['stateEnteredEventDetails']['input']
#         print (failedState, failedInput)
        print (failedState)
    # Update the id for the next execution of the loop
    currentEventId = currentEvent['previousEventId']

FinalFanout


In [57]:
failedStateName = 'FinalFanout'
failedInput



In [22]:
def smArnFromExecutionArn(arn):
    '''
    Get the State Machine Arn from the execution Arn
    Input: Execution Arn of a state machine
    Output: Arn of the state machine
    '''
    smArn = arn.split(':')[:-1]
    smArn[5] = 'stateMachine'
    return ':'.join(smArn)

In [60]:
smArn = smArnFromExecutionArn(failed_execution_arn)
smArn

'arn:aws:states:us-east-1:133240237239:stateMachine:prod-incoming-leads-business-process'

In [90]:
response = client.describe_state_machine(
            stateMachineArn=smArn
        )

In [91]:
response

{'stateMachineArn': 'arn:aws:states:us-east-1:133240237239:stateMachine:prod-incoming-leads-business-process',
 'name': 'prod-incoming-leads-business-process',
 'status': 'ACTIVE',
 'definition': '{\n  "Comment": "Business process flow for leads",\n  "StartAt": "insertNonLeadFormDataInToDynamo",\n  "States": {\n    "insertNonLeadFormDataInToDynamo": {\n      "Type": "Task",\n      "Comment": "Insert all non lead form data into Dynamo DB",\n      "Resource": "arn:aws:lambda:us-east-1:133240237239:function:integriant-leadintake-api-prod-allLeadDynamoInsert",\n      "TimeoutSeconds": 60,\n      "Retry": [\n        {\n          "ErrorEquals": [\n            "HandledError",\n            "States.TaskFailed",\n            "Lambda.Unknown"\n          ],\n          "IntervalSeconds": 10,\n          "MaxAttempts": 2,\n          "BackoffRate": 2\n        },\n        {\n          "ErrorEquals": [\n            "States.ALL"\n          ],\n          "IntervalSeconds": 5,\n          "MaxAttempts": 2,\

In [92]:
from datetime import datetime
now_str = datetime.now().strftime("%Y%m%d")

In [93]:
import json

roleArn = response['roleArn']
stateMachine = json.loads(response['definition'])
# Create a name for the new state machine
newName = failed_execution_name + '-FR-' + now_str
# Get the StartAt state for the original state machine, because we will point the 'GoToState' to this state
originalStartAt = stateMachine['StartAt']
'''
Create the GoToState with the variable $.resuming
If new state machine is executed with $.resuming = True, then the state machine will skip to the failed state
Otherwise, it will execute the state machine from the original start state
'''
goToState = {'Type': 'Choice',
             'Choices': [{'Variable': '$.resuming', 'BooleanEquals': False, 'Next': originalStartAt}],
             'Default': failedStateName}
# Add GoToState to the set of states in the new state machine
stateMachine['States']['GoToState'] = goToState
# Add StartAt
stateMachine['StartAt'] = 'GoToState'
# Create new state machine
try:
    response = client.create_state_machine(
        name=newName,
        definition=json.dumps(stateMachine),
        roleArn=roleArn
    )
except:
    raise ('Failed to create new state machine with GoToState')
print (response)

{'stateMachineArn': 'arn:aws:states:us-east-1:133240237239:stateMachine:ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526', 'creationDate': datetime.datetime(2021, 5, 26, 10, 55, 37, 877000, tzinfo=tzlocal()), 'ResponseMetadata': {'RequestId': '7d32ad0e-e768-467e-bbe4-1383dcc1fc05', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '7d32ad0e-e768-467e-bbe4-1383dcc1fc05', 'content-type': 'application/x-amz-json-1.0', 'content-length': '160'}, 'RetryAttempts': 0}}


In [40]:
failed_execution_name

'ce001005-a92a-44c2-abd6-a968d222f78e_152106'

In [106]:
new_statemachine_arn = 'arn:aws:states:us-east-1:133240237239:stateMachine:ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526'

In [76]:
failed_input = json.loads(failedInput)

In [None]:
json.dumps(failed_input)

In [104]:
response["stateMachineArn"]

KeyError: 'stateMachineArn'

In [105]:
new_sm_name = stateMachineArn=response["stateMachineArn"].split(":")[-1]

KeyError: 'stateMachineArn'

In [102]:
failed_input["resuming"] = True

In [100]:
new_sm_arn

'ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526'

In [107]:
response = client.start_execution(
    stateMachineArn=new_statemachine_arn,
    name=new_sm_arn + "-" +  datetime.now().strftime("%H%M%S"),
    input=json.dumps(failed_input)
)

In [109]:
try:
    response = client.delete_state_machine(
        stateMachineArn=new_statemachine_arn
    )
except:
    raise ('Failed to create new state machine with GoToState')
print (response)

{'ResponseMetadata': {'RequestId': '3095eedd-ae83-4098-a8c3-d0c183eb73fe', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '3095eedd-ae83-4098-a8c3-d0c183eb73fe', 'content-type': 'application/x-amz-json-1.0', 'content-length': '2'}, 'RetryAttempts': 0}}


In [110]:
exec_arn = "arn:aws:states:us-east-1:133240237239:execution:ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526:ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526-110334"

In [115]:
exec_arn.split(":")[-2]

'ce001005-a92a-44c2-abd6-a968d222f78e_152106-FR-20210526'