# Imports

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) numerically: a plain string comparison orders versions
# lexicographically, so e.g. "0.3" >= "0.20" would wrongly pass.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None and tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20)

# Common imports
import numpy as np
import os

# NLTK to find word stems
import nltk

# Data Cleaning

In [None]:
import pandas as pd # Pandas library enables data manipulation
# NOTE(review): this URL carries a `token=` query parameter, i.e. an access
# credential hard-coded in the notebook. Presumably a short-lived GitHub raw-file
# token -- confirm it is expired/revoked and load the URL from configuration or an
# environment variable instead of committing it.
data_url = "https://raw.githubusercontent.com/linklab-uva/Scenario2Vector/master/BDDX.csv?token=AH3QIXZFJFEOSPN3LXNIPD27ANQAM"
def load_bddx_data(csv_name):
    """Read the BDD-X CSV into a DataFrame with a fixed set of 62 column names.

    The columns are 'Index' and 'InputVideo' followed, for each of the 15 time
    segments, by '<n>S', '<n>E', '<n>A' and '<n>J'.
    (Presumably start, end, action and justification -- confirm against the dataset.)

    Because explicit names are supplied, any header line in the file is returned
    as an ordinary first data row.

    csv_name: path, URL or file-like object accepted by pandas.read_csv.
    Returns the resulting DataFrame.
    """
    segment_fields = ('S', 'E', 'A', 'J')
    column_names = ['Index', 'InputVideo']
    for segment in range(1, 16):
        column_names.extend(str(segment) + field for field in segment_fields)

    return pd.read_csv(csv_name, names=column_names)
bddx = load_bddx_data(data_url)

# The 15 time segments are labelled '1' .. '15' in the column names.
segment_ids = [str(number) for number in range(1, 16)]

# The per-segment 'S' and 'E' columns are not used further; drop them.
bddx = bddx.drop([seg + suffix for seg in segment_ids for suffix in ('S', 'E')], axis=1)
bddx = bddx.fillna("")

# Join each segment's 'A' and 'J' text into a single '<n>AJ' column.
for seg in segment_ids:
    bddx[seg + 'AJ'] = bddx[[seg + 'A', seg + 'J']].agg(' '.join, axis=1)

# Only 'InputVideo' and the combined '<n>AJ' columns are kept.
bddx = bddx.drop(['Index'] + [seg + suffix for seg in segment_ids for suffix in ('A', 'J')], axis=1)
# The first row is discarded (with explicit column names the CSV's own first line is read as data).
bddx = bddx.drop(bddx.index[0])

# SDL Class

In [None]:
class Actor:
    """A participant in a driving scenario together with the action it performs."""

    def __init__(self, description):
        """
        Store the actor category and start with no action assigned.

        description is expected to be one of:
          ego
          light vehicle
          heavy vehicle
          cyclist
          pedestrian
        """
        self.description, self.action = description, ""

In [None]:
class SDL_Util:
    """Lookup tables and a shared stemmer used when parsing statements into SDL.

    Attributes set by __init__:
      stemmer          -- nltk PorterStemmer instance
      actor_list       -- stemmed noun -> actor category
      scene_list       -- nouns recognised as scene elements
      action_list      -- stemmed verb -> canonical action
      adjective_action -- adjective following "is"/"are" -> canonical action or "_negative"
      light_status     -- traffic-light colours, in lookup priority order
      directions       -- direction words
      debug            -- word -> list of phrases that could not be classified
    """

    def __init__(self):
        self.stemmer = nltk.stem.PorterStemmer()
        # stemmer allows us to match words with the same roots: e.g., it identifies "turns", "turning", and "turn" as the same word
        # But it recognizes "slow" and "slowly" as different, and it thinks "go" and "goes" are different

        init_actor_list = {'car':'light vehicle',
                           'bus':'heavy vehicle',
                           'truck':'heavy vehicle',
                           'cyclist':'cyclist',
                           'pedestrian':'pedestrian',
                           'ambulance':'heavy vehicle',
                           'minivan':'light vehicle',
                           'traffic':'traffic',
                           'the car':'light vehicle'
                           }
        # Every lookup into actor_list uses the *stemmed* noun, so the keys must be
        # stemmed as well (exactly as for action_list below). With raw keys, words whose
        # stem differs from the word itself (e.g. "bus", "ambulance") could never match.
        self.actor_list = {}
        for noun in init_actor_list:
            self.actor_list[self.stemmer.stem(noun)] = init_actor_list[noun]

        # NOTE(review): scene_list is also queried with stemmed nouns but holds raw words,
        # so entries whose stem differs from the word (e.g. 'intersection') look unmatchable.
        # Left unchanged here because stemming it would emit scene strings (e.g. plurals)
        # that scene_encoding further down may not contain -- confirm and fix both together.
        self.scene_list = ['intersection',
                           'crosswalk',
                           'bridge',
                           'light',
                           'sign',
                           'traffic light',
                           'traffic signal',
                           'turn lane',
                           ]

        init_action_list = {'turn':'turn',
                            'merge':'merge',
                            'swerve':'merge',
                            'veer':'merge',
                            'switch':'merge',
                            'accelerate':'accelerate',
                            'pick':'accelerate',
                            'brake':'brake',
                            'slow':'brake',
                            'reduce':'brake',
                            'decelerate':'brake',
                            'stop':'stop',
                            'wait':'stop',
                            'sit':'stop',
                            'forward':'forward',
                            'move':'forward',
                            'stay':'forward',
                            'maintain':'forward',
                            'proceeds':'forward',
                            'proceed':'forward',
                            'inch':'forward',
                            'pass':'forward',
                            'roll':'forward',
                            'advance':'forward',
                            'drive':'drive',
                            'steer':'drive',
                            'go':'drive',
                            'goes':'drive',
                            'head':'drive',
                            'pull':'drive',
                            'travel':'drive',
                            'flow':'drive',
                            'reverse':'reverse',
                            'walk':'walk',
                            'cross':'walk',
                            'park':'park',
                            'drift':'forward', # Not sure about this one, but it's not changing lanes so I'm calling it "forward"
                            'block':'stop', # Vehicles that are blocking generally are stopped in the path of the ego
                            'enter':'merge',
                            'straighten':'forward',
                            'follow':'forward',
                            'shift':'merge',
                            'change':'merge',
                            'stand':'stop', # Standing still
                            'curve':'forward',
                           }
        self.adjective_action = {'clear':'_negative', # traffic is clear
                                 'stationary':'stop', # car is stationary
                                 'complete':'stop', # is at a complete stop
                                 'heavy':'forward', # traffic is heavy
                                 'accelerating':'accelerate', # car is accelerating
                                 'light':'_negative', # traffic is light
                                 'slow':'forward', # traffic is slow
                                 'parallel':'reverse', # car is parallel parking
                                 'rolling':'forward', # car is rolling forward
                                 'driving':'drive', # car is driving
                                 'full':'stop', # car is at a full stop
                                 'stopped':'stop', # car is stopped
                                 'busy':'forward', # traffic is busy
                                 'double':'stop', # car is double parked
                                 'double-parked':'stop', # car is double-parked
                                 'idle':'stop', # car is idle
                                 'stopping':'brake', # car is stopping
                                 'minimal':'_negative', # traffic is minimal
                                 'empty':'_negative', # traffic is empty
                                 'moderate':'forward', # traffic is moderate
                                }
        # Key the action table by stem so any inflection of a verb maps to its action.
        self.action_list = {}
        for a in init_action_list:
            self.action_list[self.stemmer.stem(a)] = init_action_list[a]

        self.light_status = ["green","yellow","red"]
        self.directions = ["left","right","u-turn","uturn","through","backward","down","straight"]

        # "_None" collects phrases in which no verb was found at all.
        self.debug = {"_None":[]}

    def update_debug(self,word,phrase):
        """Record `phrase` under `word` in the debug table, creating the entry if needed."""
        self.debug.setdefault(word, []).append(phrase)

# Shared module-level instance: the SDL methods read `util` as a global
# (stemmer, lookup tables and the debug log).
util = SDL_Util()

class SDL:
  def __init__(self, index, statements, link):
    self.statements = statements
    self.index = index
    self.videoLink = link
    
    self.actors = {'1':[], '2':[], '3':[], '4':[], '5':[], '6':[], '7':[], 
                   '8':[], '9':[], '10':[], '11':[], '12':[], '13':[], '14':[], '15':[]}
    self.scene = {'1':[], '2':[], '3':[], '4':[], '5':[], '6':[], '7':[], 
                   '8':[], '9':[], '10':[], '11':[], '12':[], '13':[], '14':[], '15':[]}
        
  def getDescriptors(self, statement, timeSegment): 

    init_phrase = nltk.word_tokenize(statement.lower())
    phrase = nltk.pos_tag(init_phrase)

    if(statement == "No Data Recorded"):
      self.actors[timeSegment].append(Actor("NaN"))
      self.actors[timeSegment][-1].action = "NaN"
      self.scene[timeSegment].append("NaN")
    
    skip = 0
    actor_locs = []
    for idx,word in enumerate(phrase):
        while skip > 0:
            skip-=1
            continue
        if word[1][0] == "N": # Word is a noun
            element = word[0]
            for i in range(idx+1,len(phrase)):
                if phrase[i][1][0] == "N": # consecutive nouns should be together, e.g. "traffic light"
                    element+=" "+phrase[i][0]
                    skip+=1
                elif element == "traffic" and (phrase[i][0] == "light" or phrase[i][0] == "signal"):
                    element+=" "+phrase[i][0]
                    skip+=1
                else:
                    break
            if element == "car" and (idx > 0 and phrase[idx-1][0] == "the") and (idx+1 < len(phrase) and phrase[idx+1] != 'in'):
                element = "the car" # "the car" always refers to the ego, except in the case of "the car in front"
            elif element == "sign":
                # Find the type of sign, e.g. "stop sign"
                if idx > 0 and phrase[idx-1][1][0] == "J":
                    element = phrase[idx-1][0]+" "+element
            if util.stemmer.stem(element) in util.actor_list or util.stemmer.stem(element) in util.scene_list:
                actor_locs.append((element,idx))
        elif word[1] == ".":
            actor_locs.append((".",idx))
    for idx in range(len(actor_locs)):
        if idx+1 < len(actor_locs):
            search_phrase = phrase[actor_locs[idx][1]:actor_locs[idx+1][1]]
        else:
            search_phrase = phrase[actor_locs[idx][1]:]
        if util.stemmer.stem(actor_locs[idx][0]) in util.actor_list:
            is_positive = True
            negative = ["no","clear","lack","free"]
            for i in range(max(0,actor_locs[idx][1]-3),actor_locs[idx][1]):
                if phrase[i][0] in negative:
                    is_positive = False # don't add an actor if the phrase is "there are no cars"
                    break
            if is_positive:
                self.compose_actor(search_phrase,actor_locs[idx][0],timeSegment)
        elif util.stemmer.stem(actor_locs[idx][0]) in util.scene_list:
            #TODO: Check lights to see if the color is 1 word earlier (e.g., "red light")
            self.compose_scene(search_phrase,actor_locs[idx][0],timeSegment)
                
  def compose_actor(self,phrase,actor,timeSegment):
    if len(self.actors[timeSegment]) == 0:
        self.actors[timeSegment].append(Actor("ego"))
        action = self.find_action(phrase)
        if action and action != "_negative":
            self.actors[timeSegment][-1].action = action
    elif actor == 'the car':
        return # "the car" always refers to ego
    elif util.stemmer.stem(actor) == 'pedestrian': # Pedestrians always walk
        self.actors[timeSegment].append(Actor(util.actor_list[util.stemmer.stem(actor)]))
        self.actors[timeSegment][-1].action = util.action_list[util.stemmer.stem("walk")]
    else:
        action = self.find_action(phrase)
        if action == "_negative":
            return # don't add an actor if the phrase is "the traffic is clear"
        self.actors[timeSegment].append(Actor(util.actor_list[util.stemmer.stem(actor)]))
        if action:
            self.actors[timeSegment][-1].action = action
            
  def find_action(self,phrase):
    """Determine the canonical action expressed in `phrase`.

    phrase -- sequence of (token, POS-tag) pairs; the search starts at index 1
              (callers pass a slice that begins at the actor's own token).

    The first verb-tagged token is located (adverbs are skipped, the search stops
    at a sentence-ending "." tag, and in a run of verbs the last one wins). Its
    stem is looked up in util.action_list; verbs not found there go through a
    cascade of hand-written rules.

    Returns:
      None         -- no verb was found, or the verb could not be classified
                      (the phrase is then recorded through util.update_debug)
      "_negative"  -- the phrase states absence (e.g. traffic has cleared)
      otherwise an action string such as "stop", "forward", "reverse",
      "turn left", "merge right" or "u-turn".
    """
    # Both lists are compared against the *stemmed* verb.
    delay_verbs = ["come","make","continu","begin","start","complet","tri"] # Verbs where the actual action is later in the sentence
    exist_verbs = ["is","are","remain"]
    action = ""
    action_index = 0
    # Step 1: locate the verb (raw token) and remember its position.
    for i in range(1,len(phrase)):
        if phrase[i][1][0:2] == "RB": # Ignore adverbs
            continue
        elif phrase[i][1] == ".": # Sentence has ended. Stop looking for a verb
            break
        if phrase[i][1][0] == "V":
            action = phrase[i][0]
            action_index = i
            for j in range(i+1,len(phrase)):
                if phrase[j][1][0:2] == "RB": # Ignore adverbs
                    continue
                elif phrase[j][1][0] == "V": # If there are multiple verbs in a row, take the last
                    if phrase[j][0] == "left": # "left" is getting misidentified as a verb, ignore it
                        break
                    action = phrase[j][0]
                    action_index = j
                else:
                    break
            break
    # Step 2: map the verb to a canonical action. Branches that `return` are final;
    # branches that only assign `action` fall through to the direction handling below.
    if action == "":
        util.update_debug("_None",phrase)
        return None
    elif util.stemmer.stem(action) in util.action_list:
        action = util.action_list[util.stemmer.stem(action)]
    # Start of hand-crafted rules exclusive to this dataset
    elif action_index+1 < len(phrase) and phrase[action_index+1][0] == "forward":
        return util.action_list[util.stemmer.stem("forward")]
    elif util.stemmer.stem(action) in delay_verbs:
        # The real action follows the verb: scan the rest of the phrase for a known
        # action word (verb or noun) or a direction word.
        better_action = False
        for i in range(action_index+1,len(phrase)):
            if phrase[i][1][0:2] == "RB": # Ignore adverbs
                continue
            if util.stemmer.stem(phrase[i][0]) in util.action_list:
                word = phrase[i][0]
                # Extend over a run of tokens of the same kind (verbs or nouns), keeping the last.
                for j in range(i+1,len(phrase)):
                    if phrase[j][1][0:2] == "RB": # Ignore adverbs
                        continue
                    elif phrase[i][1][0] == "V" and phrase[j][1][0] == "V":
                        word = phrase[j][0]
                    elif phrase[i][1][0] == "N" and phrase[j][1][0] == "N":
                        word = phrase[j][0]
                    else:
                        break
                if util.stemmer.stem(word) in util.action_list:
                    action = util.action_list[util.stemmer.stem(word)]
                    better_action = True
                elif word == "hand":
                    # A run ending in "hand" is treated as a turn.
                    action = util.action_list["turn"]
                    better_action = True
            elif phrase[i][0] in util.directions:
                # A bare direction word is treated as "drive"; the direction is resolved below.
                action = util.action_list["drive"]
                better_action = True
            if better_action:
                break
        if not better_action:
            if util.stemmer.stem(action) == "continu":
                # The only continues unclassified at this point are "forward"
                return util.action_list[util.stemmer.stem("forward")]
            elif util.stemmer.stem(action) == util.stemmer.stem("come"):
                if action_index+1 < len(phrase) and phrase[action_index+1][0] == "to":
                    return util.action_list[util.stemmer.stem("stop")] # the car comes to a stop
                else:
                    action = "drive"
            else:
                util.update_debug(util.stemmer.stem(action),phrase)
                return None
    elif util.stemmer.stem(action) in exist_verbs:
        if action_index+1 < len(phrase) and phrase[action_index+1][0] == "at":
            # traffic is at a stop / standstill / red
            return util.action_list[util.stemmer.stem("stop")]
        # Otherwise classify by the first adjective directly after the verb.
        adj = ""
        for i in range(action_index+1,len(phrase)):
            if phrase[i][1][0] == "R":
                continue # Ignore adverbs
            elif phrase[i][1][0] == "J":
                adj = phrase[i][0]
                break
            else:
                break
        if adj in util.adjective_action:
            action = util.adjective_action[adj]
        else:
            util.update_debug(util.stemmer.stem(action),phrase)
            return None
    elif util.stemmer.stem(action) == util.stemmer.stem("speed"):
        if action_index+1 < len(phrase) and phrase[action_index+1][0] == "up":
            return util.action_list[util.stemmer.stem("accelerate")]
        elif action_index+1 < len(phrase) and phrase[action_index+1][0] == "down":
            return util.action_list[util.stemmer.stem("forward")]
        else:
            util.update_debug(util.stemmer.stem(action),phrase)
            return None
    elif util.stemmer.stem(action) == util.stemmer.stem("back"):
        if action == "backed" and action_index > 0 and phrase[action_index-1][0] == "is":
            return util.action_list[util.stemmer.stem("stop")] # traffic is backed up
        else:
            return util.action_list[util.stemmer.stem("reverse")]
    elif util.stemmer.stem(action) == util.stemmer.stem("clear"):
        if action_index > 0 and phrase[action_index-1][0] == "to":
            return util.action_list[util.stemmer.stem("forward")] # waiting for traffic to clear
        else:
            return "_negative" # traffic has cleared
    else:
        util.update_debug(util.stemmer.stem(action),phrase)
        return None

    # Step 3: refine direction-dependent actions using the words around the verb.
    if action == "drive": # "drive" needs more information
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "turn "+direction
        elif direction == "through" or direction == "down" or direction == "straight":
            return "forward"
        elif direction == "backward":
            return "reverse"
        elif direction == "u-turn":
            return "u-turn"
        else:
            return "forward"
    elif action == "turn":
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "turn "+direction
        elif direction == "u-turn":
            return "u-turn"
        else:
            return "turn"
    elif action == "merge":
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "merge "+direction
        else:
            return "merge"
    else:
        return action
    
  def find_direction(self,phrase,index):
    if index > 0 and (phrase[index-1][0] == "left" or phrase[index-1][0] == "right"):
        return phrase[index-1][0]
    for i in range(index+1,len(phrase)):
        if phrase[i][0] in util.directions:
            if phrase[i][0] == "left":
                if i+2 < len(phrase) and phrase[i+1][0] == "to" and phrase[i+2][0] == "right":
                    return "right"
                else:
                    return "left"
            elif phrase[i][0] == "right":
                if i+2 < len(phrase) and phrase[i+1][0] == "to" and phrase[i+2][0] == "left":
                    return "left"
                else:
                    return "right"
            elif phrase[i][0] == "u-turn" or phrase[i][0] == "uturn":
                return "u-turn"
            else:
                return phrase[i][0]
        elif phrase[i][0] == "front" and (i > 0 and phrase[i-1][0] == "in"):
            #n-gram "in front"
            return "center"
        elif phrase[i][0] == "u" and (i+1 < len(phrase) and phrase[i+1][0] == "turn"):
            #n-gram "u turn"
            return "u-turn"
        elif phrase[i][0] == "its" and (i+1 < len(phrase) and phrase[i+1][0] == "lane"):
            #n-gram "its lane"
            return "center"
        if phrase[i][1][0] == "V":
            return None
    return None
            
  def compose_scene(self,phrase,scene,timeSegment):
    element = scene.split(" ")
    if element[-1] == "light":
        status = ""
        for ls in util.light_status:
            for word in phrase:
                if word[0] == ls:
                    status = ls
                    break
            if status != "":
                break
        if status != "":
            self.scene[timeSegment].append(status+" "+scene)
            return
        else:
            self.scene[timeSegment].append(scene)
            return
    else:
        self.scene[timeSegment].append(scene)
        return



In [None]:
# One string-typed row (pandas Series) per record of the cleaned table.
sdlList = [row.astype(str) for _, row in bddx.iterrows()]
assert len(sdlList) == 6996, "length of sdl list should be 6996"

In [None]:
# For every record build a dict {segment key -> statement}. A statement that is a
# single space (both joined parts were empty) becomes the "No Data Recorded" placeholder.
segment_labels = [str(number) for number in range(1, 16)]

sdlStatements = []
for record in sdlList:
    per_segment = {}
    for label in segment_labels:
        text = record[label + 'AJ']
        per_segment[label] = "No Data Recorded" if text == " " else text
    sdlStatements.append(per_segment)

# Wrap every record in an SDL object: its position, its statements and its video link.
sdlObjectList = [SDL(position, sdlStatements[position], record['InputVideo'])
                 for position, record in enumerate(sdlList)]

In [None]:
# Tokenizer and POS-tagger models used by SDL.getDescriptors
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Parse every statement of every SDL object into actors and scene elements.
for sdl_object in sdlObjectList:
    for segment, statement in list(sdl_object.statements.items()):
        # getDescriptors only appends, so start from empty lists: re-running this
        # cell must not duplicate the actors/scenes found by a previous run.
        sdl_object.actors[segment] = []
        sdl_object.scene[segment] = []
        sdl_object.getDescriptors(statement, segment)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
# Show the statements, the extracted actor/action pairs and the scene of a few objects.
examples = [1,5730,1999]

for example in examples:
    sdl_object = sdlObjectList[example]
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdl_object.statements)

    # Segment keys are the strings '1' .. 'N'
    for segment_number in range(1, len(sdl_object.actors) + 1):
        actorsIndex = str(segment_number)
        for actor in sdl_object.actors[actorsIndex]:
            print('Time segment:', actorsIndex, ",  %s: %s"%(actor.description,actor.action))
    print('Scene: ', sdl_object.scene)

Object 1: 
Actors: 
{'1': 'The car is stopped. The car is at an intersection with a red light.', '2': 'The car is accelerating through the intersection. The light at the intersection has changed to green', '3': 'No Data Recorded', '4': 'No Data Recorded', '5': 'No Data Recorded', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: stop
Time segment: 2 ,  ego: accelerate
Time segment: 3 ,  NaN: NaN
Time segment: 4 ,  NaN: NaN
Time segment: 5 ,  NaN: NaN
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Time segment: 12 ,  NaN: NaN
Time segment: 13 ,  NaN: NaN
Time segment: 14 ,  NaN: NaN
Time segment: 15 ,  NaN: NaN
Scene:  {'1': ['light'], '2':

In [None]:
# Count how many actors have associated actions
actor_count = 0
action_count = 0
for idx in range(len(sdlObjectList)):
    so = sdlObjectList[idx]
    for timestep in so.actors:
        for actor in so.actors[timestep]:
            if actor.description == "NaN":
                break
            actor_count+=1
            if actor.action != "":
                action_count+=1

print("There are %i actors and %i actions. Completeness: %f"%(actor_count,action_count,action_count/actor_count))

# Display the top 10 unclassified verbs
print("Top 10 unclassified verbs:")
dd = sorted(util.debug.items(), key = lambda kv:(-len(kv[1]),kv[0]))
cutoff = 10
for (key,value) in dd:
    print("  %s: %i"%(key,len(value)))
    cutoff-=1
    if cutoff <= 0:
        break

There are 31314 actors and 28396 actions. Completeness: 0.906815
Top 10 unclassified verbs:
  _None: 1968
  is: 159
  ha: 27
  are: 22
  get: 21
  take: 21
  begin: 20
  straight: 19
  approach: 18
  jam: 18


# SDL Representation with Matrices

In [None]:
import numpy as np
# For every time segment of every SDL object in sdlObjectList, build
#   * an (n x 2) matrix with one row per actor: [actor code, action code]
#   * a length-m vector with one code per scene element (a single 'NaN' code
#     when the segment has no scene element or is a "No Data Recorded" placeholder)
# The codes are integer indices from the tables below (this is NOT a one-hot encoding).
# sdl_embeddings is flat: entry k belongs to object k // 15, time segment (k % 15) + 1.
actor_encoding = {'light vehicle': 0, 'heavy vehicle': 1, 'cyclist': 2, 'pedestrian': 3, 'traffic': 4, 'ego': 5, 'NaN': 6}

# NOTE(review): code 2 is unused in this table -- kept as is; confirm whether that is intentional.
action_encoding = {'turn': 0, 'turn left': 1, 'turn right': 3, 'merge': 4, 'accelerate': 5, 'brake': 6, 'stop': 7,
                   'forward': 8, 'walk': 9, 'park': 10, 'drive': 11, 'reverse': 12, 'merge center': 13, 'merge left': 14,
                   'merge right': 15, 'turn through': 16, 'merge u turn': 17, 'u-turn': 18, 'NaN': 19, '':20}

scene_encoding = {'intersection': 0, 'crosswalk': 1, 'bridge': 2, 'green light': 3, 'stop sign': 4, 'yield sign': 5, 'sign': 6,
                  'u-turn': 7, 'traffic light': 8, 'traffic signal': 9, 'turn lane': 10, 'crosswalks': 11, 'green traffic light': 12,
                  'light': 13, 'lights': 14, 'red light': 15, 'red traffic light': 16, 'signs': 17, 'traffic lights': 18,
                  'yellow light': 19, 'yellow traffic light': 20, 'NaN': 21}


def _encode_segment(actors, scenes):
    """Encode one time segment as [actor_action_matrix, scene_matrix].

    actors -- list of Actor objects of the segment
    scenes -- list of scene-element strings of the segment
    Raises KeyError for a description, action or scene element missing from the tables.
    """
    # Actor and action are read from the same object, so the two columns are always paired.
    actor_action_matrix = np.zeros((len(actors), 2))
    for row, actor in enumerate(actors):
        actor_action_matrix[row][0] = actor_encoding[actor.description]
        actor_action_matrix[row][1] = action_encoding[actor.action]

    if len(scenes) > 0 and scenes[0] != 'NaN':
        scene_indices = [scene_encoding[element] for element in scenes]
    else:
        scene_indices = [scene_encoding['NaN']]
    scene_matrix = np.array(scene_indices, dtype=float)

    return [actor_action_matrix, scene_matrix]


sdl_embeddings = []
for sdl_object in sdlObjectList: #loops through 6996 sdl objects in sdlObjectList
    for segment in sdl_object.actors: #loops through 15 time segments
        sdl_embeddings.append(_encode_segment(sdl_object.actors[segment], sdl_object.scene[segment]))

assert len(sdl_embeddings) == 104940, "length of sdlEmbeddings should be 104940"

In [None]:
# Element 1 of an embedding entry is its scene vector; show the one of flat entry 45.
sdl_embeddings[45][1]

array([21.])

# Similarity metric for matrix SDL (deprecated — do not use)


In [None]:
# Deprecated experiment: weighted mismatch between the actor/action matrices of two
# time-segment entries. The recorded output of this cell ends in a ValueError.
actor_weight = 2.0
action_weight = 1.0
# Element 0 of an embedding entry is its actor/action matrix.
vec3 = sdl_embeddings[45][0]
vec4 = sdl_embeddings[480][0]
# NOTE(review): vec3 is transposed before the subtraction, so this relies on
# broadcasting between the transposed vec3 and the untransposed vec4 and only works
# for compatible actor counts -- confirm the intended alignment before reusing.
temp = np.subtract(np.transpose(vec3), vec4)
# Turn every non-zero difference into a 1 (mismatch indicator).
temp[np.nonzero(temp)] = 1
actor_action_weights = np.array([actor_weight, action_weight])
temp = np.multiply(temp, actor_action_weights)
print(temp.shape)
# Element-wise product of temp with itself.
C = np.einsum('ij,ij->ij' ,temp,temp)

(2, 2)


ValueError: ignored

In [None]:
# Deprecated experiment (same computation as the previous cell, without the einsum step);
# the resulting `temp` is used by the next cell.
actor_weight = 2.0
action_weight = 1.0
# Element 0 of an embedding entry is its actor/action matrix.
vec3 = sdl_embeddings[45][0]
vec4 = sdl_embeddings[480][0]
# NOTE(review): subtracting vec4 from the transposed vec3 relies on broadcasting and
# only works for compatible actor counts -- confirm the intended alignment.
temp = np.subtract(np.transpose(vec3), vec4)
print(temp)
# Turn every non-zero difference into a 1, then scale the columns by the weights.
temp[np.nonzero(temp)] = 1
actor_action_weights = np.array([actor_weight, action_weight])
temp = np.multiply(temp, actor_action_weights)


[[ 0. -3.]
 [ 3.  1.]]


In [None]:
# Deprecated experiment: sum |temp| along every second diagonal. Offsets are
# non-positive (at/below the main diagonal) when temp has more rows than columns,
# otherwise non-negative (at/above it). Requires `temp` from the previous cell.
diag_range = range(-temp.shape[0]+2,1, 2) if temp.shape[0]>temp.shape[1] else range(0, temp.shape[1], 2)
diags = [np.abs(temp).trace(i) for i in diag_range]
np.sum(diags)

1.0

In [None]:
# prep list of sentences
bddx_statements = []
for i in range(len(sdlList)):
  bddx_statements.append(sdlList[i]['1AJ'])
  bddx_statements.append(sdlList[i]['2AJ'])
  bddx_statements.append(sdlList[i]['3AJ'])
  bddx_statements.append(sdlList[i]['4AJ'])
  bddx_statements.append(sdlList[i]['5AJ'])
  bddx_statements.append(sdlList[i]['6AJ'])
  bddx_statements.append(sdlList[i]['7AJ'])
  bddx_statements.append(sdlList[i]['8AJ'])
  bddx_statements.append(sdlList[i]['9AJ'])
  bddx_statements.append(sdlList[i]['10AJ'])
  bddx_statements.append(sdlList[i]['11AJ'])
  bddx_statements.append(sdlList[i]['12AJ'])
  bddx_statements.append(sdlList[i]['13AJ'])
  bddx_statements.append(sdlList[i]['14AJ'])
  bddx_statements.append(sdlList[i]['15AJ'])
assert len(bddx_statements) == 104940, "length of bddx_statements should be 104940"

In [None]:
# Statement at flat index 480 of bddx_statements.
print(bddx_statements[480])

The car is stopped because the light is red.
