# Imports

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# NLTK to find word stems
import nltk
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')

# Berkeley Neural Parser
# https://github.com/nikitakit/self-attentive-parser
import benepar
#benepar.download('benepar_en2')


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Data Cleaning

In [None]:
import pandas as pd # Pandas library enables data manipulation
data_url = "./revisedBDDX.csv"
def load_bddx_data(csv_name):
    """
    Read the annotation CSV into a DataFrame with fixed column names.

    The columns are 'Index' and 'InputVideo', followed for each segment
    number 1..15 by '<n>S', '<n>E', '<n>A' and '<n>J'.

    csv_name: path (or anything else pandas.read_csv accepts) of the CSV.
    Returns the DataFrame produced by pandas.read_csv with those names.
    """
    column_names = ['Index', 'InputVideo']
    for segment in range(1, 16):
        for suffix in 'SEAJ':
            column_names.append('%d%s' % (segment, suffix))

    return pd.read_csv(csv_name, names=column_names)
# Load the raw annotations and reduce them to 'InputVideo' plus one combined
# text column ('<n>AJ') per segment.
bddx = load_bddx_data(data_url)

_segment_ids = [str(n) for n in range(1, 16)]

# The '<n>S' / '<n>E' columns are not used by the rest of the notebook.
bddx = bddx.drop([seg + mark for seg in _segment_ids for mark in ('S', 'E')], axis=1)
# Empty cells become "" so that every remaining value can be joined as text.
bddx = bddx.fillna("")

# '<n>AJ' = '<n>A' + ' ' + '<n>J'; two empty cells therefore give the string " ".
for seg in _segment_ids:
    bddx[seg + 'AJ'] = bddx[[seg + 'A', seg + 'J']].agg(' '.join, axis=1)

# Drop the columns that have been merged, together with 'Index'.
bddx = bddx.drop(['Index'] + [seg + mark for seg in _segment_ids for mark in ('A', 'J')], axis=1)
# Discard the first row (presumably the CSV's own header line read as data -- confirm).
bddx = bddx.drop(bddx.index[0])

# SDL Class

In [None]:
class Actor:
    """A participant in a time segment together with the action assigned to it."""

    def __init__(self, description):
        """
        Create an actor that has no action yet (self.action == "").

        description is expected to be one of:
          ego, light vehicle, heavy vehicle, cyclist, pedestrian
        """
        self.action = ""
        self.description = description

In [None]:
class SDL_Util:
    """Shared NLP tools and vocabulary tables used when turning captions into SDL."""

    def __init__(self):
        """Create the stemmer and parser and build the lookup tables."""
        # The stemmer lets words with the same root match: "turns", "turning" and
        # "turn" are treated as one word. It still keeps "slow"/"slowly" and
        # "go"/"goes" apart, which is why some inflected forms are listed below.
        self.stemmer = nltk.stem.PorterStemmer()
        # The parser turns a sentence into a parse tree.
        self.parser = benepar.Parser("benepar_en2")

        # noun -> actor category
        self.actor_list = {'car':'light vehicle',
                           'bus':'heavy vehicle',
                           'truck':'heavy vehicle',
                           'cyclist':'cyclist',
                           'pedestrian':'pedestrian',
                           'ambulance':'heavy vehicle',
                           'minivan':'light vehicle',
                           'traffic':'traffic',
                           }
        # nouns that are recorded as scene elements
        self.scene_list = ['intersect', # intersection stem
                           'crosswalk',
                           'bridge',
                           'light',
                           'sign',
                           'stop sign',
                           'yield sign',
                           'traffic light',
                           'traffic signal',
                           'turn lane',
                           ]

        # verb -> canonical action; the keys are stemmed further down
        init_action_list = {'turn':'turn',
                            'merge':'merge',
                            'swerve':'merge',
                            'veer':'merge',
                            'switch':'merge',
                            'accelerate':'accelerate',
                            'pick':'accelerate',
                            'brake':'brake',
                            'slow':'brake',
                            'reduce':'brake',
                            'decelerate':'brake',
                            'stop':'stop',
                            'wait':'stop',
                            'sit':'stop',
                            'forward':'forward',
                            'move':'forward',
                            'stay':'forward',
                            'maintain':'forward',
                            'proceeds':'forward',
                            'proceed':'forward',
                            'inch':'forward',
                            'pass':'forward',
                            'roll':'forward',
                            'advance':'forward',
                            'drive':'drive',
                            'steer':'drive',
                            'go':'drive',
                            'goes':'drive',
                            'head':'drive',
                            'pull':'drive',
                            'travel':'drive',
                            'flow':'drive',
                            'reverse':'reverse',
                            'walk':'walk',
                            'cross':'walk',
                            'park':'park',
                            'drift':'forward', # Not sure about this one, but it's not changing lanes so I'm calling it "forward"
                            'block':'stop', # Vehicles that are blocking generally are stopped in the path of the ego
                            'enter':'merge',
                            'straighten':'forward',
                            'follow':'forward',
                            'shift':'merge',
                            'change':'merge',
                            'stand':'stop', # Standing still
                            'curve':'forward',
                           }
        # adjective -> canonical action ('_negative' marks "nothing there")
        self.adjective_action = {'clear':'_negative', # traffic is clear
                                 'stationary':'stop', # car is stationary
                                 'complete':'stop', # is at a complete stop
                                 'heavy':'forward', # traffic is heavy
                                 'accelerating':'accelerate', # car is accelerating
                                 'light':'_negative', # traffic is light
                                 'slow':'forward', # traffic is slow
                                 'parallel':'reverse', # car is parallel parking
                                 'rolling':'forward', # car is rolling forward
                                 'driving':'drive', # car is driving
                                 'full':'stop', # car is at a full stop
                                 'stopped':'stop', # car is stopped
                                 'busy':'forward', # traffic is busy
                                 'double':'stop', # car is double parked
                                 'double-parked':'stop', # car is double-parked
                                 'idle':'stop', # car is idle
                                 'stopping':'brake', # car is stopping
                                 'minimal':'_negative', # traffic is minimal
                                 'empty':'_negative', # traffic is empty
                                 'moderate':'forward', # traffic is moderate
                                }
        # Same table as init_action_list, keyed by the stem of each verb.
        self.action_list = {self.stemmer.stem(word): action
                            for word, action in init_action_list.items()}

        self.light_status = ["green","yellow","red"]
        self.directions = ["left","right","u-turn","uturn","through","backward","down","straight"]

        # word -> list of phrases recorded through update_debug
        self.debug = {"_None":[]}

        self.count = 0

    def update_debug(self,word,phrase):
        """Append phrase to the list stored under word in self.debug, creating the list if needed."""
        self.debug.setdefault(word, []).append(phrase)

# Module-level SDL_Util instance; the SDL methods look it up as the global `util`.
util = SDL_Util()


In [None]:
class SDL:
    """
    Scenario description built from the captions of one video.

    self.actors maps each time-segment key ('1'..'15') to a list of Actor
    objects and self.scene maps the same keys to a list of scene-element
    strings. Both are filled in by getDescriptors, which relies on the
    module-level `util` object for parsing, stemming and vocabulary lookups.
    """
    def __init__(self, index, statements, link):
        """
        index: stored as self.index
        statements: captions keyed by time segment, stored as self.statements
        link: stored as self.videoLink
        """
        self.statements = statements
        self.index = index
        self.videoLink = link

        segment_keys = [str(n) for n in range(1, 16)]
        self.actors = {seg: [] for seg in segment_keys}
        self.scene = {seg: [] for seg in segment_keys}

    def getDescriptors(self, statement, timeSegment):
        """
        Extract actors, actions and scene elements from one caption.

        statement: caption text, or "No Data Recorded" for an empty segment
        timeSegment: key into self.actors / self.scene ('1'..'15')

        Results are appended to self.actors[timeSegment] and
        self.scene[timeSegment]; nothing is returned.
        """
        if(statement == "No Data Recorded"):
            # No caption => no SDL
            self.actors[timeSegment].append(Actor("NaN"))
            self.actors[timeSegment][-1].action = "NaN"
            self.scene[timeSegment].append("NaN")
            return

        parse_tree = util.parser.parse(statement.lower()) # Create a parse tree
        self.sbar2sdl(parse_tree,timeSegment) # Search for actors and actions

        # Take care of bad verbs (e.g., "backward forward")
        for actor in self.actors[timeSegment]:
            long_verb = actor.action.split(" ")
            if len(long_verb) == 1:
                # verb is only 1 word, can be assumed to be viable
                continue
            if long_verb[1] == 'drive' or long_verb[1] == 'forward':
                # Trying to make a "left forward" implies a left turn
                if long_verb[0] == 'left' or long_verb[0] == 'right':
                    # Was `z[0]`, an undefined name that raised NameError here.
                    actor.action = long_verb[0]+" turn"
                elif long_verb[0] == 'backward':
                    actor.action = "reverse"
                else:
                    actor.action = "forward"
            elif long_verb[0] == "uturn" or long_verb[0] == 'u-turn':
                #u-turn turn => u-turn
                actor.action = "u-turn"
            elif long_verb[1] != 'turn' and long_verb[1] != 'merge':
                # down stop => stop
                actor.action = long_verb[1]

    def sbar2sdl(self, tree, timeSegment):
        """
        Walk the children of a clause-level node and dispatch on their labels.

        Labels starting with "S" and "PP" nodes are walked recursively, "NP"
        nodes go to np2sdl and "VP" nodes go to vp2sdl.
        """
        if isinstance(tree, str) or len(tree) == 0 or isinstance(tree[0], str):
            return # tree is a leaf and not a tree
        for i,node in enumerate(tree):
            if node.label()[0] == "S":
                self.sbar2sdl(node, timeSegment)
            elif node.label() == "NP":
                if len(self.actors[timeSegment]) > 0 and node.leaves() == ['the','car'] and \
                (i == len(tree)-1 or tree[i+1].label() != "PP"):
                    continue # Ego has been referenced a second time. Ignore it.
                self.np2sdl(node, timeSegment)
            elif node.label() == "VP":
                self.vp2sdl(node, timeSegment)
            elif node.label() == "PP":
                self.sbar2sdl(node, timeSegment)

    def _apply_light_status(self, timeSegment, status):
        """Prefix the last (or, failing that, second to last) plain light in the scene with status."""
        scene = self.scene[timeSegment]
        for offset in (-1, -2):
            # offset -2 covers a light that was mentioned just before e.g. an intersection
            if len(scene) >= -offset and scene[offset] in ("light", "traffic light"):
                scene[offset] = status+" "+scene[offset]
                return

    def _process_deferred(self, todo, timeSegment):
        """Visit the sub-nodes that np2sdl / vp2sdl postponed, in the order they were found."""
        for node,label,pass_neg in todo:
            if label == "S":
                self.sbar2sdl(node, timeSegment)
            elif label == "N":
                self.np2sdl(node, timeSegment, pass_neg)
            elif label == "V":
                self.vp2sdl(node, timeSegment, pass_neg)

    def np2sdl(self, tree, timeSegment, neg=False):
        """
        Interpret a noun phrase as a scene element, an actor or an action.

        tree: the "NP" node
        timeSegment: key into self.actors / self.scene
        neg: True when the phrase is negated (e.g. "no traffic"); the first
             single-word noun found is then skipped
        """
        noun = ""
        verb = ""
        adj = ""
        todo = []
        if len(tree) == 2 and tree[0].label() == "NP" and tree[1].label() == "PP" and \
        tree[0].leaves() == ['the','car'] and len(self.actors[timeSegment]) > 0:
            # The phrase "the car" can refer to the ego, but it doesn't in this case.
            noun = "car"
            tree = []
        elif tree.leaves() == ['the','car'] and len(self.actors[timeSegment]) > 0:
            return # Ego has been referenced a second time. Ignore it.
        for i,node in enumerate(tree):
            if isinstance(node, str): # tree is a leaf and not a tree
                break
            if node.label()[0] == "N" and len(node.leaves()) == 1:
                # Found the noun in the noun phrase
                if neg: # Actor is NOT present (e.g., "no traffic")
                    neg = False
                    continue
                elif node.leaves()[0] in util.light_status:
                    adj = node.leaves()[0]
                    self._apply_light_status(timeSegment, adj)
                elif noun == "":
                    noun = node.leaves()[0]
                else: #elif noun != "":
                    noun = noun+" "+node.leaves()[0]
                if i > 0 and tree[i-1].label()[0] == "V" and len(tree[i-1].leaves()) == 1:
                    # Verb before the noun is an action
                    verb = tree[i-1].leaves()[0]
                elif i > 0 and tree[i-1].label()[0] == "J" and len(tree[i-1].leaves()) == 1:
                    adj = tree[i-1].leaves()[0]
            elif node.label()[0] == "S":
                todo.append((node,"S",None))
                neg = False
            elif node.label() == "NP":
                todo.append((node,"N",neg))
                neg = False
            elif node.label() == "VP":
                todo.append((node,"V",neg))
                neg = False
            elif node.label() == "PP":
                todo.append((node,"S",None))
                neg = False
            elif node.label() == "DT" and len(node.leaves()) == 1 and node.leaves()[0] == "no":
                neg = True
        noun = util.stemmer.stem(noun)
        verb = util.stemmer.stem(verb)
        if noun in util.scene_list:
            # Noun was a scene, add it to the scene list
            if noun == "light" and adj in util.light_status:
                # Check if the modifier describes the color of the traffic light
                noun = adj+" "+noun
            self.scene[timeSegment].append(noun)
        elif noun in util.actor_list:
            if len(self.actors[timeSegment]) == 0:
                # The first actor found in a segment is taken to be the ego
                noun = "ego"
            else:
                noun = util.actor_list[noun]
            self.actors[timeSegment].append(Actor(noun))
            if verb != "" and verb in util.action_list:
                verb = util.action_list[verb]
                # Add action to the associated actor
                self.actors[timeSegment][-1].action = verb
        elif noun in util.directions and len(self.actors[timeSegment]) > 0 and \
        self.actors[timeSegment][-1].action == "":
            self.actors[timeSegment][-1].action = noun+" turn"
        elif noun in util.action_list and len(self.actors[timeSegment]) > 0 and \
        self.actors[timeSegment][-1].action == "":
            noun = util.action_list[noun]
            self.actors[timeSegment][-1].action = noun
        self._process_deferred(todo, timeSegment)

    def vp2sdl(self, tree, timeSegment, neg=False):
        """
        Interpret a verb phrase and assign an action to the most recent actor.

        tree: the "VP" node
        timeSegment: key into self.actors / self.scene
        neg: True when a negation applies to this phrase; a negated
             forward/drive becomes stop and a negated stop becomes forward

        The action is only written when the last actor of the segment has
        no action yet.
        """
        verb = ""
        adj = ""
        adv = ""
        todo = []
        flip = False
        for i,node in enumerate(tree):
            if isinstance(node, str): # tree is a leaf and not a tree
                break
            if node.label()[0] == "V" and len(node.leaves()) == 1:
                verb = node.leaves()[0]
                if neg:
                    flip = True
                neg = False
            elif node.label() == "ADJP" and len(node.leaves()) == 1:
                adj = node.leaves()[0]
                neg = False
            elif node.label() == "ADVP" and len(node.leaves()) == 1:
                adv = node.leaves()[0]
                neg = False
            elif node.label() == "RB" and len(node.leaves()) == 1 and \
            node.leaves()[0] in ("not", "n't"):
                # Both spellings must be a single-word RB. Without the grouping
                # the "n't" test applied to every node, because `and` binds
                # tighter than `or`.
                neg = True
            elif node.label()[0] == "S":
                todo.append((node,"S",None))
                neg = False
            elif node.label() == "NP":
                todo.append((node,"N",neg))
                neg = False
            elif node.label() == "VP":
                todo.append((node,"V",neg))
                neg = False
            elif node.label() == "PP":
                pp_verb = False
                if len(node.leaves()) < 5 and len(node.leaves()) > 0:
                    # A short prepositional phrase may end in the actual action word
                    pot_verb = node.leaves()[-1]
                    pot_verb = util.stemmer.stem(pot_verb)
                    if pot_verb in util.action_list:
                        verb = pot_verb
                        pp_verb = True
                if not pp_verb:
                    todo.append((node,"S",None))
                neg = False
        verb = util.stemmer.stem(verb)
        if adj in util.light_status:
            self._apply_light_status(timeSegment, adj)
        if verb in util.action_list and len(self.actors[timeSegment]) > 0 and \
        self.actors[timeSegment][-1].action == "":
            verb = util.action_list[verb]
            if flip and (verb == "forward" or verb == "drive"):
                verb = util.action_list["stop"]
            elif flip and (verb == "stop"):
                verb = util.action_list["forward"]
            elif adv in util.directions:
                verb = adv+" "+verb
            self.actors[timeSegment][-1].action = verb
        self._process_deferred(todo, timeSegment)


In [None]:
# One entry per remaining row of bddx, with every value converted to str.
sdlList = [row.astype(str) for _, row in bddx.iterrows()]
assert len(sdlList) == 6996, "length of sdl list should be 6996"

In [None]:
# For every video, map the segment keys '1'..'15' to the combined caption text.
segment_keys = [str(n) for n in range(1, 16)]
sdlStatements = [{seg: row[seg + 'AJ'] for seg in segment_keys} for row in sdlList]

# A caption that is just " " (two empty cells joined) means nothing was annotated.
for statements in sdlStatements:
    for seg in list(statements):
        if statements[seg] == " ":
            statements[seg] = "No Data Recorded"

# One SDL object per video, numbered by its position in sdlList.
sdlObjectList = [SDL(position, sdlStatements[position], row['InputVideo'])
                 for position, row in enumerate(sdlList)]

In [None]:
# Run the caption parser on every time segment of every SDL object,
# printing the position every 100 objects as a progress indicator.
for position, sdl_object in enumerate(sdlObjectList):
    if position % 100 == 0:
        print(position)
    for segment_key in list(sdl_object.statements.keys()):
        sdl_object.getDescriptors(sdl_object.statements[segment_key], segment_key)


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900


In [None]:
examples = [1,5730,1999]

# Print the captions, the extracted actor/action pairs per time segment
# and the scene elements of a few SDL objects.
for example in examples:
    sdl_object = sdlObjectList[example]
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdl_object.statements)

    for segment_number in range(1, len(sdl_object.actors) + 1):
        actorsIndex = str(segment_number)
        for actor in sdl_object.actors[actorsIndex]:
            print('Time segment:', actorsIndex, ",  %s: %s"%(actor.description, actor.action))
    print('Scene: ', sdl_object.scene)

Object 1: 
Actors: 
{'1': 'The car is stopped. The car is at an intersection with a red light.', '2': 'The car is accelerating through the intersection. The light at the intersection has changed to green', '3': 'No Data Recorded', '4': 'No Data Recorded', '5': 'No Data Recorded', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: stop
Time segment: 2 ,  ego: accelerate
Time segment: 3 ,  NaN: NaN
Time segment: 4 ,  NaN: NaN
Time segment: 5 ,  NaN: NaN
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Time segment: 12 ,  NaN: NaN
Time segment: 13 ,  NaN: NaN
Time segment: 14 ,  NaN: NaN
Time segment: 15 ,  NaN: NaN
Scene:  {'1': ['intersect', '

In [None]:
# Count how many actors have associated actions
# Tally the actors and how many of them were given an action.
actor_count = 0
action_count = 0
for so in sdlObjectList:
    for segment_actors in so.actors.values():
        for actor in segment_actors:
            if actor.description == "NaN":
                # Placeholder for a segment without a caption; stop scanning this segment
                break
            actor_count += 1
            if actor.action != "":
                action_count += 1

print("There are %i actors and %i actions. Completeness: %f"%(actor_count,action_count,action_count/actor_count))

# Show the ten util.debug entries with the most recorded phrases
# (ties are ordered alphabetically by word).
print("Top 10 unclassified verbs:")
dd = sorted(util.debug.items(), key = lambda kv:(-len(kv[1]),kv[0]))
cutoff = 10
for word, phrases in dd[:cutoff]:
    print("  %s: %i"%(word,len(phrases)))

There are 35397 actors and 31971 actions. Completeness: 0.903212
Top 10 unclassified verbs:
  _None: 0


# SDL Representation with Matrices

In [None]:
import numpy as np 
# For every captioned time segment of every SDL object, build a pair
# [actor_action_matrix, scene_matrix]:
#   actor_action_matrix has shape (n, 2): one row per actor, holding the
#     integer code of the actor (column 0) and of its action (column 1)
#   scene_matrix has shape (m,): the integer codes of the scene elements,
#     or the single 'NaN' code when the segment has no scene element
actor_encoding = {'light vehicle': 0, 'heavy vehicle': 1, 'cyclist': 2, 'pedestrian': 3, 'traffic': 4, 'ego': 5, 'NaN': 6}

action_encoding = {'turn': 0, 'left turn': 1, 'right turn': 3, 'merge': 4, 'accelerate': 5, 'brake': 6, 'stop': 7, 
                   'forward': 8, 'walk': 9, 'park': 10, 'drive': 11, 'reverse': 12, 'center merge': 13, 'left merge': 14, 
                   'right merge': 15, 'turn through': 16, 'merge u turn': 17, 'u-turn': 18, 'NaN': 19, '':20}

scene_encoding = {'intersect': 0, 'crosswalk': 1, 'bridge': 2, 'green light': 3, 'stop sign': 4, 'yield sign': 5, 'sign': 6, 
                  'u-turn': 7, 'traffic light': 8, 'traffic signal': 9, 'turn lane': 10, 'crosswalks': 11, 'green traffic light': 12, 
                  'light': 13, 'lights': 14, 'red light': 15, 'red traffic light': 16, 'signs': 17, 'traffic lights': 18, 
                  'yellow light': 19, 'yellow traffic light': 20, 'NaN': 21}

sdl_embeddings = []

for sdl_object in sdlObjectList:
    for segment_number in range(1, len(sdl_object.actors) + 1):
        segment_key = str(segment_number)
        if sdl_object.statements[segment_key] == 'No Data Recorded':
            continue # segments without a caption get no embedding

        segment_actors = sdl_object.actors[segment_key]
        actor_list = [actor.description for actor in segment_actors]
        action_list = [actor.action for actor in segment_actors]
        scene_list = sdl_object.scene[segment_key]

        if len(actor_list) != len(action_list):
            print("Actor and action list don't match up, this may cause 1 to 1 actor to action correspondence errors")
            break

        actor_action_matrix = np.zeros((len(actor_list), 2))
        # Always at least one slot, so an empty scene can hold the 'NaN' code
        scene_matrix = np.zeros((max(len(scene_list), 1),))

        actor_indices = [actor_encoding[name] for name in actor_list]
        action_indices = [action_encoding[name] for name in action_list]

        if len(scene_list) > 0 and scene_list[0] != 'NaN':
            scene_indices = [scene_encoding[name] for name in scene_list]
        else:
            scene_indices = [21]

        if len(actor_indices) != len(action_indices):
            print("make sure each actor is matched up with an action")
            break

        for row, codes in enumerate(zip(actor_indices, action_indices)):
            actor_action_matrix[row] = codes

        for position, scene_index in enumerate(scene_indices):
            scene_matrix[position] = scene_index

        sdl_embeddings.append([actor_action_matrix, scene_matrix])

#assert len(sdl_embeddings) == 104940, "length of sdlEmbeddings should be 104940"
print(len(sdl_embeddings))

26539


In [None]:
sdl_embeddings[45][1]

array([[5., 1.]])

# Matrix Representation Distance

In [None]:
# Turn every embedding into [actors, actions, scenes]: three plain lists of codes.
# Actor/action pairs are ordered by actor code, then by action code; scene
# codes are sorted on their own.
extraction = []
for actor_action, scene_codes in sdl_embeddings:
    row_order = np.lexsort((actor_action[:, 1], actor_action[:, 0]))
    actors = [actor_action[r, 0].item() for r in row_order]
    actions = [actor_action[r, 1].item() for r in row_order]
    extraction.append([actors, actions, sorted(scene_codes)])
print(len(extraction))

26539


In [None]:
# Keep the first occurrence of every distinct [actors, actions, scenes] entry,
# in the order in which they appear in extraction.
# A set of hashable snapshots replaces the repeated `e in uniq` list scan,
# which compared every entry against every unique entry found so far.
uniq = []
_seen = set()
for e in extraction:
    snapshot = tuple(tuple(part) for part in e)
    if snapshot not in _seen:
        _seen.add(snapshot)
        uniq.append(e)
print(len(uniq))

1182


In [None]:
def key(gen):
    """
    Return a hashable key for an [actors, actions, scenes] entry.

    Each inner sequence becomes its own tuple. Flattening everything into a
    single tuple would make entries such as [[5], [5], [7, 8, 21]] and
    [[5, 5], [7, 8], [21]] indistinguishable, because the boundaries between
    the three lists are lost.
    """
    return tuple(tuple(b) for b in gen)

# Map the key of every unique entry to its position in uniq.
d = {key(entry): position for position, entry in enumerate(uniq)}

In [None]:
from collections import Counter as cset

def dist(a,b):
    """
    Weighted distance from the reference entry a to the candidate entry b.

    a, b: sequences whose items 0, 1 and 2 are the actor, action and scene
          codes (compared as multisets).
    For each of the three categories two terms are computed: the number of
    elements b has beyond those shared with a, and twice the number of
    elements of a that b lacks. Actor terms are weighted by 9, action terms
    by 3 and scene terms by 1. The result is the Euclidean norm of the six
    terms, so dist(a, b) and dist(b, a) generally differ.
    """
    missing_scale = 2 # punish missing elements more than extra elements
    category_scales = (9, 3, 1) # actors, actions, scenes

    vector = []
    for position, scale in enumerate(category_scales):
        reference = cset(a[position])
        candidate = cset(b[position])
        shared = reference & candidate
        extra_in_b = sum((candidate - shared).values())
        missing_from_b = sum((reference - shared).values())
        vector.append(scale * extra_in_b)
        vector.append(scale * missing_scale * missing_from_b)
    return np.linalg.norm(vector) # Euclidean distance
    

In [None]:
size = len(uniq)
# Full size x size table of dist(uniq[row], uniq[col]). Both triangles are
# filled because dist weighs missing and extra elements differently.
small_dist = np.zeros((size,size))
for row, reference in enumerate(uniq):
    for col, candidate in enumerate(uniq):
        small_dist[row][col] = dist(reference, candidate)


In [None]:
import pickle as pkl
import numpy as np

path = "./Scenario2Vector"
dataset = "crowd_dict.pkl"
ground_truth = "groundtruth.pkl"

dataset_file = "%s/%s"%(path,dataset)
ground_truth_file = "%s/%s"%(path,ground_truth)

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open(dataset_file,"rb") as _in:
    similarity_dataset = pkl.load(_in)

with open(ground_truth_file,"rb") as _in:
    gt = pkl.load(_in)

In [None]:
# Spot check: show the entries stored under key 10109 in both loaded objects.
print(similarity_dataset[10109])
print(gt[10109])

[14058, 2992, 590, 5262, 3269, 3812]
[0, 1, 2, 5, 3, 4]


In [None]:
# Rank, for every base entry of similarity_dataset, its comparison entries by
# their SDL distance to the base. metric_output[base][k] receives the rank
# (0 = closest) of the k-th comparison. Comparisons with identical distances
# share a span of consecutive ranks; inside that span each one is given its
# ground-truth rank when that rank lies in the span, and the remaining ones
# receive the leftover ranks in order.
# NOTE(review): metric_name is "bert_cond" although the scores below come from
# small_dist -- confirm that this is the intended label.
metric_name = "bert_cond"
metric_output = {}

for base in similarity_dataset:
    metric_output[base] = [0]*len(similarity_dataset[base])
    output = []
    for comparison in similarity_dataset[base]:
        # base and comparison are used as positions in extraction; the distance
        # is looked up through the index of the matching unique entry.
        output.append( small_dist[d[key(extraction[base])]][d[key(extraction[comparison])]] )
    order = np.argsort(output) # positions in output, smallest distance first
    skip = 0
    for i,sort_order in enumerate(order):
        if skip > 0: # Skip videos that were already processed by the score
            skip-=1
            continue
        if output.count(output[sort_order]) > 1: # Multiple vids have the same score
            skip = output.count(output[sort_order])-1
            vals = list(range(i,i+skip+1)) # the ranks shared by the tied videos
            tie_breaker = []
            for j,value in enumerate(output):
                if value == output[sort_order]:
                    # (position in output, ground-truth rank of that position)
                    tie_breaker.append((j,gt[base][j]))
            nomatch = []
            for tie in tie_breaker:
                if tie[1] in vals:
                    # Ground-truth rank is available inside the tied span: use it
                    vals.remove(tie[1])
                    metric_output[base][tie[0]] = tie[1]
                else:
                    nomatch.append(tie[0])
            # Tied videos whose ground-truth rank was not available get the ranks left over
            for j,idx in enumerate(nomatch):
                metric_output[base][idx] = vals[j]
        else: # No ties in the metric score
            metric_output[base][sort_order] = i

print(metric_output[10109])
print(gt[10109])

[0, 1, 2, 4, 3, 5]
[0, 1, 2, 5, 3, 4]


In [None]:
import os

masterfile = "Scenario2Vector/master_ranks.pkl"

# Start from the stored results when the file exists, otherwise from scratch.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
if os.path.isfile(masterfile):
    with open(masterfile,"rb") as _in:
        master_dict = pkl.load(_in)
else:
    master_dict = {}

# An existing entry under the same metric name is only replaced after the
# user answers exactly 'y'.
abort = False
if metric_name in master_dict:
    print("Metric name already exists!, Type 'y' to overwrite:")
    abort = input("> ") != 'y'

if not abort:
    master_dict[metric_name] = metric_output
    with open(masterfile,"wb") as _out:
        pkl.dump(master_dict,_out)

print(len(master_dict))


7
