# Imports

In [13]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# NLTK to find word stems
import nltk

# Data Cleaning

In [14]:
import pandas as pd # Pandas library enables data manipulation
data_url = "https://raw.githubusercontent.com/linklab-uva/Scenario2Vector/master/BDDX.csv?token=AH3QIX7CFN4LG3QSSDV5HOK67EBYU"
def load_bddx_data(csv_name):
    column_names = ['Index', 'InputVideo', '1S', '1E', '1A', '1J', '2S', '2E', '2A', '2J', '3S', '3E', '3A', '3J',
                    '4S', '4E', '4A', '4J','5S', '5E', '5A', '5J','6S', '6E', '6A', '6J','7S', '7E', '7A', '7J',
                    '8S', '8E', '8A', '8J','9S', '9E', '9A', '9J','10S', '10E', '10A', '10J','11S', '11E', '11A', '11J',
                    '12S', '12E', '12A', '12J','13S', '13E', '13A', '13J','14S', '14E', '14A', '14J','15S', '15E', '15A', '15J']
    
    return pd.read_csv(csv_name, names=column_names)
bddx = load_bddx_data(data_url)
bddx = bddx.drop(['1S', '1E','2S', '2E','3S', '3E','4S', '4E','5S', '5E','6S', '6E','7S', '7E','8S', '8E','9S', '9E','10S', '10E','11S', '11E','12S', '12E','13S', '13E','14S', '14E','15S', '15E', ], axis=1)
bddx = bddx.fillna("")

bddx['1AJ'] = bddx[['1A', '1J']].agg(' '.join, axis=1)
bddx['2AJ'] = bddx[['2A', '2J']].agg(' '.join, axis=1)
bddx['3AJ'] = bddx[['3A', '3J']].agg(' '.join, axis=1)
bddx['4AJ'] = bddx[['4A', '4J']].agg(' '.join, axis=1)
bddx['5AJ'] = bddx[['5A', '5J']].agg(' '.join, axis=1)
bddx['6AJ'] = bddx[['6A', '6J']].agg(' '.join, axis=1)
bddx['7AJ'] = bddx[['7A', '7J']].agg(' '.join, axis=1)
bddx['8AJ'] = bddx[['8A', '8J']].agg(' '.join, axis=1)
bddx['9AJ'] = bddx[['9A', '9J']].agg(' '.join, axis=1)
bddx['10AJ'] = bddx[['10A', '10J']].agg(' '.join, axis=1)
bddx['11AJ'] = bddx[['11A', '11J']].agg(' '.join, axis=1)
bddx['12AJ'] = bddx[['12A', '12J']].agg(' '.join, axis=1)
bddx['13AJ'] = bddx[['13A', '13J']].agg(' '.join, axis=1)
bddx['14AJ'] = bddx[['14A', '14J']].agg(' '.join, axis=1)
bddx['15AJ'] = bddx[['15A', '15J']].agg(' '.join, axis=1)

bddx = bddx.drop(['Index', '1A', '1J', '2A', '2J', '3A', '3J', '4A', '4J', '5A', '5J', '6A', '6J', '7A', '7J', '8A', '8J', '9A', '9J', '10A', '10J', '11A', '11J', '12A', '12J', '13A', '13J', '14A', '14J', '15A', '15J', ], axis=1)
bddx = bddx.drop(bddx.index[0])

# SDL Class

In [15]:
class Actor:
    def __init__(self, description):
        """
        self.description should be one of the following:
          ego
          light vehicle
          heavy vehicle
          cyclist
          pedestrian
        """
        self.description = description
        self.action = ""

In [16]:
class SDL_Util:
    def __init__(self):
        self.stemmer = nltk.stem.PorterStemmer()
        # stemmer allows us to match words with the same roots: e.g., it identifies "turns", "turning", and "turn" as the same word
        # But it recognizes "slow" and "slowly" as different, and it thinks "go" and "goes" are different

        self.actor_list = {'car':'light vehicle',
                           'bus':'heavy vehicle',
                           'truck':'heavy vehicle',
                           'cyclist':'cyclist',
                           'pedestrian':'pedestrian',
                           'ambulance':'heavy vehicle',
                           'minivan':'light vehicle',
                           'traffic':'traffic',
                           'the car':'light vehicle'
                           }
        self.scene_list = ['intersection',
                           'crosswalk',
                           'bridge',
                           'light',
                           'sign',
                           'traffic light',
                           'traffic signal',
                           'turn lane',
                           ]

        init_action_list = {'turn':'turn',
                            'merge':'merge',
                            'swerve':'merge',
                            'veer':'merge',
                            'switch':'merge',
                            'accelerate':'accelerate',
                            'pick':'accelerate',
                            'brake':'brake',
                            'slow':'brake',
                            'reduce':'brake',
                            'decelerate':'brake',
                            'stop':'stop',
                            'wait':'stop',
                            'sit':'stop',
                            'forward':'forward',
                            'move':'forward',
                            'stay':'forward',
                            'maintain':'forward',
                            'proceeds':'forward',
                            'proceed':'forward',
                            'inch':'forward',
                            'pass':'forward',
                            'roll':'forward',
                            'advance':'forward',
                            'drive':'drive',
                            'steer':'drive',
                            'go':'drive',
                            'goes':'drive',
                            'head':'drive',
                            'pull':'drive',
                            'travel':'drive',
                            'flow':'drive',
                            'reverse':'reverse',
                            'walk':'walk',
                            'cross':'walk',
                            'park':'park',
                            'drift':'forward', # Not sure about this one, but it's not changing lanes so I'm calling it "forward"
                            'block':'stop', # Vehicles that are blocking generally are stopped in the path of the ego
                            'enter':'merge',
                            'straighten':'forward',
                            'follow':'forward',
                            'shift':'merge',
                            'change':'merge',
                            'stand':'stop', # Standing still
                            'curve':'forward',
                           }
        self.adjective_action = {'clear':'_negative', # traffic is clear
                                 'stationary':'stop', # car is stationary
                                 'complete':'stop', # is at a complete stop
                                 'heavy':'forward', # traffic is heavy
                                 'accelerating':'accelerate', # car is accelerating
                                 'light':'_negative', # traffic is light
                                 'slow':'forward', # traffic is slow
                                 'parallel':'reverse', # car is parallel parking
                                 'rolling':'forward', # car is rolling forward
                                 'driving':'drive', # car is driving
                                 'full':'stop', # car is at a full stop
                                 'stopped':'stop', # car is stopped
                                 'busy':'forward', # traffic is busy
                                 'double':'stop', # car is double parked
                                 'double-parked':'stop', # car is double-parked
                                 'idle':'stop', # car is idle
                                 'stopping':'brake', # car is stopping
                                 'minimal':'_negative', # traffic is minimal
                                 'empty':'_negative', # traffic is empty
                                 'moderate':'forward', # traffic is moderate
                                }
        self.action_list = {}
        for a in init_action_list:
            self.action_list[self.stemmer.stem(a)] = init_action_list[a]

        self.light_status = ["green","yellow","red"]
        self.directions = ["left","right","u-turn","uturn","through","backward","down","straight"]

        self.debug = {"_None":[]}
    def update_debug(self,word,phrase):
        if not word in self.debug:
            self.debug[word] = []
        self.debug[word].append(phrase)

util = SDL_Util()

class SDL:
  def __init__(self, index, statements, link):
    self.statements = statements
    self.index = index
    self.videoLink = link
    
    self.actors = {'1':[], '2':[], '3':[], '4':[], '5':[], '6':[], '7':[], 
                   '8':[], '9':[], '10':[], '11':[], '12':[], '13':[], '14':[], '15':[]}
    self.scene = {'1':[], '2':[], '3':[], '4':[], '5':[], '6':[], '7':[], 
                   '8':[], '9':[], '10':[], '11':[], '12':[], '13':[], '14':[], '15':[]}
        
  def getDescriptors(self, statement, timeSegment): 

    init_phrase = nltk.word_tokenize(statement.lower())
    phrase = nltk.pos_tag(init_phrase)

    if(statement == "No Data Recorded"):
      self.actors[timeSegment].append(Actor("NaN"))
      self.actors[timeSegment][-1].action = "NaN"
      self.scene[timeSegment].append("NaN")
    
    skip = 0
    actor_locs = []
    for idx,word in enumerate(phrase):
        while skip > 0:
            skip-=1
            continue
        if word[1][0] == "N": # Word is a noun
            element = word[0]
            for i in range(idx+1,len(phrase)):
                if phrase[i][1][0] == "N": # consecutive nouns should be together, e.g. "traffic light"
                    element+=" "+phrase[i][0]
                    skip+=1
                elif element == "traffic" and (phrase[i][0] == "light" or phrase[i][0] == "signal"):
                    element+=" "+phrase[i][0]
                    skip+=1
                else:
                    break
            if element == "car" and (idx > 0 and phrase[idx-1][0] == "the") and (idx+1 < len(phrase) and phrase[idx+1] != 'in'):
                element = "the car" # "the car" always refers to the ego, except in the case of "the car in front"
            elif element == "sign":
                # Find the type of sign, e.g. "stop sign"
                if idx > 0 and phrase[idx-1][1][0] == "J":
                    element = phrase[idx-1][0]+" "+element
            if util.stemmer.stem(element) in util.actor_list or util.stemmer.stem(element) in util.scene_list:
                actor_locs.append((element,idx))
        elif word[1] == ".":
            actor_locs.append((".",idx))
    for idx in range(len(actor_locs)):
        if idx+1 < len(actor_locs):
            search_phrase = phrase[actor_locs[idx][1]:actor_locs[idx+1][1]]
        else:
            search_phrase = phrase[actor_locs[idx][1]:]
        if util.stemmer.stem(actor_locs[idx][0]) in util.actor_list:
            is_positive = True
            negative = ["no","clear","lack","free"]
            for i in range(max(0,actor_locs[idx][1]-3),actor_locs[idx][1]):
                if phrase[i][0] in negative:
                    is_positive = False # don't add an actor if the phrase is "there are no cars"
                    break
            if is_positive:
                self.compose_actor(search_phrase,actor_locs[idx][0],timeSegment)
        elif util.stemmer.stem(actor_locs[idx][0]) in util.scene_list:
            #TODO: Check lights to see if the color is 1 word earlier (e.g., "red light")
            self.compose_scene(search_phrase,actor_locs[idx][0],timeSegment)
                
  def compose_actor(self,phrase,actor,timeSegment):
    if len(self.actors[timeSegment]) == 0:
        self.actors[timeSegment].append(Actor("ego"))
        action = self.find_action(phrase)
        if action and action != "_negative":
            self.actors[timeSegment][-1].action = action
    elif actor == 'the car':
        return # "the car" always refers to ego
    elif util.stemmer.stem(actor) == 'pedestrian': # Pedestrians always walk
        self.actors[timeSegment].append(Actor(util.actor_list[util.stemmer.stem(actor)]))
        self.actors[timeSegment][-1].action = util.action_list[util.stemmer.stem("walk")]
    else:
        action = self.find_action(phrase)
        if action == "_negative":
            return # don't add an actor if the phrase is "the traffic is clear"
        self.actors[timeSegment].append(Actor(util.actor_list[util.stemmer.stem(actor)]))
        if action:
            self.actors[timeSegment][-1].action = action
            
  def find_action(self,phrase):
    delay_verbs = ["come","make","continu","begin","start","complet","tri"] # Verbs where the actual action is later in the sentence
    exist_verbs = ["is","are","remain"]
    action = ""
    action_index = 0
    for i in range(1,len(phrase)):
        if phrase[i][1][0:2] == "RB": # Ignore adverbs
            continue
        elif phrase[i][1] == ".": # Sentence has ended. Stop looking for a verb
            break
        if phrase[i][1][0] == "V":
            action = phrase[i][0]
            action_index = i
            for j in range(i+1,len(phrase)):
                if phrase[j][1][0:2] == "RB": # Ignore adverbs
                    continue
                elif phrase[j][1][0] == "V": # If there are multiple verbs in a row, take the last
                    if phrase[j][0] == "left": # "left" is getting misidentified as a verb, ignore it
                        break
                    action = phrase[j][0]
                    action_index = j
                else:
                    break
            break
    if action == "":
        util.update_debug("_None",phrase)
        return None
    elif util.stemmer.stem(action) in util.action_list:
        action = util.action_list[util.stemmer.stem(action)]
    # Start of hand-crafted rules exclusive to this dataset
    elif action_index+1 < len(phrase) and phrase[action_index+1][0] == "forward":
        return util.action_list[util.stemmer.stem("forward")]
    elif util.stemmer.stem(action) in delay_verbs:
        better_action = False
        for i in range(action_index+1,len(phrase)):
            if phrase[i][1][0:2] == "RB": # Ignore adverbs
                continue
            if util.stemmer.stem(phrase[i][0]) in util.action_list:
                word = phrase[i][0]
                for j in range(i+1,len(phrase)):
                    if phrase[j][1][0:2] == "RB": # Ignore adverbs
                        continue
                    elif phrase[i][1][0] == "V" and phrase[j][1][0] == "V":
                        word = phrase[j][0]
                    elif phrase[i][1][0] == "N" and phrase[j][1][0] == "N":
                        word = phrase[j][0]
                    else:
                        break
                if util.stemmer.stem(word) in util.action_list:
                    action = util.action_list[util.stemmer.stem(word)]
                    better_action = True
                elif word == "hand":
                    action = util.action_list["turn"]
                    better_action = True
            elif phrase[i][0] in util.directions:
                action = util.action_list["drive"]
                better_action = True
            if better_action:
                break
        if not better_action:
            if util.stemmer.stem(action) == "continu":
                # The only continues unclassified at this point are "forward"
                return util.action_list[util.stemmer.stem("forward")]
            elif util.stemmer.stem(action) == util.stemmer.stem("come"):
                if action_index+1 < len(phrase) and phrase[action_index+1][0] == "to":
                    return util.action_list[util.stemmer.stem("stop")] # the car comes to a stop
                else:
                    action = "drive"
            else:
                util.update_debug(util.stemmer.stem(action),phrase)
                return None
    elif util.stemmer.stem(action) in exist_verbs:
        if action_index+1 < len(phrase) and phrase[action_index+1][0] == "at":
            # traffic is at a stop / standstill / red
            return util.action_list[util.stemmer.stem("stop")]
        adj = ""
        for i in range(action_index+1,len(phrase)):
            if phrase[i][1][0] == "R":
                continue # Ignore adverbs
            elif phrase[i][1][0] == "J":
                adj = phrase[i][0]
                break
            else:
                break
        if adj in util.adjective_action:
            action = util.adjective_action[adj]
        else:
            util.update_debug(util.stemmer.stem(action),phrase)
            return None
    elif util.stemmer.stem(action) == util.stemmer.stem("speed"):
        if action_index+1 < len(phrase) and phrase[action_index+1][0] == "up":
            return util.action_list[util.stemmer.stem("accelerate")]
        elif action_index+1 < len(phrase) and phrase[action_index+1][0] == "down":
            return util.action_list[util.stemmer.stem("forward")]
        else:
            util.update_debug(util.stemmer.stem(action),phrase)
            return None
    elif util.stemmer.stem(action) == util.stemmer.stem("back"):
        if action == "backed" and action_index > 0 and phrase[action_index-1][0] == "is":
            return util.action_list[util.stemmer.stem("stop")] # traffic is backed up
        else:
            return util.action_list[util.stemmer.stem("reverse")]
    elif util.stemmer.stem(action) == util.stemmer.stem("clear"):
        if action_index > 0 and phrase[action_index-1][0] == "to":
            return util.action_list[util.stemmer.stem("forward")] # waiting for traffic to clear
        else:
            return "_negative" # traffic has cleared
    else:
        util.update_debug(util.stemmer.stem(action),phrase)
        return None

    if action == "drive": # "drive" needs more information
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "turn "+direction
        elif direction == "through" or direction == "down" or direction == "straight":
            return "forward"
        elif direction == "backward":
            return "reverse"
        elif direction == "u-turn":
            return "u-turn"
        else:
            return "forward"
    elif action == "turn":
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "turn "+direction
        elif direction == "u-turn":
            return "u-turn"
        else:
            return "turn"
    elif action == "merge":
        direction = self.find_direction(phrase,action_index)
        if direction == "left" or direction == "right":
            return "merge "+direction
        else:
            return "merge"
    else:
        return action
    
  def find_direction(self,phrase,index):
    if index > 0 and (phrase[index-1][0] == "left" or phrase[index-1][0] == "right"):
        return phrase[index-1][0]
    for i in range(index+1,len(phrase)):
        if phrase[i][0] in util.directions:
            if phrase[i][0] == "left":
                if i+2 < len(phrase) and phrase[i+1][0] == "to" and phrase[i+2][0] == "right":
                    return "right"
                else:
                    return "left"
            elif phrase[i][0] == "right":
                if i+2 < len(phrase) and phrase[i+1][0] == "to" and phrase[i+2][0] == "left":
                    return "left"
                else:
                    return "right"
            elif phrase[i][0] == "u-turn" or phrase[i][0] == "uturn":
                return "u-turn"
            else:
                return phrase[i][0]
        elif phrase[i][0] == "front" and (i > 0 and phrase[i-1][0] == "in"):
            #n-gram "in front"
            return "center"
        elif phrase[i][0] == "u" and (i+1 < len(phrase) and phrase[i+1][0] == "turn"):
            #n-gram "u turn"
            return "u-turn"
        elif phrase[i][0] == "its" and (i+1 < len(phrase) and phrase[i+1][0] == "lane"):
            #n-gram "its lane"
            return "center"
        if phrase[i][1][0] == "V":
            return None
    return None
            
  def compose_scene(self,phrase,scene,timeSegment):
    element = scene.split(" ")
    if element[-1] == "light":
        status = ""
        for ls in util.light_status:
            for word in phrase:
                if word[0] == ls:
                    status = ls
                    break
            if status != "":
                break
        if status != "":
            self.scene[timeSegment].append(status+" "+scene)
            return
        else:
            self.scene[timeSegment].append(scene)
            return
    else:
        self.scene[timeSegment].append(scene)
        return



In [17]:
sdlList = []
for index, row in bddx.iterrows():
    sdlList.append(row.astype(str))
assert len(sdlList) == 6996, "length of sdl list should be 6996"

In [18]:
sdlStatements = []
for i in range(len(sdlList)):
  sdlStatements.append({'1': sdlList[i]['1AJ'], '2': sdlList[i]['2AJ'], '3': sdlList[i]['3AJ'], '4': sdlList[i]['4AJ'], 
                        '5': sdlList[i]['5AJ'], '6': sdlList[i]['6AJ'], '7': sdlList[i]['7AJ'], '8': sdlList[i]['8AJ'], 
                        '9': sdlList[i]['9AJ'], '10': sdlList[i]['10AJ'], '11': sdlList[i]['11AJ'], '12': sdlList[i]['12AJ'], 
                        '13': sdlList[i]['13AJ'], '14': sdlList[i]['14AJ'], '15': sdlList[i]['15AJ']})

for i in range(len(sdlStatements)):
  keysTemp = list(sdlStatements[i].keys())
  for j in keysTemp:
    if (sdlStatements[i][j]==" "):
      sdlStatements[i][j] = "No Data Recorded"

sdlObjectList = []
for i in range(len(sdlList)):
  sdlObjectList.append(SDL(i, sdlStatements[i], sdlList[i]['InputVideo']))

In [19]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
for i in range(len(sdlObjectList)):
  keysTemp = list(sdlObjectList[i].statements.keys())
  for j in (keysTemp):
    sdlObjectList[i].getDescriptors(sdlObjectList[i].statements[j], j)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [20]:
examples = [1,5730,1999] 

for example in examples:
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdlObjectList[example].statements)

    for a in range(len(sdlObjectList[example].actors)):
      actorsIndex = str(a+1)
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        print('Time segment:', actorsIndex, ",  %s: %s"%(sdlObjectList[example].actors[actorsIndex][j].description,sdlObjectList[example].actors[actorsIndex][j].action))
    print('Scene: ', sdlObjectList[example].scene)

Object 1: 
Actors: 
{'1': 'The car is stopped. The car is at an intersection with a red light.', '2': 'The car is accelerating through the intersection. The light at the intersection has changed to green', '3': 'No Data Recorded', '4': 'No Data Recorded', '5': 'No Data Recorded', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: stop
Time segment: 2 ,  ego: accelerate
Time segment: 3 ,  NaN: NaN
Time segment: 4 ,  NaN: NaN
Time segment: 5 ,  NaN: NaN
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Time segment: 12 ,  NaN: NaN
Time segment: 13 ,  NaN: NaN
Time segment: 14 ,  NaN: NaN
Time segment: 15 ,  NaN: NaN
Scene:  {'1': ['light'], '2':

In [21]:
# Count how many actors have associated actions
actor_count = 0
action_count = 0
for idx in range(len(sdlObjectList)):
    so = sdlObjectList[idx]
    for timestep in so.actors:
        for actor in so.actors[timestep]:
            if actor.description == "NaN":
                break
            actor_count+=1
            if actor.action != "":
                action_count+=1

print("There are %i actors and %i actions. Completeness: %f"%(actor_count,action_count,action_count/actor_count))

# Display the top 10 unclassified verbs
print("Top 10 unclassified verbs:")
dd = sorted(util.debug.items(), key = lambda kv:(-len(kv[1]),kv[0]))
cutoff = 10
for (key,value) in dd:
    print("  %s: %i"%(key,len(value)))
    cutoff-=1
    if cutoff <= 0:
        break

There are 31314 actors and 28396 actions. Completeness: 0.906815
Top 10 unclassified verbs:
  _None: 1968
  is: 159
  ha: 27
  are: 22
  get: 21
  take: 21
  begin: 20
  straight: 19
  approach: 18
  jam: 18


# One-hot tensor encoding

In [22]:
import tensorflow as tf
import numpy as np

import sys
np.set_printoptions(threshold=sys.maxsize) #print whole numpy array for debuggingg purposes


print(tf.__version__)

2.2.0


![alt text](https://drive.google.com/uc?id=1BUKjTESqh-TERLFVFF2aogMLEsFMi4qM)

In [23]:
# Returns a list of unique actors, actions, and scenes

actors = []
actions = []
scenes = []

# Find unique identifiers in each category
for sdl in sdlObjectList:
    for timestep in range(1,len(sdl.actors)+1):
        for obj in sdl.actors[str(timestep)]:
            if not obj.description in actors:
                actors.append(obj.description)
            if not obj.action in actions:
                actions.append(obj.action)
        for obj in sdl.scene[str(timestep)]:
            if not obj in scenes:
                scenes.append(obj)

# Transform lists into (key,index) pairs
actor_encoding = {}
scene_encoding = {}
action_encoding = {}
for idx in range(len(actors)):
    actor_encoding[actors[idx]] = idx
for idx in range(len(scenes)):
    scene_encoding[scenes[idx]] = idx
for idx in range(len(actions)):
    action_encoding[actions[idx]] = idx


In [24]:
'''
- Each sdl object has a 7x21x22 one hot encoding representing its action, actor and scene element
- These one hot encodings are stacked on top of each other to produce a 4D tensor [examples x actor x action x scene]
- Last element of each dimension represents an NaN value (or empty string for action)

For each SDL object in sdlObjectList, 15 (7 x 21 x 22) numpy arrays are generated to represent the 15 time segments in each object

'''
actor_encoding = {'light vehicle': 0, 'heavy vehicle': 1, 'cyclist': 2, 'pedestrian': 3, 'traffic': 4, 'ego': 5, 'NaN': 6}

action_encoding = {'turn': 0, 'turn left': 1, 'turn right': 3, 'merge': 4, 'accelerate': 5, 'brake': 6, 'stop': 7, 
                   'forward': 8, 'walk': 9, 'park': 10, 'drive': 11, 'reverse': 12, 'merge center': 13, 'merge left': 14, 
                   'merge right': 15, 'turn through': 16, 'merge u turn': 17, 'u-turn': 18, 'NaN': 19, '':20}

scene_encoding = {'intersection': 0, 'crosswalk': 1, 'bridge': 2, 'green light': 3, 'stop sign': 4, 'yield sign': 5, 'sign': 6, 
                  'u-turn': 7, 'traffic light': 8, 'traffic signal': 9, 'turn lane': 10, 'crosswalks': 11, 'green traffic light': 12, 
                  'light': 13, 'lights': 14, 'red light': 15, 'red traffic light': 16, 'signs': 17, 'traffic lights': 18, 
                  'yellow light': 19, 'yellow traffic light': 20, 'NaN': 21}

one_hot_sdlEmbedding = []

examples = [3, 413]
for example in range(len(sdlObjectList)): #loops through 6996 sdl objects in sdlObjectList
    # Each sdl object has a 7x21x22 one hot encoding representing its action, actor and scene element
    for a in range(len(sdlObjectList[example].actors)): #loops through 15 time segments
      #print("time segments: ", len(sdlObjectList[example].actors))
      #print(a)
      indices = np.zeros((7,21,22))
      actorsIndex = str(a+1)
      actor_list = []
      action_list = []
      scene_list = []
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        actor_list.append(sdlObjectList[example].actors[actorsIndex][j].description)
        action_list.append(sdlObjectList[example].actors[actorsIndex][j].action)

      scene_list.append(sdlObjectList[example].scene[actorsIndex])
      
      #print("actor list: ", actor_list)
      #print("action list: ", action_list)
      #print("scene list: ", scene_list)

      actor_indices = []
      action_indices = []
      scene_indices = []


      if(len(actor_list) != len(action_list)):
        print("Actor and action list don't match up, this may cause 1 to 1 actor to action correspondence errors")
        break
      
      for a_index in actor_list:
        actor_indices.append(actor_encoding[a_index])

      for act_index in action_list:
        action_indices.append(action_encoding[act_index])

      if( ((len(scene_list)) > 0) and scene_list[0] != 'NaN'):
        for i in scene_list:
          for j in i:
            scene_indices.append(scene_encoding[j])
      else:
        scene_indices.clear()
        scene_indices.append(21)

      if(len(actor_indices) != len(action_indices)):
        print("make sure each actor is matched up with an action")
        break
      # if each sdl has an actor paired with each action, how do we account for multiple scene elements
      #print("actor indices: ", actor_indices)
      #print("action indices: ", action_indices)
      #print("scene indices: ", scene_indices)

      if(len(scene_indices) > 0):
        for scene_index in scene_indices:
          for i, actor_index in enumerate(actor_indices):
            # since there is a one to one mapping between actor and actions, we can use the same index
            action_index = action_indices[i]
            #print("for")
            #print("actor index: ", actor_index, " action index: ", action_index, " scene_index: ", scene_index)
            indices[actor_index][action_index][scene_index] = 1.0
      else:
        scene_index = 21
        for i, actor_index in enumerate(actor_indices):
          action_index = action_indices[i]
          #print("else")
          #print("actor index: ", actor_index, " action index: ", action_index, " scene_index: ", scene_index)
          indices[actor_index][action_index][scene_index] = 1.0

      one_hot_sdlEmbedding.append(tf.convert_to_tensor(indices))
assert len(one_hot_sdlEmbedding) == 104940, "length of one_hot_sdlEmbedding should be 104940"

In [25]:
sparse_tensor_list = []
for item in one_hot_sdlEmbedding:
  item = tf.expand_dims(item, axis=0)
  sparse_tensor_list.append(tf.sparse.from_dense(item))

assert len(sparse_tensor_list) == 104940, "length of sparse_tensor list should be 104940"
sdl_encoding= tf.sparse.concat(0, sparse_tensor_list)
sdl_encoding.shape

TensorShape([104940, 7, 21, 22])

# SDL ID Generation Based Loosely on https://oas.voyage.auto/scenarios/

In [26]:
'''
Format of SDL embedding: Actor:Action, Actor:Action...-SceneElement
'''
actor_encoding = {'light vehicle': 0, 'heavy vehicle': 1, 'cyclist': 2, 'pedestrian': 3, 'traffic': 4, 'ego': 5, 'NaN': 6}

action_encoding = {'turn': 0, 'turn left': 1, 'turn right': 3, 'merge': 4, 'accelerate': 5, 'brake': 6, 'stop': 7, 
                   'forward': 8, 'walk': 9, 'park': 10, 'drive': 11, 'reverse': 12, 'merge center': 13, 'merge left': 14, 
                   'merge right': 15, 'turn through': 16, 'merge u turn': 17, 'u-turn': 18, 'NaN': 19, '':20}

scene_encoding = {'intersection': 0, 'crosswalk': 1, 'bridge': 2, 'green light': 3, 'stop sign': 4, 'yield sign': 5, 'sign': 6, 
                  'u-turn': 7, 'traffic light': 8, 'traffic signal': 9, 'turn lane': 10, 'crosswalks': 11, 'green traffic light': 12, 
                  'light': 13, 'lights': 14, 'red light': 15, 'red traffic light': 16, 'signs': 17, 'traffic lights': 18, 
                  'yellow light': 19, 'yellow traffic light': 20, 'NaN': 21}
scene_encoding_keysList = list(scene_encoding.keys())
scene_encoding_valuesList = list(scene_encoding.values())
sdl_ids = []

for example in range(len(sdlObjectList)): #loops through 6996 sdl objects in sdlObjectList
    # Each sdl object has a 7x21x22 one hot encoding representing its action, actor and scene element
    for a in range(len(sdlObjectList[example].actors)): #loops through 15 time segments
      #print("time segments: ", len(sdlObjectList[example].actors))
      #print(a)
      sdl_id = []
      actorsIndex = str(a+1)
      actor_list = []
      action_list = []
      scene_list = []
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        actor_list.append(sdlObjectList[example].actors[actorsIndex][j].description)
        action_list.append(sdlObjectList[example].actors[actorsIndex][j].action)

      scene_list.append(sdlObjectList[example].scene[actorsIndex])
      
      #print("actor list: ", actor_list)
      #rint("action list: ", action_list)
      #print("scene list: ", scene_list)

      actor_indices = []
      action_indices = []
      scene_indices = []


      if(len(actor_list) != len(action_list)):
        print("Actor and action list don't match up, this may cause 1 to 1 actor to action correspondence errors")
        break
      
      for a_index in actor_list:
        actor_indices.append(actor_encoding[a_index])

      for act_index in action_list:
        action_indices.append(action_encoding[act_index])

      if( ((len(scene_list)) > 0) and scene_list[0] != 'NaN'):
        for i in scene_list:
          for j in i:
            scene_indices.append(scene_encoding[j])
      else:
        scene_indices.clear()
        scene_indices.append(21)

      if(len(actor_indices) != len(action_indices)):
        print("make sure each actor is matched up with an action")
        break
      # if each sdl has an actor paired with each action, how do we account for multiple scene elements
      #print("actor indices: ", actor_indices)
      #print("action indices: ", action_indices)
      #print("scene indices: ", scene_indices)

      if(len(scene_indices) > 0):
        for i, actor in enumerate(actor_list):
        # since there is a one to one mapping between actor and actions, we can use the same index
            sdl_id.append(",")
            sdl_id.append(actor)
            sdl_id.append(":") 
            sdl_id.append(action_list[i])
            sdl_id.append("-")
        for j in scene_indices:
            sdl_id.append(scene_encoding_keysList[scene_encoding_valuesList.index(j)])
        sdl_id.append(".")

      else:
        for i, actor in enumerate(actor_list):
            sdl_id.append(",")
            sdl_id.append(actor)
            sdl_id.append(":") 
            sdl_id.append(action_list[i])
            #sdl_id.append("-")
        for j in scene_indices:
            sdl_id.append(scene_encoding_keysList[scene_encoding_valuesList.index(j)])
        sdl_id.append(".")

      sdl_id.pop(0)
      sdl_ids.append("".join(sdl_id))
assert len(sdl_ids) == 104940, "length of sdl_ids should be 104940"

# Doc2Vec

Rough implementation of Doc2Vec for bddx dataset, and a simple distance measurement between two "similar sentences" and our corresponding sdl_embedding. 

In [64]:
# prep list of sentences
bddx_statements = []
for i in range(len(sdlList)):
  bddx_statements.append(sdlList[i]['1AJ'])
  bddx_statements.append(sdlList[i]['2AJ'])
  bddx_statements.append(sdlList[i]['3AJ'])
  bddx_statements.append(sdlList[i]['4AJ'])
  bddx_statements.append(sdlList[i]['5AJ'])
  bddx_statements.append(sdlList[i]['6AJ'])
  bddx_statements.append(sdlList[i]['7AJ'])
  bddx_statements.append(sdlList[i]['8AJ'])
  bddx_statements.append(sdlList[i]['9AJ'])
  bddx_statements.append(sdlList[i]['10AJ'])
  bddx_statements.append(sdlList[i]['11AJ'])
  bddx_statements.append(sdlList[i]['12AJ'])
  bddx_statements.append(sdlList[i]['13AJ'])
  bddx_statements.append(sdlList[i]['14AJ'])
  bddx_statements.append(sdlList[i]['15AJ'])
assert len(bddx_statements) == 104940, "length of bddx_statements should be 104940"

https://medium.com/@mishra.thedeepak/doc2vec-simple-implementation-example-df2afbbfbad5

In [None]:
bddx_statements = ["nan" if (x == "" or x==" ") else x for x in bddx_statements]

In [None]:
#Import all the dependencies
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize

In [None]:
tagged_data = [TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) for i, _d in enumerate(bddx_statements)]

In [None]:
max_epochs = 40
vec_size = 20
alpha = 0.025

model = Doc2Vec(size=vec_size,
                alpha=alpha, 
                min_alpha=0.00025,
                min_count=1,
                dm =1)
  
model.build_vocab(tagged_data)

for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    model.train(tagged_data,
                total_examples=model.corpus_count,
                epochs=model.iter)
    # decrease the learning rate
    model.alpha -= 0.0002
    # fix the learning rate, no decay
    model.min_alpha = model.alpha

model.save("d2v.model")
print("Model Saved")



iteration 0




iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


Model Saved


In [None]:
from gensim.models.doc2vec import Doc2Vec

model= Doc2Vec.load("d2v.model")

assert len(model.docvecs) == 104940, "Number of vectors, should be 104940

Number of vectors, should be 104940 == sentences in dataset == size of sdl embedding:  104940


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


# Doc2Vec for Open Autonomous Vehicle Embedding 


In [27]:
#Import all the dependencies
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize

In [28]:
tagged_data = [TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) for i, _d in enumerate(sdl_ids)]

In [29]:
max_epochs = 150
vec_size = 100
alpha = 0.0025

model = Doc2Vec(size=vec_size,
                alpha=alpha, 
                min_alpha=0.00025,
                min_count=1,
                dm =1)
  
model.build_vocab(tagged_data)

for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    model.train(tagged_data,
                total_examples=model.corpus_count,
                epochs=model.iter)
    # decrease the learning rate
    model.alpha -= 0.0002
    # fix the learning rate, no decay
    model.min_alpha = model.alpha

model.save("d2v_sdl_ids.model")
print("Model Saved")



iteration 0




iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


Model Saved


In [30]:
from gensim.models.doc2vec import Doc2Vec

model_sdl_ids = Doc2Vec.load("d2v_sdl_ids.model")
assert len(model_sdl_ids.docvecs) == 104940, "Number of vectors, should be 104940"

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [46]:
# 1. prove that doc2vec model can generate similar SDL objects
# 2. show that similar SDLs have similar vector distances
# 3. generate a list of similar SDLs for a single reference SDL 
# 4. generate a list of similar SDLs to the same reference SDL using the previous doc2vec model
# 5. compare the two generated lists for similar returned SDLs and the extra ones returned by our SDL model 

from scipy import spatial
first_text = sdl_ids[6195]
second_text = sdl_ids[45]
print(sdl_ids[30])
print('first text: ', first_text)
print('second text: ', second_text)

vec1 = model.docvecs[6195]
vec2 = model.docvecs[30]

similairty = spatial.distance.cosine(vec1, vec2)
euclidean = spatial.distance.euclidean(vec1, vec2)
print(euclidean)
print(similairty)

ego:accelerate-,traffic:forward-green light.
first text:  ego:forward,traffic:forward.
second text:  ego:forward,traffic:forward.
3.860210657119751
0.8689534217119217


In [86]:
print(sdl_ids[50])
print(bddx_statements[50])
bddx_statements_condensed = list(filter(lambda a: a != " ", bddx_statements))
print(len(bddx_statements_condensed))
sdl_ids_condensed = list(filter(lambda a: a != "NaN:NaN-NaN.", sdl_ids))
print(len(sdl_ids_condensed))


NaN:NaN-NaN.
 
26539
26539


# Experiment for Open Autonomous Safety SDLs

In [76]:
# https://stackoverflow.com/questions/50861237/is-there-an-alternative-to-difflib-get-close-matches-that-returns-indexes-l
import difflib
from heapq import nlargest as _nlargest

def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6):
    """Use SequenceMatcher to return a list of the indexes of the best 
    "good enough" matches. word is a sequence for which close matches 
    are desired (typically a string).
    possibilities is a list of sequences against which to match word
    (typically a list of strings).
    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.
    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.
    """

    if not n >  0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    result = []
    s = difflib.SequenceMatcher()
    s.set_seq2(word)
    for idx, x in enumerate(possibilities):
        s.set_seq1(x)
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff and \
           s.ratio() >= cutoff:
            result.append((s.ratio(), idx))

    # Move the best scorers to head of list
    result = _nlargest(n, result)

    # Strip scores for the best n matches
    return [x for score, x in result]

In [None]:
scores = []
for index, id in enumerate(sdl_ids_condensed[:5000]):
    print("index: ", index)
    sdl_ids_similar = set(get_close_matches_indexes(id, sdl_ids_condensed))
    bddx_statements_similar = set(get_close_matches_indexes(bddx_statements_condensed[index], bddx_statements_condensed))
    intersection = list(sdl_ids_similar.intersection(bddx_statements_similar))
    count = len(intersection) 
    scores.append(count)

In [81]:
scores = []
for index, id in enumerate(sdl_ids_condensed):
    print("index: ", index)
    sdl_ids_similar = set(get_close_matches_indexes(id, sdl_ids_condensed))
    bddx_statements_similar = set(get_close_matches_indexes(bddx_statements_condensed[index], bddx_statements_condensed))
    intersection = list(sdl_ids_similar.intersection(bddx_statements_similar))
    count = len(intersection) 
    scores.append(count)

index:  0
index:  1
index:  2
index:  3
index:  4
index:  5
index:  6
index:  7
index:  8
index:  9
index:  10
index:  11
index:  12
index:  13
index:  14
index:  15
index:  16
index:  17
index:  18
index:  19
index:  20
index:  21
index:  22
index:  23
index:  24
index:  25
index:  26
index:  27
index:  28
index:  29
index:  30
index:  31
index:  32
index:  33
index:  34
index:  35
index:  36
index:  37
index:  38
index:  39
index:  40
index:  41
index:  42
index:  43
index:  44
index:  45
index:  46
index:  47
index:  48
index:  49
index:  50
index:  51
index:  52
index:  53
index:  54
index:  55
index:  56
index:  57
index:  58
index:  59
index:  60
index:  61
index:  62
index:  63


KeyboardInterrupt: ignored

# Experiment 1: SDL Validation by Visual Inspection

In [None]:
# sdl 3: one_hot_sdlEmbedding[46] = one_hot_sdlEmbedding[(15*3)] ----> 15 time segments per sdl + 1st time segment in sdl object 3
# sdl 413: one_hot_sdlEmbedding[6195] = one_hot_sdlEmbedding[(15*413)]----> 15 time segments per sdl + 1st time segment in sdl object 413
# sdl 571: one_hot_sdlEmbedding[8568] = one_hot_sdlEmbedding[(15*571)+2]----> 15 time segments per sdl + 3rd time segment in sdl object 571
examples = [3,413,571] 

for example in examples:
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdlObjectList[example].statements)

    for a in range(len(sdlObjectList[example].actors)):
      actorsIndex = str(a+1)
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        print('Time segment:', actorsIndex, ",  %s: %s"%(sdlObjectList[example].actors[actorsIndex][j].description,sdlObjectList[example].actors[actorsIndex][j].action))
    print('Scene: ', sdlObjectList[example].scene)

Object 3: 
Actors: 
{'1': 'The car is driving forward as traffic flows freely.', '2': 'The car merges into the lane to its left to get around a slower car in front of it.', '3': 'The car drives at a normal speed as traffic moves freely.', '4': "The car slows and veers slightly right due to a car in the neighboring lane entering the car's lane.", '5': 'The car drives forward as there are no nearby cars in its lane.', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: forward
Time segment: 1 ,  traffic: forward
Time segment: 2 ,  ego: merge left
Time segment: 2 ,  light vehicle: 
Time segment: 4 ,  ego: brake
Time segment: 4 ,  light vehicle: merge
Time segment: 5 ,  ego: forward
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN


## Experiment 1: MSE

In [None]:
A = one_hot_sdlEmbedding[45].numpy()
B = one_hot_sdlEmbedding[6195].numpy()
C = one_hot_sdlEmbedding[8567].numpy()

diff1 = one_hot_sdlEmbedding[61].numpy()


mseAB = (np.square(A - B)).mean(axis=None)
mseBC = (np.square(B - C)).mean(axis=None)
mseAC = (np.square(A - C)).mean(axis=None)

mseDiffA = (np.square(A - diff1)).mean(axis=None)
mseDiffB = (np.square(B - diff1)).mean(axis=None)
mseDiffC = (np.square(C - diff1)).mean(axis=None)

In [None]:
print(bddx_statements[45])
print(bddx_statements[6195])
print(bddx_statements[8567])
print("different statement: ", bddx_statements[61])

The car is driving forward as traffic flows freely.
The car is driving forward as traffic flows freely.
The car is driving forward as traffic flows freely.
different statement:  The car is accelerating as the light turned green.


In [None]:
print("MSE between SDL object 3 and 413: ", mseAB)
print("MSE between SDL object 413 and 571: ", mseBC)
print("MSE between SDL object 571 and 3: ", mseAC)

print("MSE between SDL object 3 and diff: ", mseDiffA)
print("MSE between SDL object 413 and diff: ", mseDiffB)
print("MSE between SDL object 571 and diff: ", mseDiffC)

MSE between SDL object 3 and 413:  0.0
MSE between SDL object 413 and 571:  0.0
MSE between SDL object 571 and 3:  0.0
MSE between SDL object 3 and diff:  0.0009276437847866419
MSE between SDL object 413 and diff:  0.0009276437847866419
MSE between SDL object 571 and diff:  0.0009276437847866419


## Experiment 1: Doc2Vec Results

In [None]:
# finds top 10 most similar sentences to test data 
test = "The car is driving forward as traffic flows freely.".split(" ")  

ivec = model.infer_vector(doc_words=test, steps=150, alpha=0.00025)
model.docvecs.most_similar(positive=[ivec], topn=10)

  if np.issubdtype(vec.dtype, np.int):


[('12933', 0.855712890625),
 ('35611', 0.8320037722587585),
 ('50312', 0.8197561502456665),
 ('35685', 0.8095692992210388),
 ('30840', 0.8091429471969604),
 ('101418', 0.808503270149231),
 ('42153', 0.8056596517562866),
 ('35402', 0.8028759956359863),
 ('71176', 0.8016964197158813),
 ('68010', 0.7989912033081055)]

In [None]:
print(bddx_statements[12933])
print(bddx_statements[35611])
print(bddx_statements[101418])
print(bddx_statements[35685])
print(bddx_statements[50312])
print(bddx_statements[8567])
print(bddx_statements[30840])
print(bddx_statements[37050])
print(bddx_statements[71176])
print(bddx_statements[35402])

The car drives forward because traffic is moving normal.
The car drives forward because traffic in front of it is moving.
The car is driving forward as its lane of traffic flows freely.
The car drives down a street because traffic is moving and lights are green.
The car proceeds down the highway as traffic ahead is flowing.
The car is driving forward as traffic flows freely.
The car is driving forward because traffic is traveling at a normal rate.
The car is driving forward because traffic in front of it is moving at a normal rate.
The car drives down the highway. because the road is clear and traffic is moving.
The car drives forward because traffic in front of it is moving forward.


## Experiment 1: Doc2Vec Validation

In [None]:
# BDDX returned these three objects as the most similar to the sentence: The car is driving forward as traffic flows freely.
# sdl objects 862,2374,6761
doc_vec_A = one_hot_sdlEmbedding[12933].numpy()
doc_vec_B = one_hot_sdlEmbedding[35611].numpy()
doc_vec_C = one_hot_sdlEmbedding[101418].numpy()

doc_vec_diff1 = one_hot_sdlEmbedding[61].numpy()
reference = one_hot_sdlEmbedding[45].numpy()


mseAB_doc_vec = (np.square(doc_vec_A - doc_vec_B)).mean(axis=None)
mseBC_doc_vec = (np.square(doc_vec_B - doc_vec_C)).mean(axis=None)
mseAC_doc_vec = (np.square(doc_vec_A - doc_vec_C)).mean(axis=None)

mseDiffA_doc_vec = (np.square(doc_vec_A - doc_vec_diff1)).mean(axis=None)
mseDiffB_doc_vec = (np.square(doc_vec_B - doc_vec_diff1)).mean(axis=None)
mseDiffC_doc_vec = (np.square(doc_vec_C - doc_vec_diff1)).mean(axis=None)

mseReferenceA_doc_vec = (np.square(doc_vec_A - reference)).mean(axis=None)
mseReferenceB_doc_vec = (np.square(doc_vec_B - reference)).mean(axis=None)
mseReferenceC_doc_vec = (np.square(doc_vec_C - reference)).mean(axis=None)

In [None]:
print("MSE between SDL object 862 and 2374: ", mseAB_doc_vec)
print("MSE between SDL object 2374 and 6761: ", mseBC_doc_vec)
print("MSE between SDL object 862 and 6761: ", mseAC_doc_vec)

print("MSE between SDL object 862 and diff: ", mseDiffA_doc_vec)
print("MSE between SDL object 2374 and diff: ", mseDiffB_doc_vec)
print("MSE between SDL object 6761 and diff: ", mseDiffC_doc_vec)

print("MSE between SDL object 862 and reference: ", mseReferenceA_doc_vec)
print("MSE between SDL object 2374 and reference: ", mseReferenceB_doc_vec)
print("MSE between SDL object 6761 and reference: ", mseReferenceC_doc_vec)

MSE between SDL object 862 and 2374:  0.0
MSE between SDL object 2374 and 6761:  0.0
MSE between SDL object 862 and 6761:  0.0
MSE between SDL object 862 and diff:  0.0009276437847866419
MSE between SDL object 2374 and diff:  0.0009276437847866419
MSE between SDL object 6761 and diff:  0.0009276437847866419
MSE between SDL object 862 and reference:  0.0
MSE between SDL object 2374 and reference:  0.0
MSE between SDL object 6761 and reference:  0.0


In [None]:
# corresponding SDL embeddings
# time segment 4
# time segment 2
# time segment 4

examples = [862,2374,6761] 

for example in examples:
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdlObjectList[example].statements)

    for a in range(len(sdlObjectList[example].actors)):
      actorsIndex = str(a+1)
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        print('Time segment:', actorsIndex, ",  %s: %s"%(sdlObjectList[example].actors[actorsIndex][j].description,sdlObjectList[example].actors[actorsIndex][j].action))
    print('Scene: ', sdlObjectList[example].scene)

Object 862: 
Actors: 
{'1': 'The car is stopped because traffic in front of it is stopped.', '2': 'The car accelerates slowly as traffic in front of it starts to move.', '3': 'The car veers slightly left and back to get around a parked vehicle.', '4': 'The car drives forward because traffic is moving normal.', '5': 'No Data Recorded', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: stop
Time segment: 1 ,  traffic: stop
Time segment: 2 ,  ego: accelerate
Time segment: 2 ,  traffic: forward
Time segment: 4 ,  ego: forward
Time segment: 4 ,  traffic: forward
Time segment: 5 ,  NaN: NaN
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Time seg

# Experiment 2: SDL Validation by Visual Inspection

In [None]:
# sdl 4: one_hot_sdlEmbedding[60] = one_hot_sdlEmbedding[(15*4)] ----> 15 time segments per sdl + 1st time segment in sdl object 4
# sdl 32: one_hot_sdlEmbedding[480] = one_hot_sdlEmbedding[(15*32)]----> 15 time segments per sdl + 1st time segment in sdl object 32
# sdl 34: one_hot_sdlEmbedding[510] = one_hot_sdlEmbedding[(15*34)]----> 15 time segments per sdl + 1st time segment in sdl object 34
examples = [4,32,34] 

for example in examples:
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdlObjectList[example].statements)

    for a in range(len(sdlObjectList[example].actors)):
      actorsIndex = str(a+1)
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        print('Time segment:', actorsIndex, ",  %s: %s"%(sdlObjectList[example].actors[actorsIndex][j].description,sdlObjectList[example].actors[actorsIndex][j].action))
    print('Scene: ', sdlObjectList[example].scene)

Object 4: 
Actors: 
{'1': 'The car is stopped because the light is red.', '2': 'The car is accelerating as the light turned green.', '3': 'The car is slowing because traffic in front of it is stopped.', '4': 'The car is driving forward slowly due to slow traffic in front of it.', '5': 'The car is merging into the left lane to make a left turn.', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: stop
Time segment: 2 ,  ego: accelerate
Time segment: 3 ,  ego: brake
Time segment: 3 ,  traffic: stop
Time segment: 4 ,  ego: forward
Time segment: 4 ,  traffic: 
Time segment: 5 ,  ego: merge left
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Tim

## Experiment 2: MSE

In [None]:
print(bddx_statements[60])
print(bddx_statements[480])
print(bddx_statements[510])
print("different: ", bddx_statements[45])

The car is stopped because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.
different:  The car is driving forward as traffic flows freely.


In [None]:
D = one_hot_sdlEmbedding[60].numpy()
E = one_hot_sdlEmbedding[480].numpy()
F = one_hot_sdlEmbedding[510].numpy()

diff2 = one_hot_sdlEmbedding[45].numpy()

mseDE = (np.square(D - E)).mean(axis=None)
mseEF = (np.square(E - F)).mean(axis=None)
mseDF = (np.square(D - F)).mean(axis=None)

mseDiffD = (np.square(D - diff2)).mean(axis=None)
mseDiffE = (np.square(E - diff2)).mean(axis=None)
mseDiffF = (np.square(F - diff2)).mean(axis=None)

In [None]:
print("MSE between SDL object 4 and 32: ", mseDE)
print("MSE between SDL object 32 and 34: ", mseEF)
print("MSE between SDL object 4 and 34: ", mseDF)

print("MSE between SDL object 4 and diff: ", mseDiffD)
print("MSE between SDL object 32 and diff: ", mseDiffE)
print("MSE between SDL object 34 and diff: ", mseDiffF)

MSE between SDL object 4 and 32:  0.0
MSE between SDL object 32 and 34:  0.0
MSE between SDL object 4 and 34:  0.0
MSE between SDL object 4 and diff:  0.0009276437847866419
MSE between SDL object 32 and diff:  0.0009276437847866419
MSE between SDL object 34 and diff:  0.0009276437847866419


## Experiment 2: Doc2Vec Results

In [None]:
# finds top 10 most similar sentences to test data 
test = "The car is stopped because the light is red.".split(" ")  

ivec = model.infer_vector(doc_words=test, steps=150, alpha=0.00025)
model.docvecs.most_similar(positive=[ivec], topn=10)

  if np.issubdtype(vec.dtype, np.int):


In [None]:
print(bddx_statements[8673])
print(bddx_statements[104430])
print(bddx_statements[3138])
print(bddx_statements[1397])
print(bddx_statements[47820])
print(bddx_statements[48226])
print(bddx_statements[902])
print(bddx_statements[6859])
print(bddx_statements[6904])
print(bddx_statements[13545])

The car is stopped because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.
The is stopped because the light is red.
The car is stopped. because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.
The car is stopped because the light is red.


## Experiment 2: Doc2Vec Validation

In [None]:
# BDDX returned these three objects as the most similar to the sentence: The car is driving forward as traffic flows freely.
# sdl objects 862,2374,6761
doc_vec_D = one_hot_sdlEmbedding[8673].numpy()
doc_vec_E = one_hot_sdlEmbedding[104430].numpy()
doc_vec_F = one_hot_sdlEmbedding[3138].numpy()

doc_vec_diff2 = one_hot_sdlEmbedding[45].numpy()
reference = one_hot_sdlEmbedding[60].numpy()


mseDE_doc_vec = (np.square(doc_vec_D - doc_vec_E)).mean(axis=None)
mseEF_doc_vec = (np.square(doc_vec_E - doc_vec_F)).mean(axis=None)
mseDF_doc_vec = (np.square(doc_vec_D - doc_vec_F)).mean(axis=None)

mseDiffD_doc_vec = (np.square(doc_vec_D - doc_vec_diff2)).mean(axis=None)
mseDiffE_doc_vec = (np.square(doc_vec_E - doc_vec_diff2)).mean(axis=None)
mseDiffF_doc_vec = (np.square(doc_vec_F - doc_vec_diff2)).mean(axis=None)

mseReferenceD_doc_vec = (np.square(doc_vec_D - reference)).mean(axis=None)
mseReferenceE_doc_vec = (np.square(doc_vec_E - reference)).mean(axis=None)
mseReferenceF_doc_vec = (np.square(doc_vec_F - reference)).mean(axis=None)

In [None]:
print("MSE between SDL object 8673 and 104430: ", mseDE_doc_vec)
print("MSE between SDL object 104430 and 3138: ", mseEF_doc_vec)
print("MSE between SDL object 8673 and 3138: ", mseDF_doc_vec)

print("MSE between SDL object 8673 and diff: ", mseDiffD_doc_vec)
print("MSE between SDL object 104430 and diff: ", mseDiffE_doc_vec)
print("MSE between SDL object 3138 and diff: ", mseDiffF_doc_vec)

print("MSE between SDL object 8673 and reference: ", mseReferenceD_doc_vec)
print("MSE between SDL object 104430 and reference: ", mseReferenceE_doc_vec)
print("MSE between SDL object 3138 and reference: ", mseReferenceF_doc_vec)

MSE between SDL object 8673 and 104430:  0.0
MSE between SDL object 104430 and 3138:  0.0
MSE between SDL object 8673 and 3138:  0.0
MSE between SDL object 8673 and diff:  0.0009276437847866419
MSE between SDL object 104430 and diff:  0.0009276437847866419
MSE between SDL object 3138 and diff:  0.0009276437847866419
MSE between SDL object 8673 and reference:  0.0
MSE between SDL object 104430 and reference:  0.0
MSE between SDL object 3138 and reference:  0.0


In [None]:
# corresponding SDL embeddings
# time segment 4
# time segment 1
# time segment 4

examples = [578,6962,209] 

for example in examples:
    print('Object %i: '%(example))
    print("Actors: ")
    print(sdlObjectList[example].statements)

    for a in range(len(sdlObjectList[example].actors)):
      actorsIndex = str(a+1)
      for j in range(len(sdlObjectList[example].actors[actorsIndex])):
        print('Time segment:', actorsIndex, ",  %s: %s"%(sdlObjectList[example].actors[actorsIndex][j].description,sdlObjectList[example].actors[actorsIndex][j].action))
    print('Scene: ', sdlObjectList[example].scene)

Object 578: 
Actors: 
{'1': 'The car accelerates slowly to make a left turn.', '2': 'The car maintains speed because the road is clear.', '3': 'The car slows to a stop because the light is red.', '4': 'The car is stopped because the light is red.', '5': 'No Data Recorded', '6': 'No Data Recorded', '7': 'No Data Recorded', '8': 'No Data Recorded', '9': 'No Data Recorded', '10': 'No Data Recorded', '11': 'No Data Recorded', '12': 'No Data Recorded', '13': 'No Data Recorded', '14': 'No Data Recorded', '15': 'No Data Recorded'}
Time segment: 1 ,  ego: accelerate
Time segment: 3 ,  ego: brake
Time segment: 4 ,  ego: stop
Time segment: 5 ,  NaN: NaN
Time segment: 6 ,  NaN: NaN
Time segment: 7 ,  NaN: NaN
Time segment: 8 ,  NaN: NaN
Time segment: 9 ,  NaN: NaN
Time segment: 10 ,  NaN: NaN
Time segment: 11 ,  NaN: NaN
Time segment: 12 ,  NaN: NaN
Time segment: 13 ,  NaN: NaN
Time segment: 14 ,  NaN: NaN
Time segment: 15 ,  NaN: NaN
Scene:  {'1': [], '2': [], '3': ['red light'], '4': ['red ligh

# Experiment 3

In [None]:
first_error = []
second_error = []
third_error = []
fourth_error = []
fifth_error = []
for i,j in enumerate(one_hot_sdlEmbedding):
    if (i != 6996):
        test = sdlObjectList[i].statements['1'].split(" ")  
        ivec = model.infer_vector(doc_words=test, steps=150, alpha=0.00025)
        similar_sdls = model.docvecs.most_similar(positive=[ivec], topn=10)
        
        first = one_hot_sdlEmbedding[int(similar_sdls[0][0])].numpy()
        second = one_hot_sdlEmbedding[int(similar_sdls[1][0])].numpy()
        third = one_hot_sdlEmbedding[int(similar_sdls[2][0])].numpy()
        fourth = one_hot_sdlEmbedding[int(similar_sdls[3][0])].numpy()
        fifth = one_hot_sdlEmbedding[int(similar_sdls[4][0])].numpy()
        
        first_error.append((np.square(j - first)).mean(axis=None))
        second_error.append((np.square(j - second)).mean(axis=None))
        third_error.append((np.square(j - third)).mean(axis=None))
        fourth_error.append((np.square(j - fourth)).mean(axis=None))
        fifth_error.append((np.square(j - fifth)).mean(axis=None))
    else:
        break




  if np.issubdtype(vec.dtype, np.int):


In [None]:
len(first_error)

In [None]:
count_first = len([i for i in first_error if i >= 0.0009276437847866419]) 
count_second = len([i for i in second_error if i >= 0.0009276437847866419]) 
count_third = len([i for i in third_error if i >= 0.0009276437847866419]) 
count_fourth = len([i for i in fourth_error if i >= 0.0009276437847866419]) 
count_fifth = len([i for i in fifth_error if i >= 0.0009276437847866419]) 

In [None]:
print(count_first)
print(count_second)
print(count_third)
print(count_fourth)
print(count_fifth)

2257
2195
2172
2185
2129
