# Expand transitions
Natalia Velez, April 2021

In this file, we:
   1. Parse the transition files
   2. Expand category transitions into transitions between individual objects, with a separate workflow for pattern transitions
   3. Upload expanded transitions to database

In [1]:
import os, re, glob, json
from os.path import join as opj
import numpy as np
import pandas as pd
import json
import networkx as nx 
import pymongo
import pprint
from tqdm import notebook
from itertools import cycle

# Embed html-formatted text
# (Used to do QA on transitions)
from IPython.core.display import display, HTML
def embed(s): return display(HTML(s))

## Load data from database

Connect:

In [2]:
keyfile = '../6_database/credentials.key'

#Connection string
creds = open(keyfile, "r").read().splitlines()
myclient = pymongo.MongoClient('134.76.24.75', username=creds[0], password=creds[1], authSource='ohol') 
print(myclient)

ohol = myclient.ohol
print(ohol)

MongoClient(host=['134.76.24.75:27017'], document_class=dict, tz_aware=False, connect=True, authsource='ohol')
Database(MongoClient(host=['134.76.24.75:27017'], document_class=dict, tz_aware=False, connect=True, authsource='ohol'), 'ohol')


Load objects:

In [3]:
objects = list(ohol.objects.find())
objects = pd.DataFrame(objects)

print('Loading %i objects' % objects.shape[0])
objects.head()

Loading 4161 objects


Unnamed: 0,_id,id,name,containable,containSize,vertSlotRot,permanent,minPickupAge,heldInHand,blocksWalking,...,floorHugging,slotsLocked,noFlip,sideAccess,creationSoundForce,invisCont,slotPos,vert,spritesDrawnBehind,spritesAdditiveBlend
0,60750c9e68dcd62afd70767e,11,Skin Tone A &B &C &D &E &F,0,1.0,0.0,0,3,0,0,...,,,,,,,,,,
1,60750c9e68dcd62afd70767f,19,Female001 D,0,1.0,0.0,0,3,0,0,...,0.0,,,,,,,,,
2,60750c9e68dcd62afd707680,30,Wild Gooseberry Bush,0,1.0,0.0,1,3,0,0,...,0.0,,,,,,,,,
3,60750c9e68dcd62afd707681,31,Gooseberry,1,1.0,0.0,0,3,1,0,...,0.0,0.0,,,,,,,,
4,60750c9e68dcd62afd707682,32,Big Hard Rock,0,1.0,0.0,1,3,0,0,...,0.0,,,,,,,,,


Load categories:

In [4]:
categories = list(ohol.categories.find())
categories = pd.DataFrame(categories)

print('Loading %i categories' % categories.shape[0])
categories.head()

Loading 269 categories


Unnamed: 0,_id,parentID,numObjects,probabilistic,pattern,children,probs
0,60750cbd68dcd62afd7086bf,316,3,False,True,"[2505, 2517, 3002]",
1,60750cbd68dcd62afd7086c0,319,3,False,True,"[2506, 2522, 3003]",
2,60750cbd68dcd62afd7086c1,320,3,False,True,"[2510, 2520, 3004]",
3,60750cbd68dcd62afd7086c2,321,3,False,True,"[2507, 2524, 3007]",
4,60750cbd68dcd62afd7086c3,322,3,False,True,"[2508, 2523, 3008]",


Load transitions:

In [5]:
trans_cols = ['origActor', 'origTarget', 'newActor', 'newTarget', 'lastUseActor', 'lastUseTarget']
cat_cols = [c+'_cat' for c in trans_cols]
pattern_cols = [c+'_pattern' for c in trans_cols]

transitions = list(ohol.transitions.find())
transitions = pd.DataFrame(transitions)
transitions = transitions[trans_cols]

print('Loading %i transitions' % transitions.shape[0])
transitions.head()

Loading 4847 transitions


Unnamed: 0,origActor,origTarget,newActor,newTarget,lastUseActor,lastUseTarget
0,67,2970,0,2966,False,False
1,235,1890,1892,0,False,False
2,441,445,441,444,False,False
3,0,2671,2539,2665,False,False
4,239,335,327,291,False,False


## Expand transitions between categories

### Helper functions

Useful functions for debugging:

In [6]:
def obj_name_elem(o):
    if o in [0,-1]:
        return 'empty'
    else:
        return objects[objects.id==o].name.values[0]
obj_name = np.vectorize(obj_name_elem)

def display_transition(row):
    # Get item names
    names = row.apply(obj_name)
    
    # Generate HTML tags
    tag = '<mark style="background-color: %s;">%s</mark>'
    actor_color = '#91d8f2'
    target_color = '#ffbf6b'
    out = '(%s,%s) &#x2192; (%s,%s)' % (tag % (actor_color, names['origActor']),
                                        tag % (target_color, names['origTarget']),
                                        tag % (actor_color, names['newActor']),
                                        tag % (target_color, names['newTarget']))
    
    embed(out)

In [7]:
display_transition(transitions.loc[1])

Helper functions: Identify categories

In [8]:
# Helper function: Does this item name correspond to a category?
cat_names = categories.parentID.values
def is_cat_elem(objID): return objID in cat_names
is_cat = np.vectorize(is_cat_elem)

# Helper function: Is this category a "pattern" category?
# (Pattern categories are parsed differently from regular categories - see below)
pattern_cats = categories[categories.pattern].parentID.values
def is_pattern_elem(cat): return cat in pattern_cats
is_pattern = np.vectorize(is_pattern_elem)

In [9]:
# Some examples
print(obj_name(33))
print('Category?: %s' % is_cat_elem(33))
print('Pattern?: %s\n' % is_pattern_elem(33))

print(obj_name(903))
print('Category?: %s' % is_cat_elem(903))
print('Pattern?: %s\n' % is_pattern_elem(903))

print(obj_name(3831))
print('Category?: %s' % is_cat_elem(3831))
print('Pattern?: %s\n' % is_pattern_elem(3831))

Stone
Category?: False
Pattern?: False

@ Tillable Row
Category?: True
Pattern?: False

@ Non-Dug track
Category?: True
Pattern?: True



Helper functions to read out information about categories:

In [10]:
def is_real_obj(cat):
    '''
    Is this a real object, or just a generic category label?
    e.g., @ Tillable row --> False
    
    Input: int (needs to match an entry in categories.parentID)
    Output: bool
    '''
    name = objects[objects.id==cat]['name'].values[0]
    return not name[0] == '@'

def cat_children(cat, pattern=False):
    '''
    Return object ID (if not category) or list of children (if regular category)
    
    Inputs:
    cat: int
    
    Output: numpy array of categories
    '''
    if is_cat(cat):
        children = categories[categories.parentID == cat]['children'].values[0]
        if pattern & is_real_obj(cat): 
            return [cat] + children
        else:
            return children
    else:
        return np.array([cat])
    
def n_children_elem(cat):
    '''
    Get number of children
    '''
    if cat in [0, -1]:
        return 0
    else:
        return len(cat_children(cat))
n_children = np.vectorize(n_children_elem)

In [11]:
# Some examples
print(obj_name(903))
print('Real object? %s' % is_real_obj(903))
print('%i children:' % n_children_elem(903))
print(*[obj_name(o) for o in cat_children(903)], sep='\n')

@ Tillable Row
Real object? False
2 children:
Shallow Tilled Row - groundOnly +biomeBlock4
Fertile Soil Pile


Put together combinations of different categories:

In [12]:
def expand_pairs(arr1, arr2):
    '''
    Expand one array in a set (e.g., old and new targets) if they are not the same length
    
    Inputs:
    arr1, arr2: lists
    
    Outputs:
    pairs: list of tuples
    '''
    if len(arr2) == 1:
        pairs = zip(arr1, cycle(arr2))
    elif len(arr1) == 1:
        pairs = zip(cycle(arr1), arr2)
    else:
        pairs = zip(arr1, arr2)
    
    return list(pairs)

In [13]:
expand_pairs(['pancakes'], ['bacon', 'egg', 'sausage'])

[('pancakes', 'bacon'), ('pancakes', 'egg'), ('pancakes', 'sausage')]

### Expand non-pattern categories

In [14]:
def expand_transitions(row):
    '''
    Takes a row from the original transitions dataset, expands any transitions containing category labels,
    and returns the set of child transitions

    Inputs:
    row: pd.Series object (must contain fields: origActor, newActor, origTarget, newTarget)
    
    Outputs:
    ex_transitions: list of tuples (which will later be turned into a dataframe)
    '''
    # Find categories to expand
    row_cats = row.apply(is_cat)
    row_pats = row.apply(is_pattern)
    valid_categories = row_cats & (~row_pats)
    
    # Expand category labels into lists of children
    ex_inputs = pd.Series([cat_children(e)
                           if valid else [e] 
                           for e,valid in zip(row,valid_categories)], 
                          index=row.index)

     # Match up children to make expanded transitions
    actors = expand_pairs(ex_inputs['origActor'], ex_inputs['newActor'])
    targets = expand_pairs(ex_inputs['origTarget'], ex_inputs['newTarget'])

    ex_transitions = pd.DataFrame([(oa, ot, na, nt, row['lastUseActor'], row['lastUseTarget'])
                                   for oa,na in actors
                                   for ot,nt in targets],
                                 columns = trans_cols)
    
    return ex_transitions

In [15]:
nonpattern_transition = transitions.loc[5]
display_transition(transitions.loc[5])

nonpattern_expanded = expand_transitions(nonpattern_transition)

print('After expansion:')
for _,row in nonpattern_expanded.iterrows():
    display_transition(row)

After expansion:


### Expand **pattern** categories

According to [this comment](https://github.com/JustinLove/onetech/blob/mapping/process/src/TransitionImporter.js) in the onetech repository, pattern categories need to be parsed differently:
```
  // Pattern categories work differently than regular categories:
  // 1. The parentID is an actual object and should stick around
  // 2. A transition is only considered if all matching pattern
  //    categories have the same number of objectIDs
  // 3. For each objectID, a new transition is created which maps
  //    each other pattern category objectID to the new object 
```

After digging through examples of particular transitions, I found three exceptions to this comment:

1. Some parent items are generic category labels. Category objects start with the "@" character (e.g., "@ High Heat Source", "@ Tillable Row") and do not correspond to any one item. In these cases, we don't add the parent IDs to the expanded transitions.
2. Some transitions violate (2), but the parent IDs alone specify a valid transition. These invalid transitions are often superseded by a transition between specific items (e.g., yellow paint + sheet metal --> empty bucket). In the main loop, we'll check for pattern transitions that introduce contradictions and will filter those out.

e.g., (<strong>1904</strong>, 4652) --> (659, 4675)<br />
(<strong>bucket of {blue, yellow, black, green} paint</strong>, stack of sheet metal) --> (empty bucket, stack of blue sheet metal)

3. Some transitions violate (2) because the new target/object are empty. These transitions should still be added.

e.g., (-1, <strong>1769</strong>) --> (0, 0)<br />
(decay, <strong>dead {German Shepherd, Collie, Beagle, Pit Bull, ...}</strong>) --> (empty, empty) :(((


In [16]:
def expand_pattern_transitions(row):
    trans_in = row[trans_cols]
    
    # Check for valid categories
    row_cats = trans_in.apply(is_pattern)
    orig_cats = row_cats[['origActor', 'origTarget']]
    
    cat_lengths = trans_in.apply(n_children)
    valid_pattern = (cat_lengths[row_cats].nunique() == 1) & orig_cats.any()
    real_parents = trans_in[row_cats].apply(is_real_obj).all()

    # If it's a real pattern: Expand out
    if valid_pattern: 
        ex_inputs = np.where(row_cats,
                             trans_in.apply(lambda c: cat_children(c, pattern=True)),
                             trans_in.apply(lambda c: np.array([c])))
        ex_inputs = pd.Series(ex_inputs, index=trans_cols)

    # If it's not and the parent items are a real item: Treat as a single transition    
    elif real_parents: 
        ex_inputs = trans_in.copy()

    # Otherwise: Just toss it
    else:
        ex_inputs = pd.Series(([],[],[],[]),index=trans_cols)

    # Convert into dataframe and expand out
    trans_df = pd.DataFrame(ex_inputs).T
    trans_df = trans_df.apply(lambda col: col.explode(), axis=0).reset_index(drop=True)
    
    return trans_df

In [17]:
pattern_transition = transitions.loc[258]
display_transition(transitions.loc[258])

pattern_expanded = expand_pattern_transitions(pattern_transition)

print('After expansion:')
for _,row in pattern_expanded.iterrows():
    display_transition(row)

After expansion:


### Main loop: Assemble dataframe of expanded transitions

Helper function: Is this category probabilistic? (only probabilistic categories can have multiple outcomes)

In [18]:
def is_probabilistic_elem(cat):
    if is_cat(cat):
        return categories[categories.parentID == cat].probabilistic.values[0]
    else:
        return False
    
is_probabilistic = np.vectorize(is_probabilistic_elem)

print(obj_name(1966))
print(is_probabilistic_elem(1966))

Random Die Roll
True


Rearrange transitions in priority order:

Single objects --> Non-pattern categories --> Pattern categories

In [19]:
ordered_transitions = transitions.copy()
ordered_transitions['prob'] = ordered_transitions.apply(is_probabilistic).apply(any, axis=1)
ordered_transitions['category'] = ordered_transitions.apply(is_cat).apply(any, axis=1)*1
ordered_transitions['pattern'] = ordered_transitions.apply(is_pattern).apply(any, axis=1)*1
ordered_transitions['priority'] = ordered_transitions['category'] + ordered_transitions['pattern']
ordered_transitions = ordered_transitions.sort_values(by='priority').reset_index()

ordered_transitions

Unnamed: 0,index,origActor,origTarget,newActor,newTarget,lastUseActor,lastUseTarget,prob,category,pattern,priority
0,0,67,2970,0,2966,False,False,False,0,0,0
1,3106,4435,-1,4431,4434,False,False,False,0,0,0
2,3107,467,468,467,469,False,False,False,0,0,0
3,3108,2531,557,0,2688,False,False,False,0,0,0
4,3110,2256,67,0,2264,False,False,False,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
4842,880,-1,1018,0,1019,False,False,False,1,1,2
4843,3810,323,-1,239,321,False,False,False,1,1,2
4844,1471,394,1207,394,1208,False,False,False,1,1,2
4845,3102,964,1015,964,1016,False,False,False,1,1,2


Main loop:

In [20]:
expanded_list = []
print('Original transitions: %i' % ordered_transitions.shape[0])

for idx, row in notebook.tqdm(ordered_transitions.iterrows(), total=ordered_transitions.shape[0]):
    
    if row['pattern'] == 1:
        pre_expanded_df = expand_transitions(row)
        row_df = pd.concat([expand_pattern_transitions(row) for _,row in pre_expanded_df.iterrows()])
    elif row['category'] == 1:
        row_df = expand_transitions(row)
    else:
        row_df = pd.DataFrame(row[trans_cols]).T
        
    row_df['prob'] = row['prob']
    expanded_list.append(row_df)

# Turn to dataframe        
expanded_df = pd.concat(expanded_list)
n_orig = expanded_df.shape[0]

# Remove duplicate rows
expanded_df = expanded_df.drop_duplicates().reset_index(drop=True)
n_unique = expanded_df.shape[0]

# Remove contradictory transitions
prob_trans = expanded_df[expanded_df.prob]
det_trans = expanded_df[~expanded_df.prob]
det_trans = det_trans.groupby(['origActor','origTarget', 'lastUseActor', 'lastUseTarget']).first().reset_index()
expanded_df = pd.concat([prob_trans, det_trans]).reset_index(drop=True)
n_final = expanded_df.shape[0]

print('After expansion: %i' % n_orig)
print('Removed %i duplicate transitions' % (n_orig-n_unique))
print('Removed %i contradictory transitions' % (n_unique-n_final))
print('Final expanded # of transitions: %i' % n_final)

Original transitions: 4847


  0%|          | 0/4847 [00:00<?, ?it/s]

After expansion: 36412
Removed 16699 duplicate transitions
Removed 95 contradictory transitions
Final expanded # of transitions: 19618


## Manually add special cases

Some objects can be created in the game, but you wouldn't know it from the transition files! These need to be handled separately.

### Case 1: Radio transmissions

From [onetech](https://github.com/JustinLove/onetech/blob/mapping/process/src/TransitionImporter.js):
```
  // Global triggers have a name like ">global1" and will
  // cause a transition somewhere else on the map.
  // They have an "away" transition for the receiver, but no
  // "towards" transition. This looks for a transmitter which
  // has "*global1" in the name and adds this as an extra object
```

In [21]:
signals = objects[objects.name.str.startswith('>')]
radio_transitions = []

for _,sig in signals.iterrows():

    transmitter_name = sig['name'].replace('>', '\*')
    transmitters = objects[objects.name.str.contains(transmitter_name)]

    for _,transmitter in transmitters.iterrows():

        trans_id = transmitter['id']
        trans_to = expanded_df[(expanded_df.newTarget == trans_id) | (expanded_df.newActor == trans_id)]
        trans_to = trans_to.replace(trans_id, sig['id'])

        radio_transitions.append(trans_to)
        
radio_transitions = pd.concat(radio_transitions)
for _,row in radio_transitions.iterrows():
    display_transition(row)
    print(row)

origActor         2490
origTarget        2607
newActor             0
newTarget         2627
lastUseActor     False
lastUseTarget    False
prob             False
Name: 17950, dtype: object


origActor         2490
origTarget        2608
newActor             0
newTarget         2629
lastUseActor     False
lastUseTarget    False
prob             False
Name: 17951, dtype: object


origActor         2490
origTarget        2609
newActor             0
newTarget         2631
lastUseActor     False
lastUseTarget    False
prob             False
Name: 17952, dtype: object


### Case 2: Property fences

From [onetech](https://github.com/JustinLove/onetech/blob/mapping/process/src/TransitionImporter.js):
```
  // The proposed property fence needs transitions added for all directions.
  // Only "+horizontalA" transition is included so we should duplicate this
  // for the other directions.
```

```
addDirectionalTransitions(objects) {
    const directions = ["+cornerA", "+verticalA"];
    const horizontalObjects = Object.values(objects).filter(o => o.name.match(/\+horizontalA\b/));
    for (let horizontalObject of horizontalObjects) {
      for (let direction of directions) {
        const otherName = horizontalObject.name.replace("+horizontalA", direction);
        const otherObject = Object.values(objects).find(o => o.name === otherName);
        if (otherObject) {
          // console.log(`Copying transitions from ${horizontalObject.name} to ${otherObject.name}`);
          this.copyTransitionsToward(horizontalObject.id, otherObject.id);
        } else {
          console.log(`Unable to find object with name ${otherName} in addDirectionalTransitions`);
        }
      }
    }
  }
```

In [22]:
horiz_fences = objects[objects.name.str.contains(r'\+horizontalA$')]
directions = ["+cornerA", "+verticalA"]
fence_transitions = []

for _,f in horiz_fences.iterrows():
    
    # Get information about horizontal fence
    f_name = f['name']
    f_id = f['id']
    f_trans = expanded_df[(expanded_df.newTarget == f_id) | (expanded_df.newActor == f_id)]
    
    # Replace with other directions
    for d in directions:
        d_fence = f['name'].replace('+horizontalA', d)
        d_data = objects[objects.name == d_fence]
        d_id = d_data.iloc[0]['id']
        
        d_trans = f_trans.replace(f_id, d_id)
        fence_transitions.append(d_trans)
        
fence_transitions = pd.concat(fence_transitions)

for _,row in fence_transitions.iterrows():
    display_transition(row)
    print(row)

origActor         2957
origTarget          -1
newActor          2957
newTarget         2972
lastUseActor     False
lastUseTarget    False
prob             False
Name: 18326, dtype: object


origActor         2957
origTarget          -1
newActor          2957
newTarget         2968
lastUseActor     False
lastUseTarget    False
prob             False
Name: 18326, dtype: object


## Upload to database

In [23]:
complete_transitions = pd.concat([expanded_df, radio_transitions, fence_transitions]).reset_index(drop=True)
complete_records = complete_transitions.to_dict('records')

print('Uploading %i records to database' % len(complete_records))
trans_col = ohol.expanded_transitions
trans_col.insert_many(complete_records)

Uploading 19623 records to database


<pymongo.results.InsertManyResult at 0x2b2d3d36b9c0>