# Build tech tree
Grace Deng, August 2020

Last modified by Natalia Velez, April 2021

This file does the following:
   1. Parses the transition files (expands categories and keeps only the forward transitions)
   2. Calculates the depth (defined as the number of unique ingredients) of each object in the tech tree
   3. Calculates the empowerment of each object (defined as the number of immediate products that can be made from this object)

In [1]:
import os, re, glob, json
from os.path import join as opj
import numpy as np
import pandas as pd
import json
import networkx as nx
import pymongo
import pprint
from tqdm import notebook
from itertools import cycle

## Load data from database

Connect:

In [2]:
keyfile = '../6_database/credentials.key'

# Connection string
# The first two lines of the key file are used as the username and password.
# Read the file inside a `with` block so the handle is closed right away.
with open(keyfile, "r") as f:
    creds = f.read().splitlines()
myclient = pymongo.MongoClient('134.76.24.75', username=creds[0], password=creds[1], authSource='ohol')
print(myclient)

# Handle to the `ohol` database, used by every query below
ohol = myclient.ohol
print(ohol)

MongoClient(host=['134.76.24.75:27017'], document_class=dict, tz_aware=False, connect=True, authsource='ohol')
Database(MongoClient(host=['134.76.24.75:27017'], document_class=dict, tz_aware=False, connect=True, authsource='ohol'), 'ohol')


Load objects:

In [3]:
# Fetch every document in the `objects` collection and tabulate it
objects = pd.DataFrame(list(ohol.objects.find()))

n_objects = objects.shape[0]
print('Loading %i objects' % n_objects)
objects.head()

Loading 4161 objects


Unnamed: 0,_id,id,name,containable,containSize,vertSlotRot,permanent,minPickupAge,heldInHand,blocksWalking,...,floorHugging,slotsLocked,noFlip,sideAccess,creationSoundForce,invisCont,slotPos,vert,spritesDrawnBehind,spritesAdditiveBlend
0,60750c9e68dcd62afd70767e,11,Skin Tone A &B &C &D &E &F,0,1.0,0.0,0,3,0,0,...,,,,,,,,,,
1,60750c9e68dcd62afd70767f,19,Female001 D,0,1.0,0.0,0,3,0,0,...,0.0,,,,,,,,,
2,60750c9e68dcd62afd707680,30,Wild Gooseberry Bush,0,1.0,0.0,1,3,0,0,...,0.0,,,,,,,,,
3,60750c9e68dcd62afd707681,31,Gooseberry,1,1.0,0.0,0,3,1,0,...,0.0,0.0,,,,,,,,
4,60750c9e68dcd62afd707682,32,Big Hard Rock,0,1.0,0.0,1,3,0,0,...,0.0,,,,,,,,,


Load categories:

In [4]:
# Fetch every document in the `categories` collection and tabulate it
categories = pd.DataFrame(list(ohol.categories.find()))

n_categories = categories.shape[0]
print('Loading %i categories' % n_categories)
categories.head()

Loading 269 categories


Unnamed: 0,_id,parentID,numObjects,probabilistic,pattern,children,probs
0,60750cbd68dcd62afd7086bf,316,3,False,True,"[2505, 2517, 3002]",
1,60750cbd68dcd62afd7086c0,319,3,False,True,"[2506, 2522, 3003]",
2,60750cbd68dcd62afd7086c1,320,3,False,True,"[2510, 2520, 3004]",
3,60750cbd68dcd62afd7086c2,321,3,False,True,"[2507, 2524, 3007]",
4,60750cbd68dcd62afd7086c3,322,3,False,True,"[2508, 2523, 3008]",


Load transitions:

In [5]:
# The four object slots of a transition, plus the names of the boolean
# "is a category" / "is a pattern category" columns derived from them
trans_cols = ['origActor', 'origTarget', 'newActor', 'newTarget']
cat_cols = ['%s_cat' % col for col in trans_cols]
pattern_cols = ['%s_pattern' % col for col in trans_cols]

# Fetch every transition and keep only the four slot columns
transitions = pd.DataFrame(list(ohol.transitions.find()))[trans_cols]

n_transitions = transitions.shape[0]
print('Loading %i transitions' % n_transitions)
transitions.head()

Loading 4847 transitions


Unnamed: 0,origActor,origTarget,newActor,newTarget
0,67,2970,0,2966
1,235,1890,1892,0
2,441,445,441,444
3,0,2671,2539,2665
4,239,335,327,291


## Expand transitions between categories

Helper functions: Identify categories

In [6]:
# Helper function: is this object ID the parent ID of a category?
cat_names = categories.parentID.values


def is_cat_elem(objID):
    '''Return True if objID is found among the category parent IDs.'''
    return objID in cat_names


# Element-wise version, usable on arrays and dataframe columns
is_cat = np.vectorize(is_cat_elem)

# Helper function: is this ID the parent of a "pattern" category?
# (Pattern categories are parsed differently from regular categories - see below)
pattern_cats = categories[categories.pattern].parentID.values


def is_pattern_elem(cat):
    '''Return True only if cat is a category and that category is a pattern category.'''
    if not is_cat(cat):
        return False
    return cat in pattern_cats


# Element-wise version, usable on arrays and dataframe columns
is_pattern = np.vectorize(is_pattern_elem)

Helper functions: Expand transitions containing category labels

In [83]:
def cat_children(cat, pattern=False):
    '''
    Resolve an ID into the object IDs it stands for

    Inputs:
    cat: int (object ID, which may or may not be a category parent ID)
    pattern: bool (if True, the parent ID itself is put in front of the children)

    Output:
    - not a category: one-element numpy array holding `cat`
    - category: the `children` entry of the first matching row in `categories`
      (with `cat` prepended as a list when pattern=True)
    '''
    # Plain objects stand for themselves
    if not is_cat(cat):
        return np.array([cat])

    matching_rows = categories[categories.parentID == cat]
    children = matching_rows['children'].values[0]
    if not pattern:
        return children

    print('Added pattern parent')
    return [cat] + children
    
def expand_pairs(arr1, arr2):
    '''
    Pair up the elements of two arrays, repeating a single-element array as needed

    Inputs:
    arr1, arr2: lists (or other sized sequences)

    Outputs:
    pairs: list of tuples (element of arr1, element of arr2)

    Raises:
    ValueError if arr1 and arr2 have different lengths and neither has length 1

    Note: This function works if arr1 and arr2 are of the same length, or if one array is of length 1.
    It is written *deliberately* to fail if arr1 and arr2 have multiple elements and are mismatched,
    which should only happen if we haven't processed a transition with pattern categories properly.
    '''
    n1, n2 = len(arr1), len(arr2)

    if n2 == 1:
        # Repeat the single element of arr2 for every element of arr1
        pairs = zip(arr1, cycle(arr2))
    elif n1 == 1:
        # Repeat the single element of arr1 for every element of arr2
        pairs = zip(cycle(arr1), arr2)
    elif n1 == n2:
        pairs = zip(arr1, arr2)
    else:
        # Fail loudly with a readable message instead of an UnboundLocalError
        raise ValueError(
            'Cannot pair arrays of mismatched lengths (%i and %i)' % (n1, n2))

    return list(pairs)
    
def expand_transitions(row):
    '''
    Expand one transition into the concrete transitions it stands for

    Inputs:
    row: pd.Series object (must contain fields: origActor, newActor, origTarget, newTarget)

    Outputs:
    ex_transitions: list of (origActor, origTarget, newActor, newTarget) tuples
    (which will later be turned into a dataframe)
    '''
    # Replace each slot by the list of IDs it stands for
    expanded = row[trans_cols].apply(cat_children)

    # Old/new actors are matched with each other, and likewise old/new targets
    actor_pairs = expand_pairs(expanded['origActor'], expanded['newActor'])
    target_pairs = expand_pairs(expanded['origTarget'], expanded['newTarget'])

    # Every actor pairing is combined with every target pairing
    ex_transitions = []
    for orig_actor, new_actor in actor_pairs:
        for orig_target, new_target in target_pairs:
            ex_transitions.append((orig_actor, orig_target, new_actor, new_target))

    return ex_transitions

Debug: Tag whether each element in a transition is a category

In [28]:
# Boolean tables with the same shape as `transitions`
trans_is_cat = transitions.apply(is_cat)
trans_is_pattern = transitions.apply(is_pattern)

# Attach both tables side by side, aligned on the row index; the flag columns
# get the `_cat` / `_pattern` suffixes while the original columns keep their names
index_merge = dict(left_index=True, right_index=True)
trans_cats = transitions.merge(trans_is_cat, suffixes=('', '_cat'), **index_merge)
trans_cats = trans_cats.merge(trans_is_pattern, suffixes=('', '_pattern'), **index_merge)

print(trans_cats.shape)
trans_cats.head(10)

(4847, 12)


Unnamed: 0,origActor,origTarget,newActor,newTarget,origActor_cat,origTarget_cat,newActor_cat,newTarget_cat,origActor_pattern,origTarget_pattern,newActor_pattern,newTarget_pattern
0,67,2970,0,2966,False,False,False,False,False,False,False,False
1,235,1890,1892,0,False,False,False,False,False,False,False,False
2,441,445,441,444,False,False,False,False,False,False,False,False
3,0,2671,2539,2665,False,False,False,False,False,False,False,False
4,239,335,327,291,False,False,False,False,False,False,False,False
5,405,254,0,262,True,False,False,False,False,False,False,False
6,0,4302,236,4303,False,False,False,False,False,False,False,False
7,0,2359,2356,2243,False,False,False,False,False,False,False,False
8,-1,420,0,429,False,False,False,False,False,False,False,False
9,4542,-1,4543,4535,False,False,False,False,False,False,False,False


Helper function: Parse pattern categories

According to [this comment](https://github.com/JustinLove/onetech/blob/mapping/process/src/TransitionImporter.js) in the onetech repository, pattern categories need to be parsed differently:
```
  // Pattern categories work differently than regular categories:
  // 1. The parentID is an actual object and should stick around
  // 2. A transition is only considered if all matching pattern
  //    categories have the same number of objectIDs
  // 3. For each objectID, a new transition is created which maps
  //    each other pattern category objectID to the new object 
```

In [60]:
# Scratch cell: presumably the starting point for an `expand_pattern(row)` helper
# (the definition is still commented out). For now it just pulls out one example
# transition to inspect.
#def expand_pattern(row):
 
row = trans_cats.loc[4496]
row

origActor                -1
origTarget             3766
newActor                  0
newTarget              3765
origActor_cat         False
origTarget_cat         True
newActor_cat          False
newTarget_cat          True
origActor_pattern     False
origTarget_pattern     True
newActor_pattern      False
newTarget_pattern      True
Name: 4496, dtype: object

In [90]:
# Look up the object record with id 3765
objects[objects.id == 3765]

Unnamed: 0,_id,id,name,containable,containSize,vertSlotRot,permanent,minPickupAge,heldInHand,blocksWalking,...,floorHugging,slotsLocked,noFlip,sideAccess,creationSoundForce,invisCont,slotPos,vert,spritesDrawnBehind,spritesAdditiveBlend
3286,60750c9e68dcd62afd708354,3765,@ Eastward Moving Cart,0,1.0,0.0,0,3,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [97]:
# Children listed for the category whose parentID is 3765
categories[categories.parentID == 3765].children.values[0]

[1522,
 1568,
 1591,
 3785,
 3794,
 1522,
 1568,
 1591,
 3785,
 3794,
 1522,
 1568,
 1591,
 3785,
 3794,
 1522,
 1568,
 1591,
 3785,
 3794,
 1522,
 1568,
 1591,
 3785,
 3794]

In [101]:
# Names of the objects whose id appears among the children of category 3765
objects[objects.id.isin(categories[categories.parentID == 3765].children.values[0])].name

1254                    Track Cart -moving East
1285           Track Cart -moving East left end
1307         Track Cart -moving East left cross
3306    Track Cart -moving East left north east
3314    Track Cart -moving East left south east
Name: name, dtype: object

In [74]:
# Length of the list made of parent ID 3766 followed by that category's children
len([3766] + categories[categories.parentID == 3766].children.values[0])

26

In [102]:
# Name of the object with id 3766
objects[objects.id == 3766].name

3287    @ Eastward Moving Cart Leaving
Name: name, dtype: object

In [62]:
# Check whether any category has parentID 4496.
# NOTE(review): 4496 is the row label used with `trans_cats.loc` when picking the
# example transition, so this may be looking up a row index rather than an object ID.
categories[categories.parentID == 4496]

Unnamed: 0,_id,parentID,numObjects,probabilistic,pattern,children,probs


Expand transitions

In [57]:
expanded_list = []
n_original = trans_cats.shape[0]
print('Original transitions: %i' % n_original)

for idx, row in notebook.tqdm(trans_cats.iterrows(), total=n_original):
    # Transitions that touch a pattern category are not expanded here
    if any(row[pattern_cols]):
        print('Skipping transition %i' % idx)
        continue

    # Everything else (most transitions) goes through the regular expansion
    expanded_list.extend(expand_transitions(row))

# Turn to dataframe
expanded_df = pd.DataFrame(expanded_list, columns=trans_cols)
expanded_size_orig = len(expanded_df)

# Remove duplicate rows
expanded_df = expanded_df.drop_duplicates().reset_index(drop=True)
expanded_size_final = len(expanded_df)
n_removed = expanded_size_orig - expanded_size_final

print('Original expanded transitions: %i' % expanded_size_orig)
print('Removed %i duplicate transitions' % n_removed)
print('Final expanded of transitions: %i' % expanded_size_final)

Original transitions: 4847


  0%|          | 0/4847 [00:00<?, ?it/s]

Skipping transition 77
Skipping transition 97
Skipping transition 126
Skipping transition 154
Skipping transition 190
Skipping transition 191
Skipping transition 203
Skipping transition 251
Skipping transition 258
Skipping transition 259
Skipping transition 265
Skipping transition 298
Skipping transition 304
Skipping transition 317
Skipping transition 340
Skipping transition 348
Skipping transition 356
Skipping transition 375
Skipping transition 378
Skipping transition 408
Skipping transition 411
Skipping transition 419
Skipping transition 421
Skipping transition 426
Skipping transition 432
Skipping transition 443
Skipping transition 446
Skipping transition 451
Skipping transition 453
Skipping transition 488
Skipping transition 510
Skipping transition 525
Skipping transition 579
Skipping transition 595
Skipping transition 630
Skipping transition 654
Skipping transition 655
Skipping transition 677
Skipping transition 678
Skipping transition 681
Skipping transition 692
Skipping transitio

Skipping transition 4738
Skipping transition 4762
Skipping transition 4772
Skipping transition 4773
Skipping transition 4777
Skipping transition 4781
Skipping transition 4792
Skipping transition 4823
Original expanded transitions: 16461
Removed 6 duplicate transitions
Final expanded of transitions: 16455


Upload expanded transitions to database

In [51]:
# NOTE(review): `expanded_transitions` is not assigned by any live code in the cells
# shown here (the expansion cell builds `expanded_list` / `expanded_df`); this name
# presumably survives from an earlier run of the notebook -- confirm before re-running.
len(expanded_transitions)

104729

In [48]:
# Tabulate the expanded transitions with the four transition columns.
# NOTE(review): `expanded_transitions` is not assigned by any live code in the cells
# shown here -- confirm which variable is intended before re-running.
pd.DataFrame(expanded_transitions, columns=trans_cols)

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,67,2970,0,2966
1,235,1890,1892,0
2,441,445,441,444
3,0,2671,2539,2665
4,239,335,327,291
...,...,...,...,...
16456,59,3288,59,106
16457,684,-1,858,0
16458,4434,4426,0,4467
16459,4434,4453,0,4467


## Search through Tech tree

Find naturally-occurring objects -- these are the roots of the Tech tree:

In [None]:
# Keep the id and name of every object with a positive `mapChance`;
# these are used as the roots of the tech tree
natural_obj_df = objects[objects['mapChance'] > 0][['id', 'name']]
natural_objs = natural_obj_df.id

# Report how many roots were found (previously the message was passed to
# `len`, which built the string and silently discarded it)
print('Seeding tree with %i naturally-occurring objects' % len(natural_objs))
natural_obj_df

Helper function: Find all transitions where a given object is an ingredient

In [None]:
# Initialize the search with the natural objects
# NOTE(review): both names are bound to the same `natural_objs` object (no copy is
# made) -- confirm this is intended if either one is modified in place later.
ingredients = natural_objs
previously_visited = natural_objs

# o = 33
# transitions[(transitions.origActor == o) | (transitions.origTarget == o)]
