In [256]:
import os, re, glob, json
from os.path import join as opj
import numpy as np
import pandas as pd

import networkx as nx

import ohol_transitions as transition
import ohol_categories as cat
import ohol_object as obj

Find object and transition files:

In [2]:
a = !pwd
baseDir = '/'.join(a[0].split('/')[0:-2])
baseDir

'/Users/dengwenning/Documents/GitHub'

Transitions:

In [3]:
gsearch = lambda *args: glob.glob(opj(*args))
data_dir = '../../OneLifeData7/'
trans_dir  = opj(data_dir, 'transitions')
trans_files = gsearch(trans_dir, '*.txt')

print(*trans_files[:5], sep='\n')

../../OneLifeData7/transitions/-1_2574.txt
../../OneLifeData7/transitions/0_702.txt
../../OneLifeData7/transitions/314_235.txt
../../OneLifeData7/transitions/2165_2165.txt
../../OneLifeData7/transitions/0_1692.txt


Objects:

In [4]:
obj_dir = opj(data_dir, 'objects')
obj_files = gsearch(obj_dir, '*txt')

print(*obj_files[:5], sep='\n')

../../OneLifeData7/objects/3644.txt
../../OneLifeData7/objects/1053.txt
../../OneLifeData7/objects/1735.txt
../../OneLifeData7/objects/3122.txt
../../OneLifeData7/objects/2228.txt


Build a dictionary of {object: mapChance} pairs, and identify grave object

In [5]:
str_extract = lambda pattern, s: re.search(pattern, s).group(0)
int_extract = lambda pattern, s: int(str_extract(pattern, s))
grave_list = []
obj_dict = {}
for o in obj_files:
    is_obj = re.search('nextObjectNumber|groundHeat', o) is None
    
    if is_obj:
        o_num = int_extract('[0-9]+(?=.txt)', o)
        o_data = obj.read_obj(o_num)
        if 'Grave' in o_data['name']:
            grave_list = np.append(grave_list, int(o_num))
        obj_dict[o_num] = o_data['mapChance']
#         craftable_dict[o_num] = o_data['name']

Build a dictionary of {category: children} pairs

In [6]:
l = os.listdir('../../OneLifeData7/categories')
cat_arr = [int_extract('[0-9]+(?=.txt)', o) for o in l]
len(cat_arr)

230

In [7]:
cat_tot = []
for j in cat_arr:
    curr_file = obj.read_obj_file(str(j))
    cat_name = [i for i in curr_file if '@' in i] 
    if len(cat_name):
        cat_id = curr_file[0].split('=')[1]
        cat_tot = np.append(cat_tot, cat_id)

In [9]:
cat_dict = {}
for i in cat_arr:
    children = cat.cat_children(i)
    cat_dict[i] = children
    
len(cat_dict)

230

### Build a dataframe of transitions:

In [14]:
tech_df = pd.DataFrame()
for i in trans_files:
    a = transition.read_transition(opj(trans_dir, i))
    tech_df = pd.concat([tech_df, pd.DataFrame.from_records(a, index=[0])], sort=True)
tech_df.head()

Unnamed: 0,actorMinUseFraction,autoDecaySeconds,desiredMoveDist,isTool,lastUseActor,lastUseTarget,move,newActor,newActorName,newTarget,newTargetName,noUseActor,noUseTarget,origActor,origActorName,origTarget,origTargetName,reverseUseActor,reverseUseTarget,targetMinUseFraction
0,0.0,10,1.0,False,False,False,0.0,0,Empty,2578,Cool Glass,0.0,0.0,-1,Empty,2574,Molten Glass,0.0,0.0,0.0
0,0.0,0,1.0,False,False,False,0.0,425,Wolf Skin,695,Wolf Crown,,,0,Empty,702,Wolf Crown with Wolf Skin,0.0,0.0,0.0
0,,0,,True,False,False,,0,Empty,317,Crucible with Iron,,,314,Wrought Iron,235,Clay Bowl - empty,,,
0,0.0,0,1.0,True,False,False,0.0,235,Clay Bowl - empty,3699,Bowl with Raw Rubber Balls,0.0,0.0,2165,Bowl with Raw Rubber Ball,2165,Bowl with Raw Rubber Ball,0.0,1.0,0.0
0,0.0,0,1.0,False,False,False,0.0,1719,Hungry Schnauser Puppy - held,1706,Schnauser with Puppies -2,0.0,0.0,0,Empty,1692,Schnauser with Puppies -3,0.0,0.0,0.0


Only keeps origActor, origTarget, newActor, newTarget

In [15]:
tech_tree_df = tech_df[['origActor','origTarget','newActor','newTarget']].reset_index(drop = True)
tech_tree_df.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0,2578
1,0,702,425,695
2,314,235,0,317
3,2165,2165,235,3699
4,0,1692,1719,1706


### expand on categories

In [334]:
#Note this only expand on the situations where 1) all are categories and 2) both sides include categories
tech_tree_newdf = pd.DataFrame(columns = ['origActor','origTarget','newActor','newTarget'])
arr1 = []
arr2 = []
arr3 = []
arr4 = []


for i in tech_tree_df.itertuples():
    origActor = i[1]
    origTarget = i[2]
    newActor = i[3]
    newTarget = i[4]
    expanded = 0
    
    ##expand categories if both origActor and newActor or both origTarget and new Target are caetgories
    if (origActor in cat_dict) and (newActor in cat_dict) and (origTarget in cat_dict) and (newTarget in cat_dict) \
    and (newActor != origActor) and (origTarget != newTarget):
        arr1 = np.append(arr1, cat.cat_children(origActor))
        arr2 = np.append(arr2, cat.cat_children(origTarget))
        arr3 = np.append(arr3, cat.cat_children(newActor))
        arr4 = np.append(arr4, cat.cat_children(newTarget))
        expanded = 1
        
    elif (origActor in cat_dict) and (newActor in cat_dict) and (newActor != origActor):
        if len(cat.cat_children(origActor)) == len(cat.cat_children(newActor)):
            arr1 = np.append(arr1, cat.cat_children(origActor))
            arr3 = np.append(arr3, cat.cat_children(newActor))
            times = len(cat.cat_children(origActor))
            arr2 = np.append(arr2, np.repeat(origTarget, times))
            arr4 = np.append(arr4, np.repeat(newTarget, times))
            expanded = 1
        else:
            print(i)
            
    elif (origTarget in cat_dict) and (newTarget in cat_dict) and (origTarget != newTarget):
        if len(cat.cat_children(origTarget)) == len(cat.cat_children(newTarget)):
            arr2 = np.append(arr2, cat.cat_children(origTarget))
            arr4 = np.append(arr4, cat.cat_children(newTarget))
            times = len(cat.cat_children(origTarget))
            arr1 = np.append(arr1, np.repeat(origActor, times))
            arr3 = np.append(arr3, np.repeat(newActor, times))
            expanded = 1
        else:
            print(i)


    #keep this row if the above conditions are not met
    if not expanded:
        tech_tree_newdf = tech_tree_newdf.append(tech_tree_df.iloc[i[0]].copy(), ignore_index=True)

Pandas(Index=1973, origActor=59, origTarget=2226, newActor=0, newTarget=2220)


In [335]:
sub = pd.DataFrame({'origActor':arr1, 'origTarget':arr2, 'newActor':arr3, 'newTarget':arr4}).astype(int)
tech_tree_newdf = tech_tree_newdf.append(sub).reset_index(drop = True)
tech_tree_newdf.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0,2578
1,0,702,425,695
2,314,235,0,317
3,2165,2165,235,3699
4,0,1692,1719,1706


In [336]:
arr2 = np.repeat(cat.cat_children(2226), len(cat.cat_children(2220)))
arr4 = cat.cat_children(2220)
arr1 = np.repeat(origActor, len(arr4))
arr3 = np.repeat(newActor, len(arr4))
tech_tree_newdf = tech_tree_newdf[tech_tree_newdf.origTarget != 2226]
tech_tree_newdf = tech_tree_newdf.append(pd.DataFrame({'origActor':arr1, 'origTarget':arr2,\
                                     'newActor':arr3, 'newTarget':arr4}).astype(int)).reset_index(drop = True)
tech_tree_newdf = tech_tree_newdf.astype(int)
tech_tree_newdf.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0,2578
1,0,702,425,695
2,314,235,0,317
3,2165,2165,235,3699
4,0,1692,1719,1706


### Helper Functions:

In [330]:
##helper function that finds the ingredients given the product from the transition dictionary
def find_ingredients(value):
    ingredient_list = []
    for idx in d.items():
        for item in d[idx[0]].items():
            if item[0] != 'category':
                if value in item[1]:
                    ingredient_list.append(item[0])
    return list(set(ingredient_list))       


In [278]:
##Helper function that searches for the known ingredients of an object
#not useful here
def search_for_known_ingredients(product, roots):
    known = []
    ingredients = find_ingredients(product)
    for i in ingredients:
        if i in roots:
            known.append(i)
    return known

In [279]:
##Helper function that finds the key associated with (/smaller than) a certain value in the dictionary
def find_value(dictionary, value, multiple):
    mylist = []
    for item in dictionary.items():
        if multiple == 0:
            if value == item[1]:
                mylist.append(item[0])
        elif multiple == 1:
            if value > item[1]:
                mylist.append(item[0])
    return mylist

In [280]:
##helper function that returns the category for a given object
def find_parent(mydict, child):
    for i in mydict.keys():
        if child in mydict[i]:
            return i

In [318]:
##helpfer function that checks if there's any category id left in the dictionary
def check_dict():
    prob = []
    for item in d:
        for i in cat_dict:
            if i in list(d[item].keys()):
                prob.append(item)
    return prob

make a list of items

In [299]:
max_item_id = max(np.max(tech_tree_newdf).to_list())
#replace negative values
tech_tree_newdf = tech_tree_newdf.replace(-1, max_item_id+1)
tech_tree_newdf = tech_tree_newdf.replace(-2, max_item_id+2)
#make a list of all objects
items = np.arange(max_item_id+3)
print(max_item_id)

4347


### create a nested dictionary of ingredients - products pair

In [331]:
#dictionary problem has been fixed. Ingredients will not override
#Update: also expands on situations where a category is in one side of the transition
d = {}
transition_empt = {}
for item in items:
    transition_empt[item] = []
    if item in cat_dict:
        d[item] = {'category':cat_dict[item]} #category item indicated by key
    else:
        d[item] = {}
        recipy = tech_tree_newdf.query('(origActor == @item) or (origTarget == @item)')
        if len(recipy):
            for j in range(0,len(recipy)):
                ingredients = recipy.iloc[j].tolist()[0:2]
                ingredient = np.delete(ingredients, np.where(ingredients == item)[0][0])[0] 
                products =[]
                for p in recipy.iloc[j].tolist()[2:4]:
                    if (p!=item) and (p!=ingredient): #remove unchanged item from product
                        if p in cat_dict: #expand on categories in product: find the product associated with the item
                            products = np.append(products, cat_dict[p])
#                             print(products)
                        else:
                            products = np.append(products, p)
                        products = np.append(products, p)
                products = list(map(int, products))
                
                if ingredient in cat_dict: #expand on categories in key ingredient
                    all_ingredients = cat_dict[ingredient]
                else:
                    all_ingredients = [ingredient]
                for t in all_ingredients: 
                    #this is to prevent that transitions with the same ingredients but different products override
                    if t in d[item].keys():
                        d[item][t] = list(map(int, set(np.append(d[item][t], products))))
                    else:
                        d[item][t] = products          
                

### search for objects that cannot be crafted (with no ingredients)

In [337]:
#find objects with no ingredients
no_ingredients = []
new_child = []
for i in items:
    if i not in cat_dict: #it's not a category
        ingredients = find_ingredients(i)
        if len(ingredients) == 0: #does not have ingredients
            no_ingredients = np.append(no_ingredients, i)
no_ingredients = no_ingredients.astype(int)


len(no_ingredients)

778

### determine natural objects and add them to roots

In [333]:
nat_obj = []
for item in obj_dict.items():
    if item[1] != 0 :
        nat_obj.append(item[0])
roots = list(set(np.append(nat_obj, no_ingredients)))
len(roots)

825

In [288]:
for i in no_ingredients:
    try:
        print(obj.read_obj(str(i))['name'])
    except:
        SyntaxError

Skin Tone A &B &C &D &E &F
Female001 D
Big Hard Rock
Seeding Wild Carrot
Milkweed Stump -Fertile
Tule Reeds
Flint
Male002 D
Female003 D
Female004 C
Male005 D
Female006 C
Male007 C
Male008 C
Edge
Jason Test
Long Play Phonorecord Album
Phonograph
Flooded Pond
Willow Tree
Bald Cypress Tree
Parts
Gold Vein
Indigo
Rose Madder
Dead Tree
Monolith
Burdock
Wild Onion
Rag Hat
Rag Shirt
Rag Loincloth
Rag Shoe
Fence Rail
Female009 D
Male010 D
Female011 C
Male012 C
Wild Rose with Fruit
Full Deep Well - +famUse100
Full Shallow Well - +famUse100
Teosinte
Wild Potato
Raw Potato - just dug
Wild Bean Plant
Wild Squash Plant
Wild Cabbage
Wet Planted Cabbage Seed
Goose Beheaded
Canada Goose Pond with Egg
Tutorial Stone - tutorial 1
Tutorial Stone - tutorial 2
Tutorial Stone - tutorial 3
Tutorial Stone - tutorial 99 done
Tutorial Stone - tutorial 4
Tutorial Stone - tutorial 5
Tutorial Stone - tutorial 6
Tutorial Stone - tutorial 7
Tutorial Stone - tutorial 8
Tutorial Stone - tutorial 9
Tutorial Stone - tut

In [325]:
#make sure all categories are not in the transition dict (might forgot to remove some)
while len(check_dict()) != 0:
    for item in d:
        for i in cat_dict:
            if i in list(d[item].keys()):
                d[item].pop(i)

In [289]:
#very strange that still some categories are in here
for item in d:
    for j in list(d[item].values()):
        for k in j:
            if k in cat_dict:
                print([item,k])

[0, 969]
[0, 969]
[0, 969]
[0, 969]
[0, 969]
[0, 969]
[0, 1941]
[0, 1948]
[0, 1724]
[0, 1749]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1638]
[0, 1024]
[0, 1024]
[0, 778]
[0, 1036]
[0, 1037]
[0, 1036]
[0, 1037]
[0, 1724]
[0, 1601]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 1950]
[0, 778]
[0, 1206]
[0, 1206]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601]
[0, 1601

### loop over all pairs

In [290]:
#initialize depth
orig_depth = np.inf
depth = {}
for item in items:
    if item in roots:
        depth[item] = 0
    else:
        depth[item] = orig_depth
depth[0] = 0 #empty hand
depth[len(depth)-1] = 0 #empty ground

In [303]:
#approach 2 where ingredients and products are all expanded into items rather than category
root_items = roots
iterated_items = []
order_list = []
transition_from = transition_empt

while len(root_items):
    #take an item from root and remove it
    root_items = list(set(root_items)) #remove repeated items
    item = root_items[0]
    root_items = np.delete(root_items,0)
    while item in cat_dict:
        sub_item = cat_dict[item]
        root_items = np.append(root_items, sub_item)
        item = root_items[0]
        root_items = np.delete(root_items,0)
    iterated_items = np.append(iterated_items, item)
    

    #put all products associated with this item into root
    for key in d[item]: #looping through other ingredients
#         if key == 'category' :
#             keys = d[item][key]
#             kidx = np.argmin([depth[k] for k in keys])
#             key = keys[kidx]

        #calculate the depths of the products
        for product in list(set(d[item][key])):
            #if this product is made of known-depth ingredients, add this product to root_items list
            if depth[product] > (depth[item] + depth[key] + 1):
                depth[product] = depth[item] + depth[key] + 1
#                 if product in cat_dict: #if this is a category, push all its' children to root_items
#                     for c in cat_dict[product]:
#                         depth[c] = min(depth[c], depth[product])
#                         if c not in iterated_items: #avoid repeated iterating over the same item
#                             root_items = np.append(root_items, c)
#                 else:
#                     root_items = np.append(root_items, product)
#                     print(product, cat_dict[product])    

                #if the ingredients have NOT been used in one of the steps of producing the object
                if (item not in transition_from[product]):
                    transition_from[product] = np.append(transition_from[product],\
                                                         np.append(transition_from[item], item))
                if (key not in transition_from[product]):
                    transition_from[product] = np.append(transition_from[product],\
                                                         np.append(transition_from[key], key))
                root_items = np.append(root_items, product)
                order_list = np.append(order_list,product)
                

In [304]:
np.set_printoptions(threshold=np.inf)
len(order_list)

4874

In [305]:
depth.values()

dict_values([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 3, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 2, 1, 2, 3, 2, 1, 3, 6, 0, 1, 2, 0, 1, 0, 1, 1, 3, 2, 9, 11, 2, 10, 13, 15, 0, 18, 20, 8, 7, 0, 3, 6, 0, 4, 5, 15, 16, 17, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 2, 4, 5, 6, 17, 15, 16, 13, 18, 19, 20, 0, 19, 0, 0, 3, 2, 3, 0, 1, 2, 10, 0, 0, 2, 1, 0, 2, 2, 0, 3, 2, 3, inf, 1, 3, 2, 2, 0, 5, inf, 2, 16, 2, 9, 13, 0, 12, 10, 11, 9, 7, 8, 2, 0, 4, 7, 3, 2, 1, 5, 12, 11, 0, 15, 14, 4, 13, 16, 14, 17, 15, 18, 15, 19, 18, 19, 22, 22, 27, 26, 0, 30, 29, 2, 6, 6, 7, 16, 17, 28, 110, 46, 26, 66, 86, 30, 39, 59, 79, 23, 103, 2, 2, 0, 111, 7, 9, 12, 9, 12, 13, 13, 3, 0, 0, 1, 2, 3, 5, 2, 9, 12, 13, 3, 0, 2, 3, 2, 2, 6, 9, 5, 9, 28, 0, 14, 3, 4, 0, 9, 5, 15, 16, 5, 8, 4, 22, 24, 24, 27, 7, 29, 9, 26, 25, 27, 11, 16, 21, 41, 19, 39, 36, 38, 33, 36, 53, 38, 55, 56, 58, 1, 0, 12, 18, 27, 8, 5, 31, 8, 40, 8, 2, 0, 2, 21, 

In [306]:
not_updated = find_value(depth, np.inf, 0)
len(not_updated)

439

In [307]:
for i in not_updated:
    print(obj.read_obj(str(i))['name'])

Tied Skewer
Featherless Arrow
Hot Forged Steel Crucible
Forged Steel Crucible
Cool Steel Crucible in Wooden Tongs
Crucible with Steel
@ Kindling Source
@ Pond Water Source
@ Full Portable Water Source
@ Carrot
@ Wet Canada Goose Pond
Attacking Wolf
Home Marker - eveHomeMarker
@ Yew Bow
@ Food Burner
@ Edible Pie
@ Skinning Tool
@ Lamb Food Bowl
@ Small Trash
@ Planted Stakes
@ Unshot Grizzly Bear
@ Empty Portable Water Source
@ New Scrap Steel
@ Shallow Digger
@ Rough Cutter
@ Simmering Liquid
Escaped Horse-Drawn Cart
@ Partial Adobe Build
@ Stone Rubble Sources
@ Row Tiller
@ Non-tilling Skewer
@ Decaying Hat
@ Decaying Shirt
@ Decaying Bottom
@ Decaying Shoe
@ Tillable Row
Locked Wooden Door - $10
Open Locked Wooden Door - $10
@ Decaying Basket
@ Broken Steel Source
@ Weak Chisel Breaker
Sapling with Cutting
Sapling Cutting
Dry Sapling Cutting
Wet Sapling Cutting
Domestic Sapling
Cut Domestic Sapling Skewer
Domestic Sapling with Cutting
@ Fine Cutter
@ Weak Froe Breaker
Locked Wooden

@ Westward Moving Cart Entered
@ Westward Entering Tracks
@ Northward Leaving Tracks
@ Northward Entering Tracks
@ Northward Moving Cart
@ Northward Moving Cart Leaving
@ Northward Moving Cart Entered
@ Southward Leaving Tracks
@ Southward Entering Tracks
@ Southward Moving Cart
@ Southward Moving Cart Leaving
@ Southward Moving Cart Entered
@ Partially Dug track
@ Non-Dug track
@ Bendable Track
@ Non-Bendable Track
Shears with Red Rose
Shears with White Rose
Shears with Pink Rose
Shears with Blue Rose
Shears with Black Rose
Shears with Green Rose
Shears with Yellow Rose
Green Rose - just dropped
Yellow Rose - just dropped
Black Rose - just dropped
Blue Rose - just dropped
Black Plaster Wall - +causeAutoOrient
Black Plaster Wall - +causeAutoOrient
Green Plaster Wall - +causeAutoOrient
Green Plaster Wall - +causeAutoOrient
@ New Scrap Copper
Fetching German Shepherd
Fetching German Shepherd - done
Fetching German Shepherd - A
Fetching German Shepherd - B
Shaky Property Fence Box - +hori

In [308]:
empt = []
for i in transition_from.keys():
    if len(transition_from[i]) == 0:
        if (i not in roots) and (i not in cat_dict) and (i not in grave_list):
            empt = np.append(empt, i)
empt = empt.astype(int)
len(empt)

271

In [310]:
most_ingredients = np.argmax([len(transition_from[i]) for i in transition_from.keys()])
len(transition_from[most_ingredients])

562583

In [312]:
hardest_item = max([i for i in list(depth.values()) if i < np.inf])
myitem = find_value(depth, hardest_item, 0)[0]
tech_df.query('(newTarget == @myitem) or (newActor == @myitem)')

Unnamed: 0,actorMinUseFraction,autoDecaySeconds,desiredMoveDist,isTool,lastUseActor,lastUseTarget,move,newActor,newActorName,newTarget,newTargetName,noUseActor,noUseTarget,origActor,origActorName,origTarget,origTargetName,reverseUseActor,reverseUseTarget,targetMinUseFraction
0,0.0,0,1.0,True,False,False,0.0,0,Empty,4208,Northwestward Gradient Dry Tarry Spot,0.0,0.0,4207,Southwestward Gradient Dry Tarry Spot,4207,Southwestward Gradient Dry Tarry Spot,0.0,0.0,0.0


In [313]:
myitem

4208

In [314]:
depth[myitem]

353023