# Build Tech Tree

This file turns the transition data into a tree strucutre and calculates the depth of each object in the tech tree.

In [85]:
import os, re, glob, json
from os.path import join as opj
import numpy as np
import pandas as pd
import json
import networkx as nx
import pickle

import ohol_transitions as trans
import ohol_categories as cat
import ohol_objects as obj

## Find object and transition files:

Transitions:

In [2]:
gsearch = lambda *args: glob.glob(opj(*args))
data_dir = '../../OneLifeData7/'
trans_dir  = opj(data_dir, 'transitions')
trans_files = gsearch(trans_dir, '*.txt')

print(*trans_files[:5], sep='\n')

../../OneLifeData7/transitions/-1_2574.txt
../../OneLifeData7/transitions/0_702.txt
../../OneLifeData7/transitions/314_235.txt
../../OneLifeData7/transitions/2165_2165.txt
../../OneLifeData7/transitions/0_1692.txt


Objects:

In [3]:
obj_dir = opj(data_dir, 'objects')
obj_files = gsearch(obj_dir, '*txt')

print(*obj_files[:5], sep='\n')

../../OneLifeData7/objects/3644.txt
../../OneLifeData7/objects/1053.txt
../../OneLifeData7/objects/1735.txt
../../OneLifeData7/objects/3122.txt
../../OneLifeData7/objects/2228.txt


## Helper Functions:

In [20]:
##helper function that finds the ingredients given the product from the transition dictionary
def find_ingredients(value):
    ingredient_list = []
    for idx in d.items():
        for item in d[idx[0]].items():
            if item[0] != 'category':
                if value in item[1]:
                    ingredient_list.append(item[0])
    return list(set(ingredient_list))       


In [21]:
##helper function that returns the category for a given object
def find_parent(mydict, child):
    for i in mydict.keys():
        if child in mydict[i]:
            return i

In [22]:
def split_dataframe_rows(df,column_selectors):
    # we need to keep track of the ordering of the columns
    def _split_list_to_rows(row,row_accumulator,column_selector):
        split_rows = {}
        max_split = 0
        for column_selector in column_selectors:
            split_row = row[column_selector]
            split_rows[column_selector] = split_row
            if len(split_row) > max_split:
                max_split = len(split_row)
        for i in range(max_split):
            new_row = row.to_dict()
            for column_selector in column_selectors:
                try:
                    new_row[column_selector] = split_rows[column_selector].pop(0)
                except IndexError:
                    new_row[column_selector] = ''
            row_accumulator.append(new_row)

    new_rows = []
    df.apply(_split_list_to_rows,axis=1,args = (new_rows,column_selectors))
    new_df = pd.DataFrame(new_rows, columns=df.columns)
    return new_df

In [106]:
##Helper function that finds the key associated with a certain value in the dictionary
def find_value(dictionary, value):
    mylist = []
    for item in dictionary.items():
        if value == item[1]:
            mylist.append(item[0])
    return mylist

In [24]:
def cat(name):
    if name in cat_dict:
        return True
    else:
        return False

## Parse Categories and Objects

Build a dictionary of {object: mapChance} pairs, and identify category names

In [15]:
str_extract = lambda pattern, s: re.search(pattern, s).group(0)
int_extract = lambda pattern, s: int(str_extract(pattern, s))
cat_names = {}
obj_dict = {}
all_obj = {}
for o in obj_files:
    is_obj = re.search('nextObjectNumber|groundHeat', o) is None
    
    if is_obj:
        o_num = int_extract('[0-9]+(?=.txt)', o)
        o_data = obj.read_obj(o_num)
        obj_dict[o_num] = o_data['mapChance']
        if '@' in o_data['name'] :
            cat_names[o_num] = o_data['name']
        else:
            all_obj[o_num] = o_data['name']


Build a dictionary of {category: children} pairs

In [16]:
l = os.listdir('../../OneLifeData7/categories')
cat_arr = [int_extract('[0-9]+(?=.txt)', o) for o in l]
len(cat_arr)

230

In [17]:
cat_arr[1]

3687

In [18]:
perhaps_list = list()
for i in all_obj.values():
    if 'Perhaps' in i:
        perhaps_list.append(find_parent(all_obj, i))
perhaps_list

[2328, 3221, 3233, 2095, 2811]

In [19]:
#update: if parent is not a @category name, add parent name to child list
cat_dict = {}
for i in cat_arr:
    children = cat.is_cat(i)
    if (i in cat_names):
        cat_dict[i] = children  
    else:
        cat_dict[i] = list(np.append(i, children))
          
    
len(cat_dict)

230

determine natural objects

In [25]:
nat_obj = []
for item in obj_dict.items():
    if item[1] != 0 :
        nat_obj.append(item[0])

In [26]:
for o in nat_obj:
    print('%i: %s' % (o, obj.obj_name(o)))

3888: Arctic Expert Way Stone - +biomeSet3 gridPlacement20 &20 &p3 &p0 +expertFind
706: Ice Hole
100: White Pine Tree with Needles
713: Indigo
707: Antarctic Fur Seal
2765: Sugarcane
2567: Sand Deposit
2174: Turkey
714: Rose Madder
1323: Wild Boar
674: Limestone
1874: Wild Mango Tree
4272: Rubber Tree with Pepper Vine
729: Alum
703: Penguin
942: Muddy Iron Vein - gridPlacement40 &40
63: Maple Tree -Branch
1435: Bison
764: Rattle Snake
2504: Malachite
49: Juniper Tree
2466: Dark Nosaj - +normalOnly
1184: Wild Squash Plant
161: Rabbit Hole -hiding &single
1157: Wild Bean Plant
4239: Seeding Wild Dill
65: Lombardy Poplar Tree -Branch
760: Dead Tree
3030: Natural Spring - gridPlacement40 evePrimaryLoc
1140: Wild Potato
99: White Pine Tree
761: Barrel Cactus
211: Fertile Soil Deposit
1020: Snow Bank
2515: Calamine
630: Bear Cave
3001: Electrum Ore
2135: Rubber Tree
418: Wolf
4221: Wild Cucumber Plant
791: Monolith
1013: Wild Rose with Fruit
805: Wild Onion
50: Milkweed
804: Burdock
769: Wil

## Parse transitions

In [27]:
trans_keys = ['origActor', 'newActor', 'origTarget', 'newTarget']
trans_list = []
for f in trans_files:
    trans_dict = trans.read_transition(f)
    trans_objs = [trans_dict[k] for k in trans_keys]
    
    # Are any of these categories?
    trans_cats = [cat(o) for o in trans_objs]
    trans_cat_str = ''.join(re.findall('[A-Z]', str(trans_cats)))
    trans_dict['isCat'] = trans_cat_str
    
    trans_list.append(trans_dict)

Assemble into dataframe:

In [99]:
trans_df = pd.DataFrame(trans_list)
trans_df['isDecay'] = trans_df['autoDecaySeconds'] > 0
trans_df = trans_df[['origActor', 'origTarget', 'newActor', 'newTarget', 'isDecay', 'isTool', 'isCat']]

trans_df.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget,isDecay,isTool,isCat
0,-1,2574,0,2578,True,False,FFFF
1,0,702,425,695,False,False,FFFF
2,314,235,0,317,False,True,FFFF
3,2165,2165,235,3699,False,True,FFFF
4,0,1692,1719,1706,False,False,FFFF


Count each type of category transition:

In [23]:
cat_counts = trans_df.groupby('isCat')['isTool'].agg('count').reset_index()
cat_counts = cat_counts.rename(columns = {'isTool': 'n'})

Check examples of category transitions:

In [24]:
cat_trans = trans_df.groupby('isCat').first().reset_index()
cat_trans = pd.merge(cat_counts, cat_trans, on = 'isCat')
cat_trans = cat_trans.sort_values('n', ascending = False).reset_index(drop=True)
cat_trans

Unnamed: 0,isCat,n,origActor,origTarget,newActor,newTarget,isDecay,isTool
0,FFFF,3818,-1,2574,0,2578,True,False
1,TTFF,185,1127,2883,1127,2879,False,True
2,FFTT,147,516,519,520,519,False,False
3,FFTF,76,209,412,210,512,False,False
4,TFFF,55,853,3374,852,1465,False,False
5,TFFT,38,324,-1,239,322,False,True
6,FTTF,25,0,1069,969,1012,False,False
7,FFFT,20,812,665,0,2233,False,True
8,TTTT,13,394,1802,394,1806,False,True
9,FTFF,7,568,63,1829,63,False,False


## Build Tech Tree and calculate Depth

### Build a dataframe of transitions:

In [28]:
tech_df = pd.DataFrame()
for i in trans_files:
    a = trans.read_transition(opj(trans_dir, i))
    tech_df = pd.concat([tech_df, pd.DataFrame.from_records(a, index=[0])], sort=True)
tech_df.head()

Unnamed: 0,actorMinUseFraction,autoDecaySeconds,desiredMoveDist,isTool,lastUseActor,lastUseTarget,move,newActor,newActorName,newTarget,newTargetName,noUseActor,noUseTarget,origActor,origActorName,origTarget,origTargetName,reverseUseActor,reverseUseTarget,targetMinUseFraction
0,0.0,10,1.0,False,False,False,0.0,0,Empty,2578,Cool Glass,0.0,0.0,-1,Empty,2574,Molten Glass,0.0,0.0,0.0
0,0.0,0,1.0,False,False,False,0.0,425,Wolf Skin,695,Wolf Crown,,,0,Empty,702,Wolf Crown with Wolf Skin,0.0,0.0,0.0
0,,0,,True,False,False,,0,Empty,317,Crucible with Iron,,,314,Wrought Iron,235,Clay Bowl - empty,,,
0,0.0,0,1.0,True,False,False,0.0,235,Clay Bowl - empty,3699,Bowl with Raw Rubber Balls,0.0,0.0,2165,Bowl with Raw Rubber Ball,2165,Bowl with Raw Rubber Ball,0.0,1.0,0.0
0,0.0,0,1.0,False,False,False,0.0,1719,Hungry Schnauser Puppy - held,1706,Schnauser with Puppies -2,0.0,0.0,0,Empty,1692,Schnauser with Puppies -3,0.0,0.0,0.0


Only keeps origActor, origTarget, newActor, newTarget

In [29]:
tech_tree_df = tech_df[['origActor','origTarget','newActor','newTarget']].reset_index(drop = True)
tech_tree_df.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0,2578
1,0,702,425,695
2,314,235,0,317
3,2165,2165,235,3699
4,0,1692,1719,1706


### Expand on categories

In [30]:
tech_tree_newdf = tech_tree_df.copy().astype('object')
for i in range(0,len(tech_tree_newdf)):
    for j in ['origActor','origTarget','newActor','newTarget']:
        curr_cell = tech_tree_df.iloc[i][j]
        if curr_cell in cat_dict:
            
            tech_tree_newdf.at[i,j] = list(cat_dict[curr_cell])


In [31]:
tech_tree_newdf = tech_tree_newdf.query('(newActor != 0) or (newTarget != 0)') #get rid of products = [0,0]

In [32]:
tech_tree_newdf.loc[tech_tree_newdf.newTarget == tech_tree_newdf.origActor, 'newTarget'] = ''
tech_tree_newdf.loc[tech_tree_newdf.newTarget == tech_tree_newdf.origTarget, 'newTarget'] = ''
tech_tree_newdf.loc[tech_tree_newdf.newActor == tech_tree_newdf.origTarget, 'newActor'] = ''
tech_tree_newdf.loc[tech_tree_newdf.newActor == tech_tree_newdf.origActor, 'newActor'] = ''

In [33]:
tech_tree_newdf.head(n = 20)

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0.0,2578
1,0,702,425.0,695
2,314,235,0.0,317
3,2165,2165,235.0,3699
4,0,1692,1719.0,1706
5,0,3085,2228.0,3086
6,-1,2206,0.0,2208
7,455,504,0.0,811
8,1619,2563,0.0,2564
9,0,1686,1717.0,1696


In [34]:
len(tech_tree_newdf)

4302

In [35]:
tech_df.query('(newTarget in @perhaps_list) or (newActor in @perhaps_list)')

Unnamed: 0,actorMinUseFraction,autoDecaySeconds,desiredMoveDist,isTool,lastUseActor,lastUseTarget,move,newActor,newActorName,newTarget,newTargetName,noUseActor,noUseTarget,origActor,origActorName,origTarget,origTargetName,reverseUseActor,reverseUseTarget,targetMinUseFraction
0,0.0,60,1.0,False,False,False,0.0,0,Empty,3221,Perhaps a Pumpkin,0.0,0.0,-1,Empty,1195,Blooming Squash Plant,0.0,0.0,0.0
0,0.0,60,1.0,False,False,False,0.0,0,Empty,3233,Perhaps a Squash,0.0,0.0,-1,Empty,3232,Blooming Pumpkin Plant,0.0,0.0,0.0
0,0.0,5,1.0,False,False,False,0.0,0,Empty,2095,Perhaps a Fish,0.0,0.0,-1,Empty,2101,Cast Fishing Pole,0.0,0.0,0.0
0,0.0,10,1.0,False,False,False,0.0,0,Empty,2328,Perhaps Oil,0.0,0.0,-1,Empty,2303,Firing Oil Drilling Rig,0.0,0.0,0.0
0,0.0,10,1.0,False,False,False,0.0,0,Empty,2328,Perhaps Oil,0.0,0.0,-1,Empty,2331,Firing Oil Drilling Rig - extended,0.0,0.0,0.0
0,0.0,5,1.0,False,False,False,0.0,0,Empty,2811,Perhaps some Shrimp,0.0,0.0,-1,Empty,2810,Cast Net,0.0,0.0,0.0


In [36]:
mydf = tech_tree_newdf.copy()
edge_cases = []
newdf = pd.DataFrame(columns = ['origActor','origTarget','newActor','newTarget'])
for i in range(0,len(mydf)):
    curr_line = mydf.iloc[i]
    target_columns = []
    for j in ['origActor','origTarget','newActor','newTarget']:
        if type(curr_line[j]) is list:
            target_columns = np.append(target_columns,j)
            
    #nothing in the equation has a category id       
    if (len(target_columns) == 0):
        newdf = newdf.append(curr_line.to_frame().T) #append to dataframe
        
    #category id only in product: category id is actually item id
    elif (len(target_columns) == 1) and ((target_columns=='newTarget') or (target_columns=='newActor')):
        if (target_columns=='newTarget'): #if the product is in target
            if curr_line['newTarget'][0] in perhaps_list: #special cases when the product is 'Perhaps xxx'
                curr_line['newTarget'] = curr_line['newTarget'][1:]
                for k in curr_line.values: #first turn every cell into a list obj
                    if type(k) is int:
                        k = [k]
                sub = split_dataframe_rows(curr_line.to_frame().T, target_columns) #split row
                newdf = newdf.append(sub)#append to dataframe
            else: #normal cases
                curr_line['newTarget'] = curr_line['newTarget'][0] #only keep the first element of the list
                newdf = newdf.append(curr_line.to_frame().T) #append to dataframe
        elif (target_columns=='newActor'): #if the product is in actor
            curr_line['newActor'] = curr_line['newActor'][0]
            newdf = newdf.append(curr_line.to_frame().T) #append to dataframe
        
    #more categories in equation and they match in lengths
    else:
        length = len(curr_line[target_columns[0]])
        if all(len(lst) == length for lst in curr_line[target_columns]): #check that they are of the same lengths
            for k in curr_line.values: #first turn every cell into a list obj
                if type(k) is int:
                    k = [k]
            sub = split_dataframe_rows(curr_line.to_frame().T, target_columns) #split row
            newdf = newdf.append(sub)#append to dataframe
        else:
            edge_cases = np.append(edge_cases, i) #edge cases are not included in newdf
        


In [37]:
newdf = newdf.query('(newActor != "") or (newTarget != "")') #remove situations where nothing is produced

In [38]:
len(newdf)

8160

In [39]:
edge = tech_tree_newdf.iloc[edge_cases].copy()
edge = edge.query('newTarget != 1947') #ignore the cards for now
edge

Unnamed: 0,origActor,origTarget,newActor,newTarget
275,"[210, 382]","[1802, 1803, 1804, 1805, 1872, 2723, 3069, 4311]",,"[1806, 1809, 1808, 1807, 1873, 2724, 3070, 4312]"
407,"[210, 382]","[1790, 1791, 1799, 1798, 1868, 2721, 3071, 4308]",,"[1792, 1793, 1801, 1800, 1869, 2722, 3072, 4309]"
438,"[210, 382]","[1034, 1056, 1055]",,"[1035, 1057, 1058]"
727,"[135, 560]",1015,,"[1016, 1025, 1026]"
1834,"[139, 850, 857]","[1136, 1101]",,213
1973,59,"[2226, 2245]",0.0,"[2220, 2240, 2270, 2280, 2314, 2305, 2312, 235..."
2443,"[71, 334, 34, 560]","[2982, 2986, 2985]",,2957
2569,"[912, 1000]","[977, 978, 979, 980, 981, 982, 983, 984]",917.0,
2595,"[210, 382]","[157, 158, 159, 231]",,127
3448,"[210, 382]","[1017, 1042, 1045]",,"[1018, 1043, 1046]"


In [40]:
cols = ['origActor','origTarget','newActor','newTarget']
edge_df = pd.DataFrame(columns = cols)

for j in range(0, len(edge)):
    lengths = [1,1,1,1]
    curr_line = edge.iloc[j]
    for i in range(0,len(curr_line.values)):
        if type(curr_line[i]) is list:
            lengths[i] = len(curr_line[i])

    num_repeat = np.product(np.unique(lengths))
    mat = -np.ones([num_repeat, 4]).astype('object')
    for i in range(0,4):
        if lengths[i] != max(lengths):
            mat[:,i] = np.repeat(curr_line[i], num_repeat/lengths[i])
        else:
            mat[:,i] = curr_line[i]*int(num_repeat/lengths[i])
    sub = pd.DataFrame(mat, columns = cols)
    edge_df = edge_df.append(sub)

In [41]:
edge_df.head(n= 10)

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,210,1802,,1806
1,210,1803,,1809
2,210,1804,,1808
3,210,1805,,1807
4,210,1872,,1873
5,210,2723,,2724
6,210,3069,,3070
7,210,4311,,4312
8,382,1802,,1806
9,382,1803,,1809


In [42]:
newdf = newdf.append(edge_df)
len(newdf)

8306

## Calculate Depth

make a list of items

In [43]:
max_item_id = max(list(all_obj.keys()))
print(max_item_id)

4347


In [44]:
#replace negative values
newdf = newdf.replace(-1, max_item_id+1)
newdf = newdf.replace(-2, max_item_id+2)

In [45]:
#make a list of all objects
items = np.arange(max_item_id+3)
print(max(items))

4349


### create a nested dictionary of ingredients - products pair

In [46]:
#dictionary problem has been fixed. Ingredients will not override
#Update: also expands on situations where a category is in one side of the transition
d = {}
for item in all_obj:
    d[item] = {}
    recipy = newdf.query('(origActor == @item) or (origTarget == @item)')

    if len(recipy):
        for j in range(0,len(recipy)):
            ingredients = recipy.iloc[j].tolist()[0:2]
            ingredients.remove(item)
            ingredient = ingredients[0]
            products = recipy.iloc[j].tolist()[2:4]
            if "" in products:
                products.remove("")
            if ingredient in d[item].keys(): #prevent override
                d[item][ingredient] = list(map(int, set(np.append(d[item][ingredient], products))))
            else:
                d[item][ingredient] = products         
                

### loop over all pairs

In [154]:
#initialize depth
orig_depth = np.inf
depth = {}
transition_from = {} #this calculates the shortest path
all_products = {} #this calculates all paths
for item in items:
    if item in nat_obj:
        depth[item] = 0
        transition_from[item] = [item]
    else:
        depth[item] = orig_depth
        transition_from[item] = []
    all_products[item] = []
depth[0] = 0 #empty hand
depth[len(depth)-1] = 0 #empty ground
depth[len(depth)-2] = 0 #empty (for animals)

In [155]:
root_items = nat_obj
order_list = []
adj_dict = {}
while len(root_items):
    #take an item from root and remove it
    item = root_items[0]
    root_items = np.delete(root_items,0)

    #put all products associated with this item into root
    for key in d[item]: #looping through other ingredients
        #calculate the depths of the products
        for product in d[item][key]:

            #if this product is made of known-depth ingredients, add this product to root_items list
            if depth[product] > depth[item] + depth[key] + 1:
                depth[product] = depth[item] + depth[key] + 1  
                
                all_products[key] = np.append(all_products[key],product)
                all_products[item] = np.append(all_products[item],product)
                transition_from[product] = product
                transition_from[product] = np.append(transition_from[product], list(set(transition_from[key])))
                transition_from[product] = np.append(transition_from[product], list(set(transition_from[item])))
#                 print(transition_from[product])
                adj_dict[product] = [item, key]

                root_items = np.append(root_items, product)
                order_list = np.append(order_list,product)
                

In [88]:
with open('adj.p', 'wb') as fp:
    pickle.dump(adj_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)

In [143]:
depth.values()

dict_values([0, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, 0, 1, 0, 0, 1, 5, 0, inf, inf, 2, 3, inf, inf, inf, inf, 1, inf, inf, 1, 0, 0, 1, 2, 1, 2, 3, 2, 1, 3, 7, inf, 1, 2, 0, 1, 0, 1, 3, 3, 5, 13, 15, 17, 11, 17, 21, inf, 24, 26, 28, 27, inf, 45, 63, inf, 46, 47, 35, 36, 37, inf, inf, 11, inf, inf, inf, 1, inf, inf, 0, 0, inf, inf, inf, inf, inf, 13, 7, 9, 10, 11, 25, 23, 24, 19, 29, 30, 31, inf, 30, inf, 0, 3, 2, 3, 0, 1, 5, 11, inf, inf, 3, 1, 0, 5, 3, 0, 3, 2, 3, 7, 1, 3, 2, 3, inf, 7, 13, 21, 15, 2, 11, 33, 0, 23, 21, 22, 17, 15, 16, 15, 0, 17, 20, 3, 2, 1, 5, 26, 25, inf, 29, 28, 4, 27, 30, 28, 31, 29, 32, 29, 34, 33, 34, 38, 38, 85, 84, inf, 88, 87, 92, 96, 6, 7, 30, 31, 86, 276, 166, 131, 201, 236, 136, 69, 104, 139, 39, 179, 135, 136, 0, 277, 13, 18, 118, 19, 119, 119, 120, 4, inf, inf, 1, 2, 3, 5, 6, 15, 115, 116, 6, inf, 2, 3, 98, 99, 12, 18, 7, 11, 98, 0, 117, 3, 104, in

In [156]:
not_updated = find_value(depth, np.inf)
len(not_updated)

1009

In [145]:
transition_from

{0: [],
 1: [],
 2: [],
 3: [],
 4: [],
 5: [],
 6: [],
 7: [],
 8: [],
 9: [],
 10: [],
 11: [],
 12: [],
 13: [],
 14: [],
 15: [],
 16: [],
 17: [],
 18: [],
 19: [],
 20: [],
 21: [],
 22: [],
 23: [],
 24: [],
 25: [],
 26: [],
 27: [],
 28: [],
 29: [],
 30: [30],
 31: array([ 31.,  30.]),
 32: [32],
 33: [33],
 34: array([34, 33, 32]),
 35: array([  35.,   32.,   33.,   34.,  133.,  135.,  150.,   30.,   31.]),
 36: [36],
 37: [],
 38: [],
 39: array([39, 36, 32, 33, 34]),
 40: array([ 40.,  32.,  33.,  34.,  36.,  39.]),
 41: [],
 42: [],
 43: [],
 44: [],
 45: array([ 45.,  65.]),
 46: [],
 47: [],
 48: array([ 48.,  63.]),
 49: [49],
 50: [50],
 51: array([ 51.,  50.]),
 52: array([ 52.,  50.,  51.]),
 53: array([ 53.,  50.]),
 54: array([ 54.,  50.,  51.]),
 55: array([ 55.,  50.,  51.,  52.]),
 56: array([ 56.,  50.,  53.]),
 57: array([ 57.,  50.]),
 58: array([ 58.,  57.,  50.,  57.,  50.]),
 59: array([ 59.,  57.,  58.,  50.,  57.,  58.,  50.]),
 60: [],
 61: array([ 61.

In [157]:
emp = pd.DataFrame(columns = ['id','name','num_imme_products'])
for i in all_obj:
    emp = emp.append({'id':i, 'name':all_obj[i], 'num_imme_products':len(all_ingredients[i])}, ignore_index = True)

In [158]:
emp.sort_values(by = 'num_imme_products', ascending = False)

Unnamed: 0,id,name,num_imme_products
2063,235,Clay Bowl - empty,145
1992,382,Bowl of Water - +contFoodDish,78
1555,59,Rope,56
2837,34,Sharp Stone,55
2846,135,Flint Chip,45
2331,568,Shears - +tool,43
1653,210,Full Water Pouch,41
3415,33,Stone,39
577,462,Steel Adze - +tool,39
3597,441,Smithing Hammer - +tool,36


turn values to list and remove the product itself from its ingredients

In [96]:
for key in transition_from.keys():
    transition_from[key] = list(set(transition_from[key]))
    transition_from[key] = [int(i) for i in transition_from[key] if (i != key)]

In [139]:
len(transition_from[248])

30

calculate empowerment of the object (defined as the number of objects that can be made with this object)

In [128]:
# empowerment = {}
# empowerment_df = pd.DataFrame(columns = ['id','name','num_products'])
# for item in all_obj:
#     products = []
#     for key in all_ingredients:
#         if item in all_ingredients[key]:
#             products = np.append(products, key)
#     empowerment[item] = products
#     empowerment_df = empowerment_df.append({'id':item, 'name':all_obj[item], 'num_products':len(products)}, ignore_index=True)

## Save to csv

In [147]:
with open('tech_outputs/depth.csv', 'w') as f:
    f.write("Id,Depth,Name\n")
    for key in depth.keys():
        if key in all_obj:
            f.write("%s,%s,%s\n"%(key, depth[key], all_obj[key]))

In [115]:
with open('tech_outputs/num_unique_ingredients.csv', 'w') as f:
    f.write("id,name,num_ingredients\n")
    for key in transition_from.keys():
        if key in all_obj:
            f.write("%s,%s,%s\n"%(key, all_obj[key], len(transition_from[key])))

In [116]:
with open('tech_outputs/ingredients.p', 'wb') as fp:
    pickle.dump(transition_from, fp, protocol=pickle.HIGHEST_PROTOCOL)

In [116]:
with open('tech_outputs/products.p', 'wb') as fp:
    pickle.dump(empowerment, fp, protocol=pickle.HIGHEST_PROTOCOL)

In [160]:
emp.to_csv('tech_outputs/empowerment.csv')

## Check:

In [121]:
with open('tech_outputs/ingredients.p', 'rb') as fp:
    data = pickle.load(fp)

In [162]:
emp_df = pd.read_csv('tech_outputs/empowerment.csv', index_col = 0)
emp_df = emp_df.sort_values(by = 'num_imme_products', ascending = False).reset_index(drop = True)
emp_df.head(n = 10)

Unnamed: 0,id,name,num_imme_products
0,235,Clay Bowl - empty,145
1,382,Bowl of Water - +contFoodDish,78
2,59,Rope,56
3,34,Sharp Stone,55
4,135,Flint Chip,45
5,568,Shears - +tool,43
6,210,Full Water Pouch,41
7,33,Stone,39
8,462,Steel Adze - +tool,39
9,441,Smithing Hammer - +tool,36


In [119]:
trans_df = pd.read_csv('tech_outputs/num_unique_ingredients.csv')
trans_df.head()

Unnamed: 0,id,name,num_ingredients
0,11,Skin Tone A &B &C &D &E &F,0
1,19,Female001 D,0
2,30,Wild Gooseberry Bush,0
3,31,Gooseberry,1
4,32,Big Hard Rock,0


In [120]:
sorted_df = trans_df.sort_values(by = 'num_ingredients', ascending = False).reset_index(drop = True)
sorted_df.head()

Unnamed: 0,id,name,num_ingredients
0,2998,Camera taking Photograph - +photo,460
1,3000,Wound Loaded Camera,459
2,2999,Loaded Camera,458
3,2703,Running Crude Airplane on Landing Strip,407
4,2704,Crude Airplane on Landing Strip - +toolThe_Air...,406


In [71]:
for i in not_updated:
    try:
        print(obj.read_obj(str(i))['name'])
    except:
        SyntaxError

Skin Tone A &B &C &D &E &F
Female001 D
Milkweed Stump -Fertile
Male002 D
Female003 D
Female004 C
Male005 D
Female006 C
Male007 C
Male008 C
Touched Edge
Edge
@ Kindling Source
@ Pond Water Source
@ Full Portable Water Source
@ Carrot
@ Wet Canada Goose Pond
Jason Test
Long Play Phonorecord Album
Phonorecord Sleeve
Long Play Phonorecord
Phonograph
Playing Phonograph
@ Yew Bow
Flooded Pond
@ Food Burner
@ Edible Pie
@ Skinning Tool
@ Lamb Food Bowl
@ Small Trash
@ Planted Stakes
Parts
@ Unshot Grizzly Bear
@ Empty Portable Water Source
@ New Scrap Steel
@ Shallow Digger
@ Rough Cutter
@ Simmering Liquid
@ Partial Adobe Build
@ Stone Rubble Sources
Floppy Basket
@ Row Tiller
@ Non-tilling Skewer
Broken Basket
@ Decaying Hat
@ Decaying Shirt
@ Decaying Bottom
@ Decaying Shoe
@ Tillable Row
@ Decaying Basket
@ Broken Steel Source
@ Weak Chisel Breaker
Fence Rail
@ Fine Cutter
@ Weak Froe Breaker
@ Unlocked Sign
@ Locked Sign
@ Free Lock
Female009 D
Male010 D
Female011 C
Male012 C
Full Deep W

In [95]:
hardest_item = max([i for i in list(depth.values()) if i < np.inf])
myitem = find_value(depth, hardest_item, 0)[0]
tech_df.query('(newTarget == @myitem) or (newActor == @myitem)')

Unnamed: 0,actorMinUseFraction,autoDecaySeconds,desiredMoveDist,isTool,lastUseActor,lastUseTarget,move,newActor,newActorName,newTarget,newTargetName,noUseActor,noUseTarget,origActor,origActorName,origTarget,origTargetName,reverseUseActor,reverseUseTarget,targetMinUseFraction
0,0.0,0,1.0,True,False,False,0.0,2343,Tank of Kerosene,2703,Running Crude Airplane on Landing Strip,0.0,0.0,2343,Tank of Kerosene,2704,Crude Airplane on Landing Strip - +toolThe_Air...,0.0,0.0,0.0
0,0.0,0,1.0,True,True,False,0.0,2302,Empty Tank,2703,Running Crude Airplane on Landing Strip,0.0,0.0,2343,Tank of Kerosene,2704,Crude Airplane on Landing Strip - +toolThe_Air...,0.0,0.0,0.0


In [96]:
depth[myitem]

92

In [99]:
depth[151]

4

In [100]:
depth[2998]

90