# Identify family interactions based on if they shared the same objects

In [1]:
%matplotlib inline

import os, re, glob, datetime, json
from os.path import join as opj
import pandas as pd
import numpy as np
import scipy.stats
from tqdm import tqdm
from datetime import datetime

from tqdm.notebook import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from networkx.drawing.nx_agraph import graphviz_layout

## load data

In [2]:
baseDir = '../data/publicMapChangeData/bigserver2.onehouronelife.com/'

In [3]:
start = 1573982073

read all mapChange data

In [4]:
str_extract = lambda pattern, s: re.search(pattern, s).group(0)
int_extract = lambda pattern, s: int(str_extract(pattern, s))

In [5]:
files_tot = []
for ts in glob.glob(baseDir + '*'):
    files_tot.append(ts)

In [6]:
file_names = []
for f in files_tot:
    fn = f.split('/')[-1]
    file_names.append(fn)

sort according to timestamp

In [7]:
file_names.sort(key=lambda f: int_extract('[0-9]+(?=)', f))

In [8]:
file_names

['1573895673time_mapLog.txt',
 '1573895673time_mapSeed.txt',
 '1573982073time_mapLog.txt',
 '1574068473time_mapLog.txt',
 '1574102503time_mapLog.txt',
 '1574102503time_mapSeed.txt',
 '1574151679time_mapLog.txt',
 '1574238079time_mapLog.txt',
 '1574324479time_mapLog.txt',
 '1574410879time_mapLog.txt',
 '1574497279time_mapLog.txt',
 '1574552311time_mapLog.txt',
 '1574638711time_mapLog.txt',
 '1574725111time_mapLog.txt',
 '1574749280time_mapLog.txt',
 '1574835680time_mapLog.txt',
 '1574848832time_mapLog.txt',
 '1574935232time_mapLog.txt',
 '1575021632time_mapLog.txt',
 '1575108032time_mapLog.txt',
 '1575194432time_mapLog.txt',
 '1575280833time_mapLog.txt',
 '1575367233time_mapLog.txt',
 '1575453633time_mapLog.txt',
 '1575540033time_mapLog.txt',
 '1575626433time_mapLog.txt',
 '1575693816time_mapLog.txt',
 '1575780216time_mapLog.txt',
 '1575866616time_mapLog.txt',
 '1575953016time_mapLog.txt',
 '1576038671time_mapLog.txt',
 '1576038671time_mapSeed.txt',
 '1576125071time_mapLog.txt',
 '15762

create a dictionary for mapSeed -- mapLog

In [9]:
map_seeds = [int_extract('[0-9]+(?=)', fn) for fn in file_names if 'mapSeed' in fn]
map_seeds

[1573895673,
 1574102503,
 1576038671,
 1578345720,
 1578354747,
 1579713519,
 1580144896,
 1581985139,
 1583642903,
 1584061484,
 1585440511,
 1585512770,
 1585603481,
 1587166656]

In [10]:
file_dict = {}
for fn in file_names:
    timestamp = int_extract('[0-9]+(?=)', fn)
    if timestamp in map_seeds:
        map_start = timestamp
        file_dict[map_start] = [timestamp]
    else:
        file_dict[map_start].append(timestamp)

In [11]:
file_dict

{1573895673: [1573895673, 1573982073, 1574068473],
 1574102503: [1574102503,
  1574151679,
  1574238079,
  1574324479,
  1574410879,
  1574497279,
  1574552311,
  1574638711,
  1574725111,
  1574749280,
  1574835680,
  1574848832,
  1574935232,
  1575021632,
  1575108032,
  1575194432,
  1575280833,
  1575367233,
  1575453633,
  1575540033,
  1575626433,
  1575693816,
  1575780216,
  1575866616,
  1575953016],
 1576038671: [1576038671,
  1576125071,
  1576211471,
  1576295889,
  1576303914,
  1576372684,
  1576437742,
  1576454784,
  1576541127,
  1576627527,
  1576713927,
  1576800327,
  1576886727,
  1576905906,
  1576992306,
  1577078706,
  1577165106,
  1577251506,
  1577337906,
  1577424306,
  1577510707,
  1577597107,
  1577683507,
  1577769907,
  1577856307,
  1577942707,
  1578029107,
  1578101918,
  1578188318,
  1578274718],
 1578345720: [1578345720],
 1578354747: [1578354747,
  1578441147,
  1578527547,
  1578610753,
  1578697153,
  1578783553,
  1578869953,
  1578956353,
  

### test: only look at the first slice

In [12]:
subset = file_dict[list(file_dict.keys())[0]]
subset

[1573895673, 1573982073, 1574068473]

In [13]:
start = pd.read_csv(baseDir + str(subset[0]) + 'time_mapLog.txt')
time0 = float(start.columns[0].split(": ")[1])
print(time0)

1573895672.99


In [14]:
col = ['time','locX','locY','obj','playerID']
data = pd.DataFrame(columns = col)
for i in subset:
    mydf = pd.read_csv(baseDir + str(i) + 'time_mapLog.txt')
    start_time = float(mydf.columns[0].split(": ")[1])
    mydf[['time','locX','locY','obj','playerID']] = mydf[mydf.columns[0]].str.split(" ", expand=True)
    mydf = mydf.dropna()
    mydf['time'] = mydf['time'].astype(float) + start_time - time0
    mydf['playerID'] = mydf['playerID'].astype(int)
    print(len(mydf))
    mydf = mydf[col]
    data = data.append(mydf, ignore_index = True)

1015275
1414424
425470


In [15]:
data.head(n = 10)

Unnamed: 0,time,locX,locY,obj,playerID
0,82.36,-5123,-1403,74,-1
1,82.36,-5139,-1398,2919,-1
2,82.36,-5138,-1398,2917,-1
3,82.36,-5137,-1398,198,-1
4,82.36,-5136,-1398,2099,-1
5,82.36,-5136,-1397,2884,-1
6,82.37,-5137,-1383,198,-1
7,122.31,-5137,-1395,0,2276905
8,125.06,-5138,-1394,0,2276905
9,127.21,-5138,-1395,0,-1


## Approach 1: focus on location change

### load family data and write a find_family function

In [16]:
fam= pd.read_csv('../2_demographics/outputs/family_playerID.tsv', sep = '\t', index_col = 0)
fam.head(n=10)

  mask |= (ar1 == a)


Unnamed: 0,playerID,family
0,3080084,time-1592284232_eve-3080067_name-PICKLE
1,3080114,time-1592284232_eve-3080067_name-PICKLE
2,3080111,time-1592284232_eve-3080067_name-PICKLE
3,3080108,time-1592284232_eve-3080067_name-PICKLE
4,3080104,time-1592284232_eve-3080067_name-PICKLE
5,3080130,time-1592284232_eve-3080067_name-PICKLE
6,3080122,time-1592284232_eve-3080067_name-PICKLE
7,3080117,time-1592284232_eve-3080067_name-PICKLE
8,3080067,time-1592284232_eve-3080067_name-PICKLE
9,3080044,time-1592283401_eve-3080044_name-KORE


In [17]:
def find_fam(playerId):
    
    family = fam.loc[fam['playerID'] == playerId,'family'].tolist()
    if len(family):    
        fam_name = family[0].split('-')[-1]
    else:
        fam_name = "UnKnown"
    
    return fam_name

check if every player has a last name (No!) and find the number of players in a family

In [18]:
fam_dict = {}
for i in data['playerID'].unique():
    if (i != -1) and (i != 0):
        family = fam.query('playerID == @i')['family'].values
        if len(family):
            family_name = family[0].split('-')[-1]
            if family_name in fam_dict.keys():
                fam_dict[family_name] = fam_dict[family_name]+1
            else:
                fam_dict[family_name] = 1


### load transition df

In [19]:
adj_df = pd.read_csv('../3_technology/tech_outputs/adj.csv')
adj_df.head()

Unnamed: 0,ingredient1,ingredient2,product
0,706,707,717
1,706,703,718
2,100,0,96
3,2174,4349,2175
4,1323,4348,1328


### load object depth data and write a find_depth function

In [20]:
depth = pd.read_csv('../3_technology/tech_outputs/num_unique_ingredients.csv')
depth.head()

Unnamed: 0,id,name,num_ingredients
0,11,Skin Tone A &B &C &D &E &F,0
1,19,Female001 D,0
2,30,Wild Gooseberry Bush,0
3,31,Gooseberry,1
4,32,Big Hard Rock,0


In [21]:
d = dict([(i,a) for i,a in zip(depth.id, depth.num_ingredients)])
d[0] = 0

In [22]:
def find_depth(obj):
    try:
        return d[obj]
    except:
        return None

### load expertise data (Is expertise correlated with innovation?)

In [23]:
expertise = pd.read_csv('../data/outputs/player_expertise.tsv', sep = '\t')
expertise = expertise.query('era == "boundless"').reset_index(drop = True)
expertise.head()

Unnamed: 0,era,timestamp,playerID,hash,age,n_life,gametime
0,boundless,1584163133,2783339.0,0002e5ea5ce7cfd761135d255a245a3344af4377,60.0,0,60.0
1,boundless,1584164596,2783430.0,0002e5ea5ce7cfd761135d255a245a3344af4377,23.77,1,83.77
2,boundless,1584199272,2784552.0,0002e5ea5ce7cfd761135d255a245a3344af4377,29.9,2,113.67
3,boundless,1584207170,2784982.0,0002e5ea5ce7cfd761135d255a245a3344af4377,1.43,3,115.1
4,boundless,1584207184,2784989.0,0002e5ea5ce7cfd761135d255a245a3344af4377,0.09,4,115.19


In [24]:
data = data.iloc[0:10000]

In [25]:
data['family'] = data['playerID'].apply(find_fam)

In [26]:
data['objID'] = data['obj'].apply(lambda x: int_extract('[0-9]+',x))

In [27]:
data.head(n=10)

Unnamed: 0,time,locX,locY,obj,playerID,family,objID
0,82.36,-5123,-1403,74,-1,UnKnown,74
1,82.36,-5139,-1398,2919,-1,UnKnown,2919
2,82.36,-5138,-1398,2917,-1,UnKnown,2917
3,82.36,-5137,-1398,198,-1,UnKnown,198
4,82.36,-5136,-1398,2099,-1,UnKnown,2099
5,82.36,-5136,-1397,2884,-1,UnKnown,2884
6,82.37,-5137,-1383,198,-1,UnKnown,198
7,122.31,-5137,-1395,0,2276905,ZABICKI,0
8,125.06,-5138,-1394,0,2276905,ZABICKI,0
9,127.21,-5138,-1395,0,-1,UnKnown,0


In [54]:
fam_subset = data.query('family == "ZABICKI"')
fam_innovs = fam_subset.groupby(['objID'])['playerID'].apply(lambda x: x.tolist()[0]).to_frame().reset_index()
fam_innovs['depth'] = fam_innovs['objID'].apply(find_depth)
fam_innovs.head()

Unnamed: 0,objID,playerID,depth
0,0,2276905,0.0
1,30,2276906,0.0
2,31,2276914,1.0
3,33,2276930,0.0
4,34,2276909,2.0


In [55]:
len(fam_innovs)

309

In [56]:
fam_innovs = fam_innovs.loc[fam_innovs.depth > 0]
fam_innovs = pd.merge(fam_innovs, parsed, on = ['objID','playerID'], how = 'inner')
fam_innovs = fam_innovs.sort_values(by = 'depth', ascending = False).reset_index(drop = True)
fam_innovs.head()

Unnamed: 0,objID,playerID,depth,time,locX,locY,prev_objID,prev_playerID
0,2669,2276914,378.0,556.16,-5120,-1402,2665.0,2276914.0
1,2669,2276914,378.0,554.15,-5120,-1402,2665.0,2276914.0
2,2665,2276914,376.0,555.31,-5120,-1402,2669.0,2276914.0
3,2665,2276914,376.0,552.93,-5120,-1402,,
4,2390,2276909,334.0,1800.32,-5120,-1400,,


In [57]:
len(fam_innovs)

1003

In [40]:
fam = 'ZABICKI'
innovs_to_be_removed = pd.DataFrame(columns = ['fam', 'objID'])
for i,j in fam_innovs.iterrows():
    objID = j['objID']
    playerID = j['playerID']
    time = j['time']
    obj_data = parsed.query('(playerID == @playerID) and (time < @time)')
    prev = obj_data['prev_objID'].values
    if objID in prev:
#         print(objID)
        innovs_to_be_removed = innovs_to_be_removed.append({'fam':fam, 'objID':objID}, ignore_index = True)
        
    

In [43]:
obj_data

Unnamed: 0,time,locX,locY,objID,playerID,prev_objID,prev_playerID
637,682.46,-5105,-1401,391,2276914,,
627,674.25,-5106,-1397,602,2276914,,
635,681.22,-5106,-1400,235,2276914,,
478,617.7,-5107,-1401,1135,2276914,,
416,585.98,-5108,-1400,31,2276914,,
456,610.03,-5108,-1400,235,2276914,0.0,-1.0
460,611.86,-5108,-1400,253,2276914,235.0,2276914.0
469,614.41,-5108,-1400,31,2276914,0.0,-1.0
497,623.25,-5108,-1400,0,2276914,31.0,2276914.0
476,617.16,-5108,-1401,253,2276914,,


In [41]:
innovs_to_be_removed

Unnamed: 0,fam,objID,orig_fam
0,ZABICKI,2669,
1,ZABICKI,2665,
2,ZABICKI,2390,
3,ZABICKI,2881,
4,ZABICKI,2881,
5,ZABICKI,2882,
6,ZABICKI,2878,
7,ZABICKI,2878,
8,ZABICKI,3161,
9,ZABICKI,3161,


In [36]:
grouped = fam_innovs.groupby(['playerID'])['objID'].count().to_frame().reset_index()
grouped.head()

Unnamed: 0,playerID,objID
0,2276905,101
1,2276906,19
2,2276909,49
3,2276914,55
4,2276922,9


In [22]:
innovs = data.groupby(['objID'])['playerID'].apply(lambda x: x.tolist()[0]).to_frame().reset_index()

In [None]:
fam_innovs.query()

In [23]:
innovs.head(n = 10)

Unnamed: 0,objID,playerID
0,0,2276905
1,30,2276906
2,31,2276914
3,32,2276908
4,33,2276911
5,34,2276908
6,35,2276936
7,39,2276911
8,40,2276917
9,45,2276908


## record connection in a dataframe

Sort by location and time

In [29]:
parsed = data.sort_values(by = ['locX','locY','time']).copy()
parsed.head()

Unnamed: 0,time,locX,locY,obj,playerID,family,objID
39,225.39,-3,4,87,2276907,UnKnown,87
5404,2264.32,-4994,-1296,3161,2276905,ZABICKI,3161
5407,2265.92,-4994,-1296,0,2276905,ZABICKI,0
5411,2267.17,-4994,-1297,3161,2276905,ZABICKI,3161
5414,2269.82,-4994,-1297,0,2276905,ZABICKI,0


tag previous player and object at this location

In [30]:
parsed['same_loc'] = parsed.locY.eq(parsed.locY.shift())

In [34]:
parsed['objID'] = parsed['obj'].apply(lambda x: int_extract('[0-9]+',x))

In [35]:
parsed['all_prev_obj'] = parsed.objID.shift()
parsed['prev_objID'] = parsed[parsed['same_loc']==True]['all_prev_obj']
parsed['all_prev_playerID'] = parsed.playerID.shift()
parsed['prev_playerID'] = parsed[parsed['same_loc']==True]['all_prev_playerID']

In [36]:
parsed = parsed[['time','locX','locY','objID','playerID','prev_objID','prev_playerID']]
parsed.head(n = 10)

Unnamed: 0,time,locX,locY,objID,playerID,prev_objID,prev_playerID
39,225.39,-3,4,87,2276907,,
5404,2264.32,-4994,-1296,3161,2276905,,
5407,2265.92,-4994,-1296,0,2276905,3161.0,2276905.0
5411,2267.17,-4994,-1297,3161,2276905,,
5414,2269.82,-4994,-1297,0,2276905,3161.0,2276905.0
5457,2281.49,-5006,-1322,3161,2276905,,
5469,2283.87,-5006,-1322,0,2276905,3161.0,2276905.0
5484,2286.76,-5015,-1329,647,2276905,,
5680,2316.6,-5019,-1359,382,2276905,,
5661,2313.0,-5020,-1359,0,-1,382.0,2276905.0


take data where an object at a location was changed by a different player

In [1151]:
parsednew = parsed.loc[(parsed.obj != parsed.prev_obj) & (parsed.prev_obj != "0") \
                       & (parsed.prev_playerID != -1) & (parsed.playerID != -1)]
parsednew = parsednew.sort_values(by = ['time'])

In [1154]:
parsednew.head(n = 10)

Unnamed: 0,time,locX,locY,obj,objID,playerID,prev_obj,prev_playerID,later_obj,later_playerID
7,122.31,-5137,-1395,0,0,2276905,,,,
8,125.06,-5138,-1394,0,0,2276905,,,,
11,129.09,-5135,-1396,134,134,2276905,,,,
12,130.33,-5136,-1395,0,0,2276905,,,,
14,145.79,-7577,-352,0,0,2276908,,,,
15,147.25,-5136,-1386,0,0,2276905,,,,
16,149.37,-5138,-1386,0,0,2276905,,,,
17,149.88,-7577,-349,32,32,2276908,,,,
18,151.23,-5137,-1386,0,0,2276905,,,,
19,152.21,-7581,-351,150,150,2276908,,,,


In [1180]:
def extract_objID(obj):
    try:
        objid = int_extract('[0-9]+',obj)
        return objid
    except:
        return np.nan

In [1184]:
parsednew['prev_objID'] = parsednew['prev_obj'].apply(extract_objID)
parsednew['same_obj'] = parsednew.objID.eq(parsednew.prev_objID)
# parsednew = parsednew[['time','locX','locY','obj','playerID','prev_obj','prev_playerID','same_obj']]
parsednew.head(n=20)

Unnamed: 0,time,locX,locY,obj,objID,playerID,prev_obj,prev_playerID,later_obj,later_playerID,prev_objID,same_obj
7,122.31,-5137,-1395,0,0,2276905,,,,,,False
8,125.06,-5138,-1394,0,0,2276905,,,,,,False
11,129.09,-5135,-1396,134,134,2276905,,,,,,False
12,130.33,-5136,-1395,0,0,2276905,,,,,,False
14,145.79,-7577,-352,0,0,2276908,,,,,,False
15,147.25,-5136,-1386,0,0,2276905,,,,,,False
16,149.37,-5138,-1386,0,0,2276905,,,,,,False
17,149.88,-7577,-349,32,32,2276908,,,,,,False
18,151.23,-5137,-1386,0,0,2276905,,,,,,False
19,152.21,-7581,-351,150,150,2276908,,,,,,False


In [1185]:
len(parsednew)

1062897

In [492]:
duration = max(data.time) - min(data.time)
duration

206688.01999998093

### parse objects

how many unique objects are there?

In [222]:
all_objs = data['obj'].unique()
len(all_objs)

3135

In [223]:
all_objID = []
for i in all_objs:
    if type(i) is str:
        all_objID.append(int_extract('[0-9]+',i))
all_objID = list(set(all_objID))

In [224]:
len(all_objID)

2337

## focus on innovations

create an innovation df

In [1157]:
innovations = parsednew.query('(same_obj == False) and (obj != "0")')
innovations.head()

Unnamed: 0,time,locX,locY,obj,objID,playerID,prev_obj,prev_playerID,later_obj,later_playerID,prev_objID,same_obj
11,129.09,-5135,-1396,134,134,2276905,,,,,0,False
17,149.88,-7577,-349,32,32,2276908,,,,,0,False
19,152.21,-7581,-351,150,150,2276908,,,,,0,False
20,169.34,-7565,-337,224,224,2276908,,,,,0,False
21,171.74,-7570,-335,224,224,2276908,,,,,0,False


filter out strange objects FOR NOW

In [225]:
normal_objs = [i for i in all_objID if i<4000]
len(normal_objs)

1754

In [1186]:
innovations = innovations.query('(objID in @normal_objs) and (prev_objID in @normal_objs)')
len(innovations)

133381

make sure the new object has a greater depth than the previous one

In [1187]:
innovations['obj_depth'] = innovations['objID'].apply(find_depth)
innovations['prev_obj_depth'] = innovations['prev_objID'].apply(find_depth)
innovations['deeper'] = innovations.obj_depth>innovations.prev_obj_depth
innovations = innovations.query('deeper == True')
innovations.head()

Unnamed: 0,time,locX,locY,obj,objID,playerID,prev_obj,prev_playerID,later_obj,later_playerID,prev_objID,same_obj,obj_depth,prev_obj_depth,deeper
11,129.09,-5135,-1396,134,134,2276905,,,,,0,False,6,0,True
19,152.21,-7581,-351,150,150,2276908,,,,,0,False,4,0,True
20,169.34,-7565,-337,224,224,2276908,,,,,0,False,4,0,True
21,171.74,-7570,-335,224,224,2276908,,,,,0,False,4,0,True
22,172.11,-5125,-1393,2873u3,2873,2276905,,,,,0,False,189,0,True


In [1188]:
len(innovations)

133381

## Scratch: trying to get full information regarding what the player has in hand

load transition data

In [884]:
trans_df = pd.read_csv('../3_technology/tech_outputs/transition.csv', index_col = [0])
trans_df = trans_df.replace({4348:-1, 4349:-2}).reset_index(drop = True)
trans_df.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0.0,2578.0
1,0,702,425.0,695.0
2,314,235,0.0,317.0
3,2165,2165,235.0,3699.0
4,0,1692,1719.0,1706.0


In [892]:
l = adj_df[['ingredient1','ingredient2']].values
len(l)

3258

In [886]:
kept = []
for i,j in trans_df.iterrows():
    if list(set([j['origActor'], j['origTarget']])) in l:
        kept.append(i)
        

In [902]:
adj_newdf = trans_df.iloc[kept]
adj_newdf.head()

Unnamed: 0,origActor,origTarget,newActor,newTarget
0,-1,2574,0.0,2578.0
2,314,235,0.0,317.0
3,2165,2165,235.0,3699.0
6,-1,2206,0.0,2208.0
7,455,504,0.0,811.0


In [890]:
len(adj_newdf)

5571

### test: look at an individual player

In [900]:
player_data = parsed.query('playerID == 2276929').sort_values(by = 'time').reset_index(drop = True)
player_data

Unnamed: 0,time,locX,locY,obj,playerID,prev_obj,prev_playerID
0,1351.99,-5133,-1403,0,2276929,183,2276914.0
1,1354.44,-5131,-1402,183,2276929,,
2,1359.6,-5131,-1402,0,2276929,183,2276929.0
3,1359.98,-5131,-1402,183,2276929,0,2276929.0
4,1360.41,-5131,-1402,0,2276929,183,2276929.0
5,1360.77,-5131,-1402,183,2276929,0,2276929.0
6,1366.58,-5131,-1402,0,2276929,183,2276929.0
7,1368.66,-5129,-1401,183,2276929,0,-1.0
8,1369.17,-5131,-1401,255,2276929,0,2276922.0
9,1375.0,-5129,-1401,0,2276929,183,2276929.0


In [920]:
actors = []
for i,j in player_data.iterrows():
    
    obj = int_extract('[0-9]+', j['obj'])
    prev_obj = j['prev_obj']
    possible_actors = adj_newdf.query('newTarget == @obj')['origActor'].values
    possible_actors = [i for i in possible_actors if i>=0]
    
    if (obj == 0):
        actors = np.append(actors, prev_obj)
        
    elif (prev_obj == "0"):
        actors = np.append(actors, obj)
        
    elif (len(possible_actors)>0):
#         actors_from_prev = adj_newdf.query('product == @prev_obj')['origActor']
#         actor = [i for i in possible_actors if i in actors_from_prev]
        actor = possible_actors[np.argmin([find_depth(i) for i in possible_actors])]
        if find_depth(actor)>0:
            actors = np.append(actors, actor)
        else:
            actors = np.append(actors, 0)
            
    else:
        actors = np.append(actors, 0)

In [919]:
player_data['hold'] = actors
player_data

Unnamed: 0,time,locX,locY,obj,playerID,prev_obj,prev_playerID,hold
0,1351.99,-5133,-1403,0,2276929,183,2276914.0,183.0
1,1354.44,-5131,-1402,183,2276929,,,0.0
2,1359.6,-5131,-1402,0,2276929,183,2276929.0,183.0
3,1359.98,-5131,-1402,183,2276929,0,2276929.0,183.0
4,1360.41,-5131,-1402,0,2276929,183,2276929.0,183.0
5,1360.77,-5131,-1402,183,2276929,0,2276929.0,183.0
6,1366.58,-5131,-1402,0,2276929,183,2276929.0,183.0
7,1368.66,-5129,-1401,183,2276929,0,-1.0,183.0
8,1369.17,-5131,-1401,255,2276929,0,2276922.0,255.0
9,1375.0,-5129,-1401,0,2276929,183,2276929.0,183.0


In [789]:
player_data.query('(obj == "2575")')

Unnamed: 0,time,locX,locY,obj,playerID,prev_obj,prev_playerID,hold


In [722]:
parsed.query('(locX == "-5123") and (locY == "-1412")')

Unnamed: 0,time,locX,locY,obj,playerID,prev_obj,prev_playerID
1902,1201.01,-5123,-1412,2575,2276905,,


In [760]:
adj_newdf.query('origActor == 3052')

Unnamed: 0,origActor,origTarget,product


In [830]:
depth.query('id == 255')

Unnamed: 0,id,name,num_ingredients
193,255,Bowl of Minced Rabbit,70


## back to innovations

make sure the innovation is used by a differnt player (avoid intermediate products being counted)

In [1190]:
useful_innovations = innovations.loc[innovations.later_playerID != innovations.playerID].copy()
useful_innovations = useful_innovations[['time','locX','locY','obj','playerID','prev_obj',\
                                         'prev_playerID','later_obj','later_playerID','objID','obj_depth']]
useful_innovations.head(n=10)

Unnamed: 0,time,locX,locY,obj,playerID,prev_obj,prev_playerID,later_obj,later_playerID,objID,obj_depth
11,129.09,-5135,-1396,134,2276905,,,,,134,6
19,152.21,-7581,-351,150,2276908,,,,,150,4
20,169.34,-7565,-337,224,2276908,,,,,224,4
21,171.74,-7570,-335,224,2276908,,,,,224,4
22,172.11,-5125,-1393,2873u3,2276905,,,,,2873,189
23,173.59,-7566,-336,34,2276908,,,,,34,2
24,175.11,-5123,-1392,2742u3,2276905,,,,,2742,55
30,182.68,-5121,-1392,2861,2276905,,,,,2861,174
32,186.68,-7557,-359,45,2276908,,,,,45,1
34,201.17,-7569,-335,66,2276908,,,,,66,1


In [1191]:
len(useful_innovations)

133381

In [1192]:
def find_num(objs):
    try:
        return len(set(objs))
    except:
        return 0

count the number of innovations per player (defined as depth(present_obj) > depth(previous_obj))

In [1193]:
creator_df = useful_innovations.groupby(['playerID'])['objID'].apply(list).to_frame().reset_index()
creator_df['innovations'] = creator_df['objID'].apply(lambda x: len(set(x)))
creator_df = creator_df.sort_values(by = 'innovations', ascending = False)
creator_df['playerID'] = creator_df['playerID'].astype(int)
creator_df.head()

Unnamed: 0,playerID,objID,innovations
1414,2279373,"[48, 64, 64, 2882, 345, 1317, 593, 72, 72, 72,...",87
1209,2279012,"[391, 1217, 1217, 2742, 1217, 1217, 1108, 3179...",71
1550,2279579,"[123, 123, 123, 34, 292, 141, 292, 141, 150, 2...",62
1927,2281733,"[1262, 395, 395, 141, 1262, 141, 141, 1262, 14...",60
3499,2284428,"[55, 57, 220, 57, 55, 58, 58, 54, 57, 55, 59, ...",60


In [1194]:
first_loc = parsed.loc[(parsed.prev_playerID.isnull()) & (parsed.playerID != -1) & (parsed.obj != "0")].copy()
first_loc.head()

Unnamed: 0,time,locX,locY,obj,objID,playerID,prev_obj,prev_playerID,later_obj,later_playerID
952037,49726.09,-10000,-1042,2144,2144,2279625,,,,
1049318,91043.7,-10000,-1101,32,32,2281593,,,,
2258681,159274.73,-10000,-220,34,34,2285145,,,,
2251001,158631.87,-10000,-221,167,167,2285080,,,,
2289688,161759.75,-10000,-255,64,64,2285145,,,,


In [1195]:
len(first_loc.playerID.unique())

5049

In [1196]:
# first_loc = first_loc.sort_values(by = 'playerID')
first_loc_objs = first_loc.groupby('playerID')['objID'].apply(list).to_frame().reset_index()
first_loc_objs.head()

Unnamed: 0,playerID,objID
0,2276905,"[3161, 3161, 3161, 647, 382, 1603, 2140, 3161,..."
1,2276906,"[87, 2919, 2146, 242, 223, 1160, 1242, 30, 396..."
2,2276907,[87]
3,2276908,"[87, 45, 224, 34, 225, 225, 66, 224, 32, 150]"
4,2276909,"[64, 66, 45, 48, 614, 614, 224, 224, 602, 45, ..."


In [1197]:
player_prev_obj = parsed.groupby('playerID')['prev_obj'].apply(list).to_frame().reset_index()
player_prev_obj.head()

Unnamed: 0,playerID,prev_obj
0,-1,"[nan, nan, 807, nan, nan, nan, nan, 57, 344, n..."
1,2276905,"[nan, 3161, nan, 3161, nan, 3161, nan, nan, 0,..."
2,2276906,"[nan, nan, nan, nan, nan, nan, nan, 0, nan, 0,..."
3,2276907,[nan]
4,2276908,"[nan, nan, nan, 224, nan, nan, nan, nan, nan, ..."


In [1198]:
merged_loc = pd.merge(first_loc_objs, player_prev_obj, on = 'playerID', how = 'left')
merged_loc.head()

Unnamed: 0,playerID,objID,prev_obj
0,2276905,"[3161, 3161, 3161, 647, 382, 1603, 2140, 3161,...","[nan, 3161, nan, 3161, nan, 3161, nan, nan, 0,..."
1,2276906,"[87, 2919, 2146, 242, 223, 1160, 1242, 30, 396...","[nan, nan, nan, nan, nan, nan, nan, 0, nan, 0,..."
2,2276907,[87],[nan]
3,2276908,"[87, 45, 224, 34, 225, 225, 66, 224, 32, 150]","[nan, nan, nan, 224, nan, nan, nan, nan, nan, ..."
4,2276909,"[64, 66, 45, 48, 614, 614, 224, 224, 602, 45, ...","[nan, 64, nan, nan, nan, nan, nan, 0, nan, nan..."


In [1199]:
creator_newdf = creator_df.copy()
for i,j in merged_loc.iterrows():
    obj_list = j['objID']
    player = j['playerID']
    prev_list = j['prev_obj']
    objs = [i for i in obj_list if i not in prev_list]
    if player in creator_newdf.playerID.values:
        old_list = creator_newdf.loc[creator_newdf.playerID == player]['objID'].values[0]
        entry = pd.Series([objs + old_list], index = creator_newdf.loc[creator_newdf.playerID == player].index, dtype = 'object')
        creator_newdf.loc[creator_newdf.playerID == player, 'newobjID'] = entry         
    else: 
        creator_newdf = creator_newdf.append({'playerID':player, 'newobjID':objs}, ignore_index=True)

In [1200]:
creator_newdf['num_innovations'] = creator_newdf['newobjID'].apply(find_num)

In [1201]:
creator_newdf = creator_newdf[['playerID','newobjID','num_innovations']].sort_values(by='num_innovations', ascending=False)


In [1202]:
creator_newdf.head()

Unnamed: 0,playerID,newobjID,num_innovations
0,2279373,"[3161, 2144, 812, 812, 812, 48, 3161, 593, 812...",93
1,2279012,"[1247, 1247, 135, 1113, 292, 334, 1108, 1146, ...",75
3,2281733,"[1157, 471, 1114, 126, 190, 190, 135, 190, 190...",67
8,2276905,"[3161, 3161, 3161, 647, 382, 1603, 2140, 3161,...",66
2,2279579,"[3161, 3161, 3161, 33, 107, 292, 3161, 292, 31...",66


In [1203]:
innovation_expertise_merged = pd.merge(creator_newdf, expertise[['playerID','age','gametime']], on= 'playerID')
innovation_expertise_merged.head(n=20)

Unnamed: 0,playerID,newobjID,num_innovations,age,gametime
0,2279373,"[3161, 2144, 812, 812, 812, 48, 3161, 593, 812...",93,60.0,8190.79
1,2279012,"[1247, 1247, 135, 1113, 292, 334, 1108, 1146, ...",75,60.0,3753.54
2,2281733,"[1157, 471, 1114, 126, 190, 190, 135, 190, 190...",67,60.0,26092.11
3,2276905,"[3161, 3161, 3161, 647, 382, 1603, 2140, 3161,...",66,60.0,9928.12
4,2279579,"[3161, 3161, 3161, 33, 107, 292, 3161, 292, 31...",66,60.0,134739.56
5,2278592,"[34, 39, 292, 33, 2419, 53, 57, 292, 53, 45, 6...",65,52.79,43105.78
6,2285148,"[30, 57, 54, 54, 54, 30, 58, 53, 57, 55, 54, 5...",65,59.63,18576.35
7,2279230,"[1422, 1422, 1422, 42731087, 1422, 1422, 2881,...",65,60.0,8125.17
8,2284428,"[2144, 2144, 152, 806, 152, 34, 3146, 2835, 34...",65,60.0,15738.93
9,2276927,"[198, 3051, 1376, 1314, 225, 225, 225, 225, 22...",63,60.0,33202.2


In [1204]:
innovation_expertise_merged[['playerID','num_innovations','age','gametime']].to_csv('innovations_v2.csv')