# Check for players with missing families
Grace Deng & Natalia Vélez, August 2020

In [1]:
import pandas as pd
import numpy as np

## Step 1: Reproduce the error

In [2]:
i = 1573982073
baseDir = '../data/publicMapChangeData/bigserver2.onehouronelife.com/'

In [3]:
mydf = pd.read_csv(baseDir + str(i) + 'time_mapLog.txt')
mydf[['time','locX','locY','obj','playerID']] = mydf[mydf.columns[0]].str.split(" ", expand=True)
mydf = mydf[['time','locX','locY','obj','playerID']]
mydf['playerID'] = mydf['playerID'].astype(int)
mydf.head()

Unnamed: 0,time,locX,locY,obj,playerID
0,0.0,-5998,81,0,2281395
1,0.0,-7918,-1039,0,2281463
2,0.21,-6004,94,577,2281440
3,0.24,-5991,85,0,2281459
4,0.29,-9753,-639,292,2281355


In [4]:
fam= pd.read_csv('../2_demographics/outputs/family_playerID.tsv', sep = '\t', index_col = 0)
fam.head(n=10)

  mask |= (ar1 == a)


Unnamed: 0,playerID,family
0,3080084,time-1592284232_eve-3080067_name-PICKLE
1,3080114,time-1592284232_eve-3080067_name-PICKLE
2,3080111,time-1592284232_eve-3080067_name-PICKLE
3,3080108,time-1592284232_eve-3080067_name-PICKLE
4,3080104,time-1592284232_eve-3080067_name-PICKLE
5,3080130,time-1592284232_eve-3080067_name-PICKLE
6,3080122,time-1592284232_eve-3080067_name-PICKLE
7,3080117,time-1592284232_eve-3080067_name-PICKLE
8,3080067,time-1592284232_eve-3080067_name-PICKLE
9,3080044,time-1592283401_eve-3080044_name-KORE


In [5]:
fam.query('playerID == 2281395')

Unnamed: 0,playerID,family


In [6]:
def find_fam(playerId):
    
    family = fam.loc[fam['playerID'] == playerId,'family'].tolist()
    if len(family):    
        fam_name = family[0].split('-')[-1]
    else:
        fam_name = "UnKnown"
    
    return fam_name

In [7]:
mydf = mydf.iloc[0:1000]

In [8]:
mydf['fam'] = mydf['playerID'].apply(find_fam)

In [9]:
unknown_fams = mydf.query('fam == "UnKnown"')
unknown_players = unknown_fams['playerID'].values
unknown_fams.head()

Unnamed: 0,time,locX,locY,obj,playerID,fam
0,0.0,-5998,81,0,2281395,UnKnown
1,0.0,-7918,-1039,0,2281463,UnKnown
2,0.21,-6004,94,577,2281440,UnKnown
3,0.24,-5991,85,0,2281459,UnKnown
4,0.29,-9753,-639,292,2281355,UnKnown


## Step 2: Fix

Load lifelog data across all eras:

In [10]:
lifelog_data = pd.read_csv('outputs/all_lifelogs_compact.tsv', sep='\t', index_col=0)
lifelog_data.head()

  mask |= (ar1 == a)


Unnamed: 0,release,era,hash,playerID,tBirth,parent,birth,tDeath,death,age,cause_of_death,birthX,birthY,deathX,deathY,first,last
0,342.0,boundless,2cdc4d0b016f9c0d96d27013f1d19c42596e0c5c,3080130,1592288229,3080111,[-454640 41],1592288538,[-454625 49],5.14,hunger,-454640,41,-454625,49,CLAUDINE,P
1,342.0,boundless,a8b5975a81344f690f45ffc2554a0bc35af557a9,3080128,1592288191,3080105,[-454394 -4],1592291791,[-454383 15],60.0,oldAge,-454394,-4,-454383,15,DACY,LIGHTNING
2,342.0,boundless,c5838da39fd525a2ac24aee049dae6a42e502236,3080125,1592288138,3080101,[-311131 -322],1592291738,[-311112 -325],59.98,hunger,-311131,-322,-311112,-325,SHOKO,GREATHOUSE
3,342.0,boundless,11f9fce50bbc1ebbba5126434aea123e79249942,3080122,1592288029,3080104,[-454638 41],1592288696,[-454647 58],11.12,hunger,-454638,41,-454647,58,RANGER,PICKLE
4,342.0,boundless,b265b1958566b474094cf0282a0fa59e6d622885,3080120,1592287917,3080101,[-311111 -311],1592291365,[-311115 -66],57.46,hunger,-311111,-311,-311115,-66,HAPPY,GREATHOUSE


Search for missing player:

In [11]:
missing_player = 2281395
lifelog_data[lifelog_data['playerID'] == missing_player]

Unnamed: 0,release,era,hash,playerID,tBirth,parent,birth,tDeath,death,age,cause_of_death,birthX,birthY,deathX,deathY,first,last
479372,284.0,boundless,e4fc9f1fc0bc611c05205e7aaaba3a51081374dc,2281395,1573979732,2281332,[-5999 69],1573982412,[-5993 81],44.66,murdered,-5999,69,-5993,81,ROSEALIE,FINCH


In [12]:
qID = missing_player
while True:
    newID = lifelog_data[lifelog_data['playerID'] == qID]['parent'].values[0]
    if newID > 0:
        print('%s <-- %s' % (newID, qID))
        qID = newID
    else:
        break

2281332 <-- 2281395
2281269 <-- 2281332
2281230 <-- 2281269
2281191 <-- 2281230
2281120 <-- 2281191
2281081 <-- 2281120
2281042 <-- 2281081
2280998 <-- 2281042
2280947 <-- 2280998
2280889 <-- 2280947
2280767 <-- 2280889
2280724 <-- 2280767
2280621 <-- 2280724
2280512 <-- 2280621
2280467 <-- 2280512
2280388 <-- 2280467
2280322 <-- 2280388
2280268 <-- 2280322
2280222 <-- 2280268
2280122 <-- 2280222
2280071 <-- 2280122
2279990 <-- 2280071


IndexError: index 0 is out of bounds for axis 0 with size 0

In [13]:
qID

2279990

In [None]:
pd.isna(unknown_lifelogs['last'])

In [None]:
unknown_lifelogs = lifelog_data[np.isin(lifelog_data['playerID'], unknown_players)]
unknown_lifelogs.loc[pd.isna(unknown_lifelogs['last']), 'last'] = 'missing'
unknown_lasts = np.unique(unknown_lifelogs['last'])
print(unknown_lasts)

In [15]:
missing_player = 2279990
lifelog_data[lifelog_data['playerID'] == missing_player]

Unnamed: 0,release,era,hash,playerID,tBirth,parent,birth,tDeath,death,age,cause_of_death,birthX,birthY,deathX,deathY,first,last


Search for player family: