In [36]:
import pandas as pd
import numpy as np
from tqdm import tqdm

### Big Server Data only
The bigserver is the only server that records all the data we are interested in for our analyses (lifelogs, map change, etc.), so we will just look at that separately throughout this notebook.

In [2]:
lifelog_data = pd.read_csv('outputs/lifelogs_201907-202005_data.tsv', sep='\t')
lifelog_data.head()

Unnamed: 0.1,Unnamed: 0,release,server,event,timestamp,playerID,hash,uniqueID,age,sex,location,parent,cause_of_death,killer,pop,chain
0,0,254.0,bigserver2,D,1564444828,1842380,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842380_bigserver2_254.0,1.81,F,"(48, 94)",,hunger,,82,
1,1,254.0,bigserver2,D,1564444830,1842301,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842301_bigserver2_254.0,8.0,M,"(40, 350)",,hunger,,82,
2,2,254.0,bigserver2,B,1564444832,1842398,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842398_bigserver2_254.0,,F,"(-325, 209)",1842145.0,,,84,3.0
3,3,254.0,bigserver2,B,1564444836,1842399,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842399_bigserver2_254.0,,M,"(67, -266)",1842397.0,,,83,2.0
4,4,254.0,bigserver2,D,1564444859,1842311,dfd85ac03c4dd577352484b023d19d521b592696,1842311_bigserver2_254.0,7.76,M,"(-200, -123)",,hunger,,82,


In [3]:
big_df = lifelog_data[lifelog_data['server'] == 'bigserver2'].reset_index(drop=True)
big_df.head()

Unnamed: 0.1,Unnamed: 0,release,server,event,timestamp,playerID,hash,uniqueID,age,sex,location,parent,cause_of_death,killer,pop,chain
0,0,254.0,bigserver2,D,1564444828,1842380,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842380_bigserver2_254.0,1.81,F,"(48, 94)",,hunger,,82,
1,1,254.0,bigserver2,D,1564444830,1842301,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842301_bigserver2_254.0,8.0,M,"(40, 350)",,hunger,,82,
2,2,254.0,bigserver2,B,1564444832,1842398,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842398_bigserver2_254.0,,F,"(-325, 209)",1842145.0,,,84,3.0
3,3,254.0,bigserver2,B,1564444836,1842399,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842399_bigserver2_254.0,,M,"(67, -266)",1842397.0,,,83,2.0
4,4,254.0,bigserver2,D,1564444859,1842311,dfd85ac03c4dd577352484b023d19d521b592696,1842311_bigserver2_254.0,7.76,M,"(-200, -123)",,hunger,,82,


### Unique player hashes

In [13]:
death_data = lifelog_data[lifelog_data['event'] == 'D'].reset_index(drop=True)
death_data.head()

Unnamed: 0.1,Unnamed: 0,release,server,event,timestamp,playerID,hash,uniqueID,age,sex,location,parent,cause_of_death,killer,pop,chain
0,0,254.0,bigserver2,D,1564444828,1842380,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842380_bigserver2_254.0,1.81,F,"(48, 94)",,hunger,,82,
1,1,254.0,bigserver2,D,1564444830,1842301,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842301_bigserver2_254.0,8.0,M,"(40, 350)",,hunger,,82,
2,4,254.0,bigserver2,D,1564444859,1842311,dfd85ac03c4dd577352484b023d19d521b592696,1842311_bigserver2_254.0,7.76,M,"(-200, -123)",,hunger,,82,
3,5,254.0,bigserver2,D,1564444861,1842145,f766576701306189b31a049054c0840b39f84c51,1842145_bigserver2_254.0,18.89,F,"(-322, 242)",,hunger,,82,
4,6,254.0,bigserver2,D,1564444863,1842359,9381dc0dab74b62eee67d3dc275f5c61ea51bcd0,1842359_bigserver2_254.0,4.23,M,"(-308, 258)",,hunger,,82,


In [14]:
big_hash = death_data.groupby('hash')['timestamp'].count().reset_index()
big_hash = big_hash.rename(columns={"hash": "player", "timestamp": "n_lives"})
big_hash.head()

Unnamed: 0,player,n_lives
0,0002e5ea5ce7cfd761135d255a245a3344af4377,397
1,00047b60a01d908e2407b05a09b404c2f6cec6f6,4
2,0004f2a9005cf3562482fe408876808498e1fad5,11
3,000548ed7f04e2ddf81b4016d87d53ddbbd2e89f,45
4,0005eb266ff93d00bb6072d9c07f3a47d4864219,6


In [19]:
death_data_new = death_data[['release','timestamp','hash','playerID','age','cause_of_death']].copy()
death_data_new['n_live'] = np.inf
death_data_new['cumulative_game_time'] = np.inf

In [21]:
death_data_new.head()

Unnamed: 0,release,timestamp,hash,playerID,age,cause_of_death,n_live,cumulative_game_time
0,254.0,1564444828,583a6de73718f5dd0ddf388f4e68dc060b15e6df,1842380,1.81,hunger,inf,inf
1,254.0,1564444830,520ddb069aa77dcb202dd0310a8852e8fddc58dd,1842301,8.0,hunger,inf,inf
2,254.0,1564444859,dfd85ac03c4dd577352484b023d19d521b592696,1842311,7.76,hunger,inf,inf
3,254.0,1564444861,f766576701306189b31a049054c0840b39f84c51,1842145,18.89,hunger,inf,inf
4,254.0,1564444863,9381dc0dab74b62eee67d3dc275f5c61ea51bcd0,1842359,4.23,hunger,inf,inf


In [None]:

death_newdf = pd.DataFrame(['release','timestamp','hash','playerID','age','cause_of_death','n_live','cumulative_game_time'])
for k in tqdm(range(len(death_data.hash.unique()))):
    i = death_data.hash.unique()[k]
    sub_data = death_data.query('hash == @i').copy().reset_index(drop = True)
    sub_data = sub_data[['release','timestamp','hash','playerID','age','cause_of_death']]
    sub_data['cumulative_game_time'] = sub_data.index +1
    sub_data['n_lives'] = big_hash.query('player == @i')['n_lives']
    death_newdf.append(sub_data)
    
    
    

100%|██████████| 28495/28495 [1:41:59<00:00,  4.66it/s]  


In [None]:
death_newdf