# Modeling individual survival
Natalia Vélez, August 2020

The goal of this notebook is to create the inputs for the survival regression.

In [1]:
%matplotlib inline

import os,re,glob,datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling for every figure in this notebook: seaborn's 'white' style
# together with the 'talk' context preset.
sns.set_style('white')
sns.set_context('talk')

## Find families & players included in analysis

Find start of new-style map logs:

In [2]:
# Each line of the seed file is parsed as an integer; the values are treated
# as Unix timestamps below (see datetime.fromtimestamp).
seed_file = '../3_technology/outputs/seed_changes.txt'
with open(seed_file, 'r') as fh:
    seeds = list(map(int, fh.read().splitlines()))

# The first entry in the file marks the start of the analysis window.
start_t = seeds[0]

print('First seed: %i' % start_t)
print(datetime.datetime.fromtimestamp(start_t))

First seed: 1573895673
2019-11-16 09:14:33


Families included in analysis:

In [3]:
# Load per-family fitness estimates and keep only the families founded at or
# after the first seed change (start_t).
success_df = pd.read_csv('../2_demographics/outputs/family_fitness.tsv', sep='\t')

# The family identifier embeds its start time as "time-<digits>"; pull the
# digits out as a Series (expand=False) and convert them to integers.
# NOTE: np.int was removed in NumPy 1.24, so an explicit 64-bit integer dtype
# is used instead.
success_df['fam_start'] = success_df['family'].str.extract(r'(?<=time-)([0-9]+)', expand=False)
success_df['fam_start'] = success_df['fam_start'].astype(np.int64)
success_df = success_df[success_df['fam_start'] >= start_t]

# Family identifiers used by later cells to filter the lifelogs.
included_families = success_df['family'].values

print(success_df.shape)
success_df.head()

(4054, 11)


Unnamed: 0,family,sum,count,a,b,beta_mean,beta_var,snr,weighted_size,longest_path,fam_start
92,time-1573895755_eve-2276905_name-ZABICKI,9,17,12,11,0.521739,0.010397,50.181818,8.869565,3,1573895755
93,time-1573895904_eve-2276911_name-ROHRER,1,3,4,5,0.444444,0.024691,18.0,1.333333,2,1573895904
94,time-1573896101_eve-2276917_name-BADA,14,32,17,21,0.447368,0.006339,70.571429,14.315789,5,1573896101
95,time-1573896134_eve-2276919_name-SARINA,33,68,36,38,0.486486,0.003331,146.052632,33.081081,9,1573896134
96,time-1573896495_eve-2276927_name-LISSADE,311,454,314,146,0.682609,0.00047,1452.465753,309.904348,46,1573896495


Load each player's family:

In [4]:
# Lookup table with one row per avatar and the family it belongs to; the
# file's first column is used as the row index.
family_file = '../2_demographics/outputs/family_playerID.tsv'
family_df = pd.read_csv(family_file, sep='\t', index_col=0)
family_df.head()

  mask |= (ar1 == a)


Unnamed: 0,avatar,family
0,3308592,time-1599460982_eve-3308568_name-SOL
1,3308568,time-1599460982_eve-3308568_name-SOL
2,3308504,time-1599458188_eve-3308504_name-SOL
3,3308439,time-1599456178_eve-3308427_name-MORALES
4,3308507,time-1599456178_eve-3308427_name-MORALES


Prepare lifelogs:

In [5]:
# Attach each avatar's family to its lifelog entry (inner join on avatar)
all_life = pd.read_csv('../2_demographics/outputs/all_lifelogs_compact.tsv', sep='\t', index_col=0)
life_df = all_life.merge(family_df, on='avatar')

# Keep only avatars that:
# - belong to families within the time period studied
# - did not disconnect as infants (age > 2)
# - have a non-missing age (i.e., had died by the time data were accessed)
in_study = life_df['family'].isin(included_families)
past_infancy = life_df['age'] > 2
life_df = life_df[in_study & past_infancy].dropna(subset=['age'])

# Clean up: renumber the rows and keep only the columns of interest
keep_cols = ['player', 'avatar', 'parent', 'family', 'tBirth', 'tDeath', 'age', 'cause_of_death']
life_df = life_df.reset_index(drop=True)[keep_cols]
print(life_df.shape)
life_df.head()

  mask |= (ar1 == a)


(419656, 8)


Unnamed: 0,player,avatar,parent,family,tBirth,tDeath,age,cause_of_death
0,310d12780dbb316a3b50f83adbf9ce76973172bb,3308597,3308571,time-1599447278_eve-3308179_name-MATSU,1599462620,1599464000.0,28.89,hunger
1,de586e48a15f97f09e71355fcc82cea88d29633a,3308595,3308576,time-1599411200_eve-3307135_name-SUN,1599462571,1599465000.0,36.92,hunger
2,3da17539b16d6fe6911aefc57388bae9d1303e22,3308591,3308571,time-1599447278_eve-3308179_name-MATSU,1599462365,1599465000.0,45.82,hunger
3,043719a2996d748e294d308706447d2e6ce30181,3308583,3308537,time-1599454213_eve-3308371_name-LUNA,1599462113,1599463000.0,20.11,hunger
4,49bd234c63fac4073c98cc23dab916528f87e634,3308581,3308545,time-1599447278_eve-3308179_name-MATSU,1599461853,1599465000.0,45.37,hunger


In [6]:
# Sample sizes after filtering: number of distinct players, families, and avatars
n_players, n_families, n_avatars = (
    len(np.unique(life_df[col])) for col in ('player', 'family', 'avatar')
)

summary = 'N =\t%i players\n\t%i families\n\t%i avatars' % (n_players, n_families, n_avatars)
print(summary)

N =	23915 players
	4054 families
	419656 avatars


## Player & parent expertise

In [8]:
# Build per-player expertise from the full lifelogs (all_life), ordering each
# player's lives chronologically by birth time.
# NOTE(review): an earlier version read '../2_demographics/outputs/player_expertise.tsv'
# here instead — presumably a precomputed equivalent; confirm before reinstating.
expert_df = (
    all_life[['player', 'avatar', 'tBirth', 'age']]
    .sort_values(['player', 'tBirth'])
    .reset_index(drop=True)
)

by_player = expert_df.groupby('player')
life_index = by_player.cumcount()        # 0 for a player's first life, 1 for the second, ...
age_so_far = by_player['age'].cumsum()   # running total of age, including the current life

expert_df['n_life'] = life_index
expert_df['gametime'] = age_so_far
# Shift by one life within each player so that gametime only counts lives
# *before* the current one; a player's first life gets 0.
expert_df['gametime'] = expert_df.groupby('player')['gametime'].shift(periods=1, fill_value=0)
expert_df.head()

Unnamed: 0,player,avatar,tBirth,age,n_life,gametime
0,00006debdd4d630a4a5aa395135a70a69e0db01e,630641,1552916631,13.88,0,0.0
1,00006debdd4d630a4a5aa395135a70a69e0db01e,630737,1552917470,4.98,1,13.88
2,00006debdd4d630a4a5aa395135a70a69e0db01e,630799,1552917791,4.68,2,18.86
3,0001394319e5e63bb74219fef647e9ff24507a21,214206,1550132363,33.78,0,0.0
4,0001394319e5e63bb74219fef647e9ff24507a21,214530,1550134720,7.25,1,33.78


Extract each avatar's expertise (number of prior lives and cumulative prior gametime):

In [9]:
# Expertise of each avatar's own player, keyed by avatar ID
child_expertise = expert_df[['avatar', 'n_life', 'gametime']].copy()
child_expertise.head()

Unnamed: 0,avatar,n_life,gametime
0,630641,0,0.0
1,630737,1,13.88
2,630799,2,18.86
3,214206,0,0.0
4,214530,1,33.78


Make a copy (to look up parent expertise):

In [10]:
# Same table, relabelled so that it can be joined on the 'parent' column
parent_columns = {
    'avatar': 'parent',
    'n_life': 'parent_life',
    'gametime': 'parent_gametime',
}
parent_expertise = child_expertise.copy().rename(columns=parent_columns)
parent_expertise.head()

Unnamed: 0,parent,parent_life,parent_gametime
0,630641,0,0.0
1,630737,1,13.88
2,630799,2,18.86
3,214206,0,0.0
4,214530,1,33.78


Merge with lifelogs:

In [21]:
# Left joins keep every lifelog row; avatars or parents without a match in
# the expertise tables end up with NaN in the corresponding columns.
life_expertise = (
    life_df
    .merge(child_expertise, on='avatar', how='left')
    .merge(parent_expertise, on='parent', how='left')
)
life_expertise.head()

Unnamed: 0,player,avatar,parent,family,tBirth,tDeath,age,cause_of_death,n_life,gametime,parent_life,parent_gametime
0,310d12780dbb316a3b50f83adbf9ce76973172bb,3308597,3308571,time-1599447278_eve-3308179_name-MATSU,1599462620,1599464000.0,28.89,hunger,0,0.0,64.0,3179.97
1,de586e48a15f97f09e71355fcc82cea88d29633a,3308595,3308576,time-1599411200_eve-3307135_name-SUN,1599462571,1599465000.0,36.92,hunger,34,887.18,980.0,43618.02
2,3da17539b16d6fe6911aefc57388bae9d1303e22,3308591,3308571,time-1599447278_eve-3308179_name-MATSU,1599462365,1599465000.0,45.82,hunger,245,8579.85,64.0,3179.97
3,043719a2996d748e294d308706447d2e6ce30181,3308583,3308537,time-1599454213_eve-3308371_name-LUNA,1599462113,1599463000.0,20.11,hunger,0,0.0,66.0,1898.64
4,49bd234c63fac4073c98cc23dab916528f87e634,3308581,3308545,time-1599447278_eve-3308179_name-MATSU,1599461853,1599465000.0,45.37,hunger,930,36784.8,143.0,7574.32


Note: Some avatars are missing expertise info. This may be due to an error in logging or in downloading. These are a tiny, tiny fraction of the whole, so we will just drop them for now.

In [22]:
# Rows whose own expertise could not be looked up (NaN after the left merge)
missing_gametime = ~np.isfinite(life_expertise['gametime'])

# Non-Eve rows whose parent's expertise could not be looked up. Whether a row
# is a non-Eve is decided by the parent ID itself (parent > 0; Eves carry
# parent == -1), not by parent_gametime: a test of the form
# (parent_gametime > 0) & ~isfinite(parent_gametime) can never flag a NaN,
# because NaN > 0 is False.
has_parent = life_expertise['parent'] > 0
missing_parent = has_parent & ~np.isfinite(life_expertise['parent_gametime'])

print('%i missing gametime data' % np.sum(missing_gametime))
print('%i non-Eves missing parent gametime' % np.sum(missing_parent))
print('Dropping %i entries for now' % np.sum(missing_gametime | missing_parent))

# Keep only rows with complete expertise information
life_expertise = life_expertise[~missing_gametime & ~missing_parent]
print(life_expertise.shape)

96 missing gametime data
0 non-Eves missing parent gametime
Dropping 96 entries for now
(419560, 12)


## Cumulative viability

In [36]:
# Order each family's members chronologically by birth time
life_viability = (
    life_expertise
    .sort_values(by=['family', 'tBirth'])
    .reset_index(drop=True)
)

# 1 if the avatar lived to at least age 14, else 0
life_viability['adult'] = (life_viability['age'] >= 14.0).astype('int64')

adults_by_family = life_viability.groupby('family')['adult']
# Number of family members born before this avatar (0 for the first member);
# note that this counts every earlier member, not only those flagged as adults.
earlier_members = adults_by_family.cumcount()
# Number of earlier-born family members flagged as adults: the running total
# minus the avatar's own flag, so that an avatar never counts itself.
earlier_adults = adults_by_family.cumsum() - life_viability['adult']

life_viability['adult_count'] = earlier_members
life_viability['adult_sum'] = earlier_adults
life_viability.head()

Unnamed: 0,player,avatar,parent,family,tBirth,tDeath,age,cause_of_death,n_life,gametime,parent_life,parent_gametime,adult,adult_count,adult_sum
0,6290670c4fe41726873b54d070b869fdcc9e06e7,2276905,-1,time-1573895755_eve-2276905_name-ZABICKI,1573895755,1573899000.0,60.0,oldAge,201,9736.39,,,1,0,0
1,d74e306b2dc466835e85acf5b53a4391e2346857,2276906,2276905,time-1573895755_eve-2276905_name-ZABICKI,1573895772,1573896000.0,10.05,hunger,229,7267.02,201.0,9736.39,0,1,1
2,b0527ef2371111cb11369f4a5fe021c1bdb06ed9,2276909,2276905,time-1573895755_eve-2276905_name-ZABICKI,1573895873,1573899000.0,45.99,murdered,416,15933.89,201.0,9736.39,1,2,1
3,e417361f8e2770a2a5647353af05f6e621be69c0,2276914,2276905,time-1573895755_eve-2276905_name-ZABICKI,1573895976,1573898000.0,41.77,murdered,25,678.56,201.0,9736.39,1,3,2
4,0c7bc0236deae0bb888f63c4f4d94f4ab7cefad1,2276922,2276905,time-1573895755_eve-2276905_name-ZABICKI,1573896361,1573897000.0,9.73,hunger,45,1931.45,201.0,9736.39,0,4,3


## Cumulative repertoire size