# Upload demographic data to database
Natalia Vélez, April 2021

In [1]:
import pandas as pd
import pymongo

Connect to database:

In [2]:
# Connect to the `ohol` MongoDB database.
# The key file is read as plain text: its first line is used as the username
# and its second line as the password.
keyfile = '../6_database/credentials.key'
with open(keyfile, "r") as key_fh:  # context manager closes the file handle promptly
    creds = key_fh.read().splitlines()
myclient = pymongo.MongoClient('134.76.24.75', username=creds[0], password=creds[1], authSource='ohol')
db = myclient.ohol

# Sanity check: show the database handle and the collections it currently holds.
print(db)
print(db.list_collection_names())

Database(MongoClient(host=['134.76.24.75:27017'], document_class=dict, tz_aware=False, connect=True, authsource='ohol'), 'ohol')
['tfidf_matrix.files', 'maplogs', 'item_embeddings', 'tech_tree', 'lifelogs', 'item_links_demo', 'objects', 'expanded_transitions', 'avatar_embeddings', 'tfidf_matrix.chunks', 'transitions', 'activity_matrix.files', 'activity_matrix.chunks', 'cleaned_job_matrix.chunks', 'cleaned_job_matrix.files', 'activity_labels', 'item_links', 'categories']


Load lifelogs:

In [3]:
# Read the compact lifelog table and preview it.
lifelog_path = 'outputs/all_lifelogs_compact.csv'
life_df = pd.read_csv(lifelog_path)
print(life_df.shape)
life_df.head(5)

(3834737, 16)


Unnamed: 0,player,avatar,tBirth,parent,birth,sex,tDeath,death,age,cause_of_death,birthX,birthY,deathX,deathY,first,last
0,79d28bc07c1f45c6602d5d84f506e36a795751dc,4109237,1617695981,4109188,[-16083 279],F,,[],,,-16083,279,,,,
1,3da17539b16d6fe6911aefc57388bae9d1303e22,4109236,1617695903,4109184,[-15482 374],F,,[],,,-15482,374,,,,
2,b14977d2ebf15c9c0d643378aacedb42e8a43757,4109235,1617695831,4109188,[-16095 276],F,,[],,,-16095,276,,,,
3,2da3db708a04acdf8f3e52486e4cfbd820cfb766,4109234,1617695804,4109214,[-16807 79],M,1617696000.0,[-16807 79],0.03,disconnect,-16807,79,-16807.0,79.0,,
4,3da17539b16d6fe6911aefc57388bae9d1303e22,4109233,1617695734,4109211,[-16219 -46],M,1617696000.0,[-16354 -121],2.61,hunger,-16219,-46,-16354.0,-121.0,,


Load families:

In [4]:
# Read the avatar -> family assignment table and preview it.
family_path = 'outputs/family_playerID.csv'
family_df = pd.read_csv(family_path)
print(family_df.shape)
family_df.head(5)

(3834737, 2)


Unnamed: 0,avatar,family
0,4109234,time-1617694919_eve-4109214_name-(missing)
1,4109230,time-1617694919_eve-4109214_name-(missing)
2,4109225,time-1617694919_eve-4109214_name-(missing)
3,4109222,time-1617694919_eve-4109214_name-(missing)
4,4109217,time-1617694919_eve-4109214_name-(missing)


Has anyone not been assigned a family?

In [5]:
# Avatars that appear in the family table.
assigned_family = family_df['avatar']

# Show every lifelog row whose avatar is missing from that table
# (an empty frame means every avatar has a family).
has_family = life_df['avatar'].isin(assigned_family)
life_df.loc[~has_family]

Unnamed: 0,player,avatar,tBirth,parent,birth,sex,tDeath,death,age,cause_of_death,birthX,birthY,deathX,deathY,first,last


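Compute expertise: for each life, the number of lives the same player had started before it (`n_lives`) and the summed `age` of those earlier lives (`gametime`):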

In [6]:
# Expertise measures, one row per life (avatar), ordered by player and birth time:
#   n_lives  - how many lives the same player started before this one
#   gametime - summed `age` of the player's earlier lives
expert_df = life_df[['avatar', 'player', 'tBirth', 'age']]
expert_df = expert_df.sort_values(by=['player', 'tBirth'])
expert_df['n_lives'] = expert_df.groupby(['player']).cumcount()

# Lives with no recorded age contribute 0. Without this, cumsum() leaves a NaN
# at that life, shift() moves the NaN onto the player's *next* life, and the
# final fillna(0) would wrongly reset that life's gametime to 0.
expert_df['age'] = expert_df['age'].fillna(0)
cumulative_age = expert_df.groupby(['player'])['age'].cumsum()

# Shift within each player so a life only counts the lives that came before it;
# the NaN this leaves on a player's first life is filled with 0.
expert_df['gametime'] = cumulative_age.groupby(expert_df['player']).shift().fillna(0)
expert_df = expert_df.drop(columns=['tBirth', 'age']).reset_index(drop=True)

expert_df.head(10)

Unnamed: 0,avatar,player,n_lives,gametime
0,630641,00006debdd4d630a4a5aa395135a70a69e0db01e,0,0.0
1,630737,00006debdd4d630a4a5aa395135a70a69e0db01e,1,13.88
2,630799,00006debdd4d630a4a5aa395135a70a69e0db01e,2,18.86
3,214206,0001394319e5e63bb74219fef647e9ff24507a21,0,0.0
4,214530,0001394319e5e63bb74219fef647e9ff24507a21,1,33.78
5,214960,0001394319e5e63bb74219fef647e9ff24507a21,2,41.03
6,215019,0001394319e5e63bb74219fef647e9ff24507a21,3,47.76
7,2783339,0002e5ea5ce7cfd761135d255a245a3344af4377,0,0.0
8,2783430,0002e5ea5ce7cfd761135d255a245a3344af4377,1,60.0
9,2784552,0002e5ea5ce7cfd761135d255a245a3344af4377,2,83.77


Merge lifelogs with family assignments and expertise measures:

In [8]:
# Attach the family label and the expertise measures to every lifelog row.
# validate='one_to_one' makes pandas raise a MergeError if a merge key is
# duplicated on either side, instead of silently multiplying rows.
merged_df = pd.merge(life_df, family_df, on='avatar', validate='one_to_one')
merged_df = pd.merge(merged_df, expert_df, on=['avatar', 'player'], validate='one_to_one')

print('Before merge: %s' % str(life_df.shape))
print('After merge: %s' % str(merged_df.shape))

# Both merges are inner joins, so an unmatched avatar would be dropped without
# warning. Stop here rather than upload an incomplete table.
assert len(merged_df) == len(life_df), (
    'Merge changed the number of rows: %d -> %d' % (len(life_df), len(merged_df))
)

# 'birth' and 'death' are dropped before export; in the preview above they hold
# the same coordinates as birthX/birthY and deathX/deathY, as bracketed strings.
# The result is a list with one dict per life, ready for insert_many.
merged_data = merged_df.drop(columns=['birth','death']).to_dict('records')
del merged_df  # only the records are needed from here on; free the frame
print(merged_data[0])

Before merge: (3834737, 16)
After merge: (3834737, 19)
{'player': '79d28bc07c1f45c6602d5d84f506e36a795751dc', 'avatar': 4109237, 'tBirth': 1617695981, 'parent': 4109188, 'sex': 'F', 'tDeath': nan, 'age': nan, 'cause_of_death': nan, 'birthX': -16083, 'birthY': 279, 'deathX': nan, 'deathY': nan, 'first': nan, 'last': nan, 'family': 'time-1617679935_eve-4108925_name-(missing)', 'n_lives': 427, 'gametime': 21694.880000000005}


Upload merged dataframe to database:

In [9]:
# Bulk-insert one document per life into the `lifelogs` collection.
# NOTE(review): nothing here checks for documents that are already present —
# confirm the collection's state before re-running this cell.
life_col = db['lifelogs']
life_col.insert_many(merged_data)

<pymongo.results.InsertManyResult at 0x2b6727d60048>