# Tag each player by the depth of the most sophisticated object they have interacted with
    
Created by Grace Deng in September, 2020

In [2]:
%matplotlib inline

import os, re, glob, datetime, json
from os.path import join as opj
import pandas as pd
import numpy as np
import scipy.stats
from datetime import datetime
from tqdm.notebook import tqdm

## load data

In [3]:
baseDir = '../data/publicMapChangeData/bigserver2.onehouronelife.com/'

In [4]:
start = 1573982073

read all mapChange data

In [5]:
str_extract = lambda pattern, s: re.search(pattern, s).group(0)
int_extract = lambda pattern, s: int(str_extract(pattern, s))

In [6]:
files_tot = []
for ts in glob.glob(baseDir + '*'):
    files_tot.append(ts)

In [7]:
file_names = []
for f in files_tot:
    fn = f.split('/')[-1]
    file_names.append(fn)

sort according to timestamp

In [8]:
file_names.sort(key=lambda f: int_extract('[0-9]+(?=)', f))

In [9]:
# file_names

create a dictionary for mapSeed -- mapLog

In [10]:
map_seeds = [int_extract('[0-9]+(?=)', fn) for fn in file_names if 'mapSeed' in fn]
print("Looking at mapSeeds: ", map_seeds)

Looking at mapSeeds:  [1573895673, 1574102503, 1576038671, 1578345720, 1578354747, 1579713519, 1580144896, 1581985139, 1583642903, 1584061484, 1585440511, 1585512770, 1585603481, 1587166656]


In [11]:
file_dict = {}
for fn in file_names:
    timestamp = int_extract('[0-9]+(?=)', fn)
    if timestamp in map_seeds:
        map_start = timestamp
        file_dict[map_start] = [timestamp]
    else:
        file_dict[map_start].append(timestamp)

In [12]:
# file_dict

### test: only look at the first slice

In [13]:
subset = file_dict[list(file_dict.keys())[0]]
print("Now parseing mapChange data: ", subset)

Now parseing mapChange data:  [1573895673, 1573982073, 1574068473]


In [14]:
start = pd.read_csv(baseDir + str(subset[0]) + 'time_mapLog.txt')
time0 = float(start.columns[0].split(": ")[1])

In [15]:
col = ['time','locX','locY','obj','playerID']
data = pd.DataFrame(columns = col)
for i in subset:
    mydf = pd.read_csv(baseDir + str(i) + 'time_mapLog.txt')
    start_time = float(mydf.columns[0].split(": ")[1])
    mydf[['time','locX','locY','obj','playerID']] = mydf[mydf.columns[0]].str.split(" ", expand=True)
    mydf = mydf.dropna()
    mydf['time'] = mydf['time'].astype(float) + start_time - time0
    mydf['playerID'] = mydf['playerID'].astype(int)
    mydf = mydf[col]
    data = data.append(mydf, ignore_index = True)

In [16]:
data.head(n = 10)

Unnamed: 0,time,locX,locY,obj,playerID
0,82.36,-5123,-1403,74,-1
1,82.36,-5139,-1398,2919,-1
2,82.36,-5138,-1398,2917,-1
3,82.36,-5137,-1398,198,-1
4,82.36,-5136,-1398,2099,-1
5,82.36,-5136,-1397,2884,-1
6,82.37,-5137,-1383,198,-1
7,122.31,-5137,-1395,0,2276905
8,125.06,-5138,-1394,0,2276905
9,127.21,-5138,-1395,0,-1


In [18]:
num_players = len(data.playerID.unique())-1
print("how many players: ", num_players)

how many players:  7170


### load depth data

In [19]:
depth = pd.read_csv('../3_technology/tech_outputs/num_unique_ingredients.csv')
depth.head()

Unnamed: 0,id,name,num_ingredients
0,11,Skin Tone A &B &C &D &E &F,0
1,19,Female001 D,0
2,30,Wild Gooseberry Bush,0
3,31,Gooseberry,1
4,32,Big Hard Rock,0


In [20]:
obj_data = data.groupby(['playerID','obj']).count().reset_index()

In [21]:
player_obj_data = obj_data.loc[(obj_data.playerID!=-1) & (obj_data.obj!='0')][['playerID', 'obj']]

In [22]:
player_obj_data['objID'] = player_obj_data['obj'].apply(lambda x: int_extract('[0-9]+', x))

In [23]:
player_obj_data_new = pd.merge(player_obj_data, depth, left_on = 'objID', right_on = 'num_ingredients')

In [24]:
tech_sophist_player = player_obj_data_new.groupby('playerID')['num_ingredients'].apply(max).to_frame().reset_index()

In [25]:
tech_sophist_player

Unnamed: 0,playerID,num_ingredients
0,2276905,406
1,2276906,292
2,2276907,87
3,2276908,244
4,2276909,346
5,2276910,87
6,2276911,297
7,2276912,224
8,2276913,279
9,2276914,357
