In [2]:
import csv,sys, math, random, collections, pprint, json, os, gzip, pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from typing import List, Any
from itertools import groupby
from IPython.display import JSON, display_javascript, display_html, display
from sklearn import tree
from sklearn.naive_bayes import MultinomialNB, CategoricalNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
# Record the interpreter version in the cell output so the saved notebook
# documents the environment it was run in.
print(sys.version)
# Notebook-wide matplotlib defaults for figure size and resolution.
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 200

3.8.5 (default, Jan 27 2021, 15:41:15) 
[GCC 9.3.0]


In [2]:
# Keys that diff_elts() skips when comparing two events.
stoplist = {'eventUniqueId', 'timeEpoch', 'timeUTC', 'eventType'}

# Event keys that action_types() reports as player actions (sorted list).
choice_event_keys = sorted(['CardPlacedInMachine', 'FlagPlaced', 'FlagMoved',
                     'CardPlacedInTray', 'CardSwapped', 
                     'RainbowPlantPlanted', 'PlantPlanted', 'CreatureSpawned'])
# Event keys that apply_diff() treats as not starting a new event (sorted list).
choice_nonevent_keys = sorted(['GameStart', 'CreatureStateChanged', 'PlantStateChanged', 'TreasureDespawned',
                        'CurrentScreenChanged', 'CreatureSelected', 'CreatureDeselected',
                        'TreasureSpawned', 'PlantWatered', 'TreasureOpened', 'CreatureDespawned',
                        'AllOrbsFilled', 'MushroomOrbFilled', 'WoodyOrbFilled', 'SeedPacketCountChanged',
                        'LeafyOrbFilled', 'FlagCancelled', 'CreatureLocked', 'CreatureUnlocked'])
# Event keys that progress_types() reports as progress (sorted list).
progress_keys = sorted(['TreasureOpened', 'PlantWatered', 'RainbowPlantPlanted', 'AllOrbsFilled', 'MushroomOrbFilled',
                 'WoodyOrbFilled', 'LeafyOrbFilled'])
# Union used to filter the raw event stream and by count_elts().
# NOTE: 'RainbowPlantPlanted' is in both lists above, so it appears twice here.
relevant_keys = sorted(choice_event_keys + progress_keys)
# Per-player creature field names: player1Creature1..3 and player2Creature1..3.
creature_keys_p1 = [f"player1Creature{j}" for j in range(1,4)]
creature_keys_p2 = [f"player2Creature{j}" for j in range(1,4)]
# Nested here: one list of creature keys per player.
# NOTE(review): a later cell rebinds `creatures` to a flat list of all six keys.
creatures = [creature_keys_p1,creature_keys_p2]
# Per-creature fields that find_creatures_in_elt() copies out of an event.
creature_stats = ['cardsExecuted','machineType','plantsPlanted','plantsWatered','starsEarned']

# NOTE(review): not referenced anywhere else in the visible notebook.
event_change_threshold = 30

In [3]:
# Debug runs sample a single dataset file; normal runs sample five.
DEBUG = False

num_datasets = 1 if DEBUG else 5

In [4]:
def diff_elts(a: dict, b: dict, ignore=None) -> dict:
    """Return the fields of ``a`` whose values differ from the same fields in ``b``.

    Only keys present in both mappings are compared. Nested dicts are compared
    recursively and reported as nested diffs that contain just the changed
    fields, even when only a single nested field changed.

    Args:
        a: The event (or nested sub-dict) whose values are reported.
        b: The event (or nested sub-dict) to compare against.
        ignore: Keys to skip at every nesting level. Defaults to the
            module-level ``stoplist``.

    Returns:
        A dict of changed fields, using the values from ``a``. When ``a`` has
        an ``'eventKey'`` it is included as a label (first key), but it never
        counts as a change by itself. An empty dict (falsy) means no change.
    """
    ignored = stoplist if ignore is None else ignore
    changes = {}
    for key, a_val in a.items():
        # 'eventKey' only labels the result; it is added back below.
        if key == 'eventKey' or key not in b or key in ignored:
            continue
        b_val = b[key]
        if isinstance(a_val, dict):
            if isinstance(b_val, dict):
                nested = diff_elts(a_val, b_val, ignored)
                if nested:
                    changes[key] = nested
            else:
                # A dict replaced something that is not a dict: the whole
                # value changed, and recursing into b_val would fail.
                changes[key] = a_val
        elif a_val != b_val:
            changes[key] = a_val
    if not changes:
        return {}
    if 'eventKey' in a:
        return {'eventKey': a['eventKey'], **changes}
    return changes


def find_likely_player(datum: dict) -> tuple:
    """Report which players are mentioned by the keys of ``datum``.

    A key mentions player 1 when it contains the substring ``"layer1"``
    (so both ``player1...`` and ``Player1...`` match), and likewise
    ``"layer2"`` for player 2.

    Args:
        datum: A mapping of field names, or any falsy value (for example the
            result of a diff that found nothing).

    Returns:
        ``(player1_seen, player2_seen)`` as booleans; ``(False, False)`` when
        ``datum`` is falsy.
    """
    if not datum:
        return False, False
    seen_p1 = any("layer1" in key for key in datum)
    seen_p2 = any("layer2" in key for key in datum)
    return seen_p1, seen_p2


def apply_diff(player, elt):
    """Fold one event into a player's running ``(start, latest, event)`` triple.

    Args:
        player: Indexable triple. Judging by how it is rebuilt here, it holds
            (time the current event began, latest time seen, current event key).
        elt: Event dict with ``'timeEpoch'`` and ``'eventKey'``.

    Returns:
        ``(new_player, summary)``. When the event key equals the current one,
        or is listed in ``choice_nonevent_keys``, only the latest time is
        advanced and ``summary`` is ``""``. Otherwise a new triple starts at
        this event and ``summary`` describes the triple that just ended.
    """
    now = elt['timeEpoch']
    event = elt['eventKey']

    starts_new_event = not (event == player[2] or event in choice_nonevent_keys)
    if starts_new_event:
        summary = f"{player[2]} {player[0]} => {player[1]}"
        return (now, now, event), summary
    return (player[0], now, player[2]), ""

In [5]:
# Load the event stream. If the pickle cache exists it is used as-is;
# otherwise a random sample of the gzipped JSON dumps in ./data is parsed,
# split by site, sorted by time, and the Lawrence part is cached.
pklfilename = "./lawrence.pkl"
lawrence_data = []
nysci_data = []

if not os.path.exists(pklfilename):
    candidates = [entry.path for entry in os.scandir("./data")]
    # random.sample raises ValueError when asked for more items than exist,
    # so never request more files than are available.
    filenames = random.sample(candidates, min(num_datasets, len(candidates)))
    print(filenames)

    for filename in filenames:
        with gzip.open(filename, "rb") as f:
            print(f"\nJSON loading {filename}...", end="")
            t_data_list = list(json.load(f).values())
            print("DONE.")
            print(f"Data filtering {filename}...", end="")
            # One pass: route each record to its site's list.
            for record in t_data_list:
                if "Lawrence" == record["siteName"]:
                    lawrence_data.append(record)
                else:
                    nysci_data.append(record)
            print("DONE.")

    print("Sorting...", end="")
    lawrence_data.sort(key=lambda e: e["timeEpoch"])
    nysci_data.sort(key=lambda e: e["timeEpoch"])
    print("DONE.")
    with open(pklfilename, "wb") as f:
        pickle.dump(lawrence_data, f, pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: only the Lawrence records are cached, so nysci_data stays empty
    # (and is reported as NONE below) whenever the cache is used.
    # NOTE(security): pickle.load can execute code embedded in the file; only
    # load a cache that this notebook wrote itself.
    print("LOADING PKL...", end="")
    with open(pklfilename, "rb") as f:
        lawrence_data = pickle.load(f)
    print("DONE!")

if len(nysci_data) > 0:
    print(f"   NYSCI: {len(nysci_data)}: {nysci_data[0]['timeEpoch']} => {nysci_data[-1]['timeEpoch']}")
else:
    print("   NYSCI: NONE")
if len(lawrence_data) > 0:
    print(f"LAWRENCE: {len(lawrence_data)}: {lawrence_data[0]['timeEpoch']} => {lawrence_data[-1]['timeEpoch']}")
else:
    print("LAWRENCE: NONE")

LOADING PKL...DONE!
   NYSCI: NONE
LAWRENCE: 957778: 1560964321 => 1576476755


In [10]:
# Distinct event keys present in the Lawrence data.
{elt['eventKey'] for elt in lawrence_data}

{'AllOrbsFilled',
 'CardPlacedInMachine',
 'CardPlacedInTray',
 'CardSwapped',
 'CreatureDeselected',
 'CreatureDespawned',
 'CreatureLocked',
 'CreatureSelected',
 'CreatureSpawned',
 'CreatureStateChanged',
 'CreatureUnlocked',
 'CurrentScreenChanged',
 'FlagCancelled',
 'FlagMoved',
 'FlagPlaced',
 'GameStart',
 'LeafyOrbFilled',
 'MushroomOrbFilled',
 'PlantPlanted',
 'PlantStateChanged',
 'PlantWatered',
 'RainbowPlantPlanted',
 'SeedPacketCountChanged',
 'TreasureDespawned',
 'TreasureOpened',
 'TreasureSpawned',
 'WoodyOrbFilled'}

In [7]:
def live_players(grouped_elts):
    """Count how many players (0, 1 or 2) show changed fields within a bucket.

    Each event is diffed against the one before it with ``diff_elts``; a
    player counts as live when any changed field name mentions that player
    (see ``find_likely_player``).

    Args:
        grouped_elts: Iterable of event dicts in time order.

    Returns:
        0, 1 or 2.
    """
    seen_p1 = seen_p2 = False
    previous = False
    for elt in grouped_elts:
        if previous:
            changed = diff_elts(elt, previous)
            p1, p2 = find_likely_player(changed)
            seen_p1 = seen_p1 or p1
            seen_p2 = seen_p2 or p2
            # Stop as soon as both players have been seen, even when they
            # showed up in different diffs; the answer cannot change after that.
            if seen_p1 and seen_p2:
                return 2
        previous = elt
    return int(seen_p1) + int(seen_p2)

def progress_types(grouped_elts):
    """Return the set of event keys in ``grouped_elts`` that are listed in ``progress_keys``."""
    keys_seen = set()
    for elt in grouped_elts:
        key = elt['eventKey']
        if key in progress_keys:
            keys_seen.add(key)
    return keys_seen

def action_types(grouped_elts):
    """Return the set of event keys in ``grouped_elts`` that are listed in ``choice_event_keys``."""
    event_keys = (elt['eventKey'] for elt in grouped_elts)
    return {key for key in event_keys if key in choice_event_keys}

def count_elts(grouped_elts):
    """Return how many events in ``grouped_elts`` have a key listed in ``relevant_keys``."""
    return sum(1 for elt in grouped_elts if elt['eventKey'] in relevant_keys)

def profile_group(grouped_elts):
    """Summarise one bucket of events.

    Args:
        grouped_elts: The bucket's events. It is iterated once per statistic,
            so it must be re-iterable (for example a list).

    Returns:
        Dict with ``'live_players'``, ``'progress_types'``, ``'action_types'``
        and ``'num_elts'``, each computed by the helper of the same purpose.
    """
    return {
        'live_players': live_players(grouped_elts),
        'progress_types': progress_types(grouped_elts),
        'action_types': action_types(grouped_elts),
        'num_elts': count_elts(grouped_elts),
    }

In [8]:
# bucket_size = 120
# flaw = filter(lambda elt: elt['eventKey'] in relevant_keys, lawrence_data)
# gflaw = groupby(flaw, lambda elt: bucket_size * int(elt['timeEpoch'] / bucket_size))
# mgflaw = map(lambda tgrp: (tgrp[0], profile_group(list(tgrp[1]))), gflaw) 
# lmgflaw = list(mgflaw)
# print(random.choice(lmgflaw))
# # print(random.sample([x for x in mgflaw if x[1]['live_players'] > 0 and x[1]['progress_types']],10))


In [9]:
# NOTE(review): in the saved run this cell raised NameError. The only live
# assignment to gflaw is in a later cell, so on a top-to-bottom run from a
# fresh kernel this cell fails.
gflaw

NameError: name 'gflaw' is not defined

In [None]:
# Largest gap between consecutive events that still keeps them in the same
# bucket, in the same units as 'timeEpoch'.
bucket_idle_split = 30
# Lazy, single-pass stream of the events whose key is in relevant_keys.
flaw = (elt for elt in lawrence_data if elt['eventKey'] in relevant_keys)

def split_by_idle(evts, idle_split=None):
    """Split a time-ordered event stream into buckets separated by idle gaps.

    A new bucket starts whenever the gap between two consecutive events is
    larger than the idle limit.

    Args:
        evts: Iterable of event dicts with a numeric ``'timeEpoch'``, in time
            order.
        idle_split: Largest gap that keeps two events in the same bucket.
            Defaults to the module-level ``bucket_idle_split``.

    Returns:
        List of ``(timestamp, events)`` tuples, where ``timestamp`` is the
        ``'timeEpoch'`` of the last event in that bucket. Empty input gives
        an empty list.
    """
    gap_limit = bucket_idle_split if idle_split is None else idle_split
    output = []
    current_bucket = []
    last_time = None
    for elt in evts:
        evt_time = elt['timeEpoch']
        if current_bucket and (evt_time - last_time) > gap_limit:
            # Stamp the finished bucket with its own last event, not with the
            # event that opens the next bucket.
            output.append((last_time, current_bucket))
            current_bucket = []
        current_bucket.append(elt)
        last_time = evt_time
    if current_bucket:
        output.append((last_time, current_bucket))
    return output

# Bucket the filtered stream, profile every bucket, then spot-check one
# random profile and report how many buckets there are.
gflaw = split_by_idle(flaw)
mgflaw = ((stamp, profile_group(list(bucket))) for stamp, bucket in gflaw)
lmgflaw = list(mgflaw)
print(random.choice(lmgflaw))
print(len(lmgflaw))

In [None]:
# Print the events of one randomly chosen bucket.
sample_stamp, sample_bucket = random.choice(gflaw)
print(sample_bucket)

In [None]:


# Display every CardPlacedInTray event from the first bucket that has one,
# then stop instead of walking the remaining buckets.
for _stamp, bucket in gflaw:
    tray_events = [elt for elt in bucket if elt['eventKey'] == "CardPlacedInTray"]
    if tray_events:
        for elt in tray_events:
            display(elt)
        break
    

In [None]:
print(len(gflaw))
# Print the comma-separated event keys of the first bucket only; index it
# directly rather than looping over every bucket with a stop flag.
if gflaw:
    first_bucket = gflaw[0][1]
    print(",".join(elt['eventKey'] for elt in first_bucket))

In [None]:
def to_dummy(header, data):
    """Dummy-code ``data`` against ``header``.

    Returns a list aligned with ``header`` holding 1 where the header item is
    contained in ``data`` and 0 where it is not.
    """
    return [int(h in data) for h in header]
# print(to_dummy(choice_event_keys,{'CardPlacedInMachine', 'FlagPlaced'}))


# Write one dummy-coded row per bucket profile.
# NOTE: 'RainbowPlantPlanted' is in both progress_keys and choice_event_keys,
# so that column name appears twice in the header.
header = ['epoch','live_players','num_elts'] + progress_keys + choice_event_keys
csvoutfilename = "dummy_coded_lawrence_2021idc.csv"
# The context manager closes the file even if a row fails to format.
with open(csvoutfilename, "w") as csvout:
    csvout.write(",".join(header) + "\n")
    for stamp, stats in lmgflaw:
        output_row = (
            [stamp, stats['live_players'], stats['num_elts']]
            + to_dummy(progress_keys, stats['progress_types'])
            + to_dummy(choice_event_keys, stats['action_types'])
        )
        csvout.write(",".join(map(str, output_row)) + "\n")

In [None]:
# Load the dummy-coded table from the CSV file named by csvoutfilename.
df = pd.read_csv(csvoutfilename)

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols


# Linear model of TreasureOpened on player count and four action flags,
# fitted on buckets with more than 200 relevant events, then an ANOVA table.
# All terms are used as plain numeric columns (no C(...) categorical terms).
ols_formula = 'TreasureOpened ~ live_players + FlagMoved + CreatureSpawned + PlantPlanted + CardPlacedInMachine'
busy_buckets = df[df.num_elts > 200]
df_lm = ols(ols_formula, data=busy_buckets).fit()
print(sm.stats.anova_lm(df_lm))

In [None]:
import math
# orb = ceil(sum of the three orb-filled columns / 3): for 0/1 flags this is
# 1 when at least one orb was filled in the bucket and 0 otherwise.
orb_flags_total = df.LeafyOrbFilled + df.MushroomOrbFilled + df.WoodyOrbFilled
df['orb'] = np.ceil(orb_flags_total / 3.0).astype('int32')

# Keep only buckets with more than 20 relevant events.
dflive = df[df.num_elts > 20]
dflive.head()

In [None]:
# Column means of every column except the first one.
dflive.iloc[:, 1:].mean()

In [None]:

# Annotated correlation heatmap of all dflive columns.
fig, ax = plt.subplots(figsize=(16, 6))
dfcorr = dflive.corr()
sns.heatmap(dfcorr, annot=True, fmt='.1f', ax=ax)
plt.show()

### two players more things happen?
- increase in orbs filled + treasure MORE than increase in action?
    - progress = b1\*x1  + b2\*x2 + ...
- similarity of action correlated to orb-filling?
    - e.g. if both players are using the same actions (as opposed to different actions) is there a higher likelihood of an orb filling event
    - this is interesting because maybe it's synchronicity (in the sting sense) or (opposite) maybe it's roles!
- p(diversity_of_creatures \* complexity_of_creature | live_players)
- ? p(orbsfilled>2 | live players, orbsfilled>1)
- p(appropriate_plant_planted | orb_in_need_of_filling, live_players)

### is there more productive play when there are two players
- is dwell time increased?
- not just "more actions, more better" (MC) but somehow intentional

### todo
- score = f(card diversity, creature diversity, plant diversity) 
    - max the spread
    
### vishesh 2021MARCH4
- creature spawn as an episode
    - how much time spent in flagplaced?
    - which animal
    - next to treasure box or plant?


In [None]:
# DataFrame.size is the total number of cells (rows x columns), not the number of rows.
dflive.size

In [None]:
# These keys are the feature columns selected from dflive for the train/test split.
choice_event_keys

In [None]:

# Hold out 10% of the live buckets. The seed is fixed so the split, and
# every score computed from it, is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    dflive[choice_event_keys], dflive.orb, test_size=0.1, random_state=42)


In [None]:
# Multinomial naive Bayes on the action flags; print held-out accuracy.
model = MultinomialNB().fit(X_train, y_train)
print(f"{model.score(X_test,y_test):.2f}")
# print(confusion_matrix(y_test, predicted))
print(",".join(choice_event_keys))


def pize(log_prob):
    """Convert a natural-log probability to a whole-number percentage."""
    # feature_log_prob_ holds natural logarithms, so invert with exp, not 2 ** x.
    return int(100 * math.exp(log_prob))


# Row 0 is the first class and row 1 the second class of model.classes_
# (orb == 0 and orb == 1 when both are present in y_train).
not_orbbed = [pize(v) for v in model.feature_log_prob_[0]]
orbbed = [pize(v) for v in model.feature_log_prob_[1]]

# Per-feature difference in percentage points, in choice_event_keys order.
print([a - b for a, b in zip(orbbed, not_orbbed)])

In [None]:
# Depth-4 decision tree on the same split: plot it and print held-out accuracy.
clf = DecisionTreeClassifier(max_depth=4).fit(X_train, y_train)
tree.plot_tree(
    clf,
    filled=True,
    feature_names=choice_event_keys,
    class_names=["no orb","orb"],
)
tree_accuracy = clf.score(X_test, y_test)
print(f"{tree_accuracy:.2f}")


In [None]:
# Random forest with default settings on the same split; print held-out accuracy.
model = RandomForestClassifier()
model.fit(X_train, y_train)
forest_accuracy = model.score(X_test, y_test)
print(f"{forest_accuracy:.2f}")
# print(confusion_matrix(y_test, predicted))

In [None]:
# Single hidden layer (300 units) MLP on the same split; print held-out accuracy.
model = MLPClassifier(
    max_iter=1000,
    hidden_layer_sizes=(300,),
)
model.fit(X_train, y_train)
mlp_accuracy = model.score(X_test, y_test)
print(f"{mlp_accuracy:.2f}")
# print(confusion_matrix(y_test, predicted))
# model.loss_curve_

In [None]:
# p(diversity_of_creatures * complexity_of_creature | live_players)

def find_creatures_in_elt(elt, creature_keys_by_player=None, stats=None):
    """Collect the active creatures recorded in one event, grouped by player.

    A creature is active when its entry in ``elt`` has ``cardsExecuted > 0``.
    Each creature is looked up only under the player it belongs to, so a
    creature is never listed under both players.

    Args:
        elt: Event dict that may hold per-creature sub-dicts under keys such
            as ``'player1Creature2'``.
        creature_keys_by_player: One sequence of creature keys per player.
            Defaults to ``(creature_keys_p1, creature_keys_p2)``, which does
            not depend on the current shape of the global ``creatures``.
        stats: Field names copied from each creature entry. Defaults to the
            module-level ``creature_stats``.

    Returns:
        ``[]`` when no creature is active; otherwise one list per player,
        each holding dicts of the requested stats plus a ``'creature'`` entry
        with the creature's key.
    """
    if creature_keys_by_player is None:
        creature_keys_by_player = (creature_keys_p1, creature_keys_p2)
    if stats is None:
        stats = creature_stats

    per_player = [[] for _unused in creature_keys_by_player]
    found_any = False
    for player, creature_keys in enumerate(creature_keys_by_player):
        for creature in creature_keys:
            if creature in elt and elt[creature]['cardsExecuted'] > 0:
                out_creature = {k: elt[creature][k] for k in stats}
                out_creature['creature'] = creature
                per_player[player].append(out_creature)
                found_any = True
    return per_player if found_any else []

machine_types = ['Conditional','Sequential','Probability']
def max_by_player(elts_by_player):
    """For each machine type, find the creature entry with the most cards executed.

    Args:
        elts_by_player: Iterable of per-player iterables of creature dicts
            carrying ``'machineType'`` and ``'cardsExecuted'``.

    Returns:
        Dict keyed by every entry of ``machine_types``. Each value is the
        creature dict with the highest ``'cardsExecuted'`` for that type (the
        first one wins ties), or ``{}`` when no creature of that type was seen.
        Entries whose machine type is not in ``machine_types`` are ignored.
    """
    output = {k: {} for k in machine_types}
    for elts in elts_by_player:
        for elt in elts:
            try:
                machine_type = elt['machineType']
                if machine_type not in output:
                    continue
                best = output[machine_type]
                if 'cardsExecuted' not in best or elt['cardsExecuted'] > best['cardsExecuted']:
                    output[machine_type] = elt
            except (KeyError, TypeError):
                # Malformed entries are reported and skipped. Anything else
                # (e.g. KeyboardInterrupt) is allowed to propagate.
                print("ERROR:",elt)
    return output

# Spot-check: show the creature breakdown for two randomly chosen events.
random_elts = random.sample(lawrence_data, k=2)
for creature_info in map(find_creatures_in_elt, random_elts):
    display(creature_info)

In [None]:
from itertools import chain, accumulate

def creature_score(elts):
    """Return the ``find_creatures_in_elt`` result for every event in ``elts``, as a list."""
    return [find_creatures_in_elt(elt) for elt in elts]

# Draw a fresh pair of random events (unseeded, so the pair differs on every run).
random_elts = random.sample(lawrence_data, k=2)
# list(map(lambda x: x[0][0] if len(x) > 0 and len(x[0]) > 0 else {}, (creature_score(random_elts))))

In [None]:
# player2Creature3
# NOTE(review): this rebinds `creatures`. Near the top of the notebook it is a
# nested list (one list of creature keys per player); from here on it is one
# flat list of all six keys, so anything that reads `creatures` sees a
# different shape depending on whether this cell has run.
creatures = [f"player{i}Creature{j}" for i in range(1,3) for j in range(1,4)]
creatures

In [None]:


# Spot-check again: creature breakdown for two newly sampled events.
random_elts = random.sample(lawrence_data, k=2)

for creature_info in map(find_creatures_in_elt, random_elts):
    display(creature_info)
#     for player in range(2):
#         for creature in creatures[player]:
#             if creature in elt and (elt[creature]['cardsExecuted'] > 0):
#                 print(f"{1 + player} {creature}: {elt[creature]}")
                