# Flatten Equivacard event data

In [1]:
import plotnine
import pandas as pd
import numpy as np
import json
from datetime import datetime
import os
from bson import json_util

In [2]:
import saga_py_aws
import saga_py_mongo

In [3]:
secret = saga_py_aws.get_mongo_uri_secret('prod')
client = saga_py_mongo.get_mongo_client(secret)

In [4]:
mapping_df = pd.read_csv('./data/cognition_mapping.csv', dtype= {'user_id': str})

In [5]:
mapping_df

Unnamed: 0,user_id,email
0,1336,cognition_1@saga.org
1,1337,cognition_2@saga.org
2,1338,cognition_3@saga.org
3,1339,cognition_4@saga.org
4,1340,cognition_5@saga.org
...,...,...
161,4600,cognition_216@saga.org
162,4601,cognition_217@saga.org
163,4602,cognition_218@saga.org
164,4603,cognition_219@saga.org


In [6]:
query = [
    {"$match": {"activity":"EQUIVACARDS"}},
    {"$match": {"user_id": {"$in": list(mapping_df.user_id.values)}}}
     ]

In [8]:
results = saga_py_mongo.aggregate_query_mongo(aggregate_query=query, collection_client=client['xlr8_beta']['analytics_simple_event_data'])

2023-06-26 07:32:10 [debug    ] Querying xlr8_beta.analytics_simple_event_data


In [9]:
event_json = results

In [216]:
json.dump?

In [None]:
event_json = [ev]

In [230]:
with open('./data/analysis_set.json', 'w+') as f:
    json.dump(json.loads(json_util.dumps(event_json)), f)

In [11]:
example_id='1339'
example= [result for result in results if result['user_id']==example_id]

In [12]:
len(example)

58

In [13]:
example_id='1339'
example2= [result for result in event_json if result['user_id']==example_id]

In [14]:
len(example2)

58

## Preview event shape

In [15]:
event_json[0:3]

[{'_id': ObjectId('62ed84e87c108f0fa8a3f6ae'),
  'timestamp': 1659733224.835,
  'user_id': '1336',
  'browser_session_id': 'Not provided',
  'product': 'SKYE',
  'activity': 'EQUIVACARDS',
  'bucket': '2022-08-eqc-study',
  'event_name': 'launched_equivacards'},
 {'_id': ObjectId('62ed84ef7c108f0fa8a3f6af'),
  'timestamp': 1659733231.802,
  'user_id': '1336',
  'browser_session_id': 'Not provided',
  'product': 'SKYE',
  'activity': 'EQUIVACARDS',
  'bucket': '2022-08-eqc-study',
  'event_name': 'user_turn',
  'payload': {'best_play_length': 3,
   'best_play': [{'label': 'red,9-x', 'color': 'red', 'value': '9-x'},
    {'label': 'blue,5', 'color': 'blue', 'value': '5'},
    {'label': 'blue,3', 'color': 'blue', 'value': '3'}]}},
 {'_id': ObjectId('62ed84ef843d8e07c20f6c32'),
  'timestamp': 1659733231.81,
  'user_id': '1336',
  'browser_session_id': 'Not provided',
  'product': 'SKYE',
  'activity': 'EQUIVACARDS',
  'bucket': '2022-08-eqc-study',
  'event_name': 'deal_ended'}]

In [16]:
event_json[-1]

{'_id': ObjectId('6498c030603071cc1626a4e0'),
 'user_id': '1355',
 'product': 'SKYE',
 'activity': 'EQUIVACARDS',
 'bucket': '2023-6-eqc-study',
 'event_name': 'user_won',
 'payload': {'main_info': {'game_length': 3},
  'additional_info': {'deck_version': 1}},
 'browser_session_id': '41ffe670-101f-4dfa-b076-79ebc7880234',
 'server_timestamp': 1687732272009.0}

In [17]:
def unnest_dict(event):
    event_copy = event.copy()
    if type(event) == dict:
        keys = event.keys()
        for key in keys:
            if type(event_copy[key]) == dict:
                event_copy.update(unnest_dict(event_copy[key]))
                del event_copy[key]
    return event_copy
    

In [18]:
unnest_dict(event_json[4])

{'_id': ObjectId('62fbf24c5c4e1a2091565a2c'),
 'timestamp': 1660678732.673,
 'user_id': '1336',
 'browser_session_id': 'Not provided',
 'product': 'SKYE',
 'activity': 'EQUIVACARDS',
 'bucket': '2022-08-eqc-study',
 'event_name': 'deal_ended'}

In [19]:
event_json[4]

{'_id': ObjectId('62fbf24c5c4e1a2091565a2c'),
 'timestamp': 1660678732.673,
 'user_id': '1336',
 'browser_session_id': 'Not provided',
 'product': 'SKYE',
 'activity': 'EQUIVACARDS',
 'bucket': '2022-08-eqc-study',
 'event_name': 'deal_ended'}

In [20]:
flat_events = []
for event in event_json:
    flat_events.append(unnest_dict(event))

In [21]:
json_df = pd.json_normalize(flat_events, meta=['_id.$oid'])
json_df.head()

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,board,p1_hand,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name
0,62ed84e87c108f0fa8a3f6ae,1659733000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,
1,62ed84ef7c108f0fa8a3f6af,1659733000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",...,,,,,,,,,,
2,62ed84ef843d8e07c20f6c32,1659733000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,,,,,,,
3,62fbf2455c4e1a2091565a2b,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,
4,62fbf24c5c4e1a2091565a2c,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,,,,,,,


In [22]:
json_df.sample(1).to_dict('records')

[{'_id': ObjectId('64775a3782802790a65efcec'),
  'timestamp': nan,
  'user_id': '4494',
  'browser_session_id': 'd9ffb922-67cd-458e-b4d0-e596a0cb10c8',
  'product': 'SKYE',
  'activity': 'EQUIVACARDS',
  'bucket': '2023-5-eqc-study',
  'event_name': 'user_played_card',
  'best_play_length': nan,
  'best_play': nan,
  'card': 'yellow.6',
  'value': 6.0,
  'match_color': True,
  'match_value': False,
  'match_algebraic': False,
  'whose_turn': nan,
  'board': nan,
  'p1_hand': nan,
  'p2_ncards': nan,
  'n_cards_played': nan,
  'changed_x_val': nan,
  'changed_pile_val': nan,
  'game_length': nan,
  'server_timestamp': 1685543479262.0,
  'deck_version': 0.0,
  'game_name': nan}]

## Align timestamps

In [23]:
json_df.server_timestamp.values[0]

nan

In [24]:
json_df['server_timestamp'].describe()

count    1.225900e+04
mean     1.684657e+12
std      8.556241e+08
min      1.680806e+12
25%      1.684183e+12
50%      1.684886e+12
75%      1.685209e+12
max      1.687732e+12
Name: server_timestamp, dtype: float64

In [25]:
json_df['timestamp'].describe()

count    2.175000e+03
mean     1.661567e+09
std      7.215110e+05
min      1.659733e+09
25%      1.660785e+09
50%      1.661819e+09
75%      1.662078e+09
max      1.662688e+09
Name: timestamp, dtype: float64

In [26]:
print(datetime.utcfromtimestamp(1.662995e+15/1000000).strftime('%Y-%m-%d %H:%M:%S'))

2022-09-12 15:03:20


In [27]:
print(datetime.utcfromtimestamp(1.662995e+09).strftime('%Y-%m-%d %H:%M:%S'))

2022-09-12 15:03:20


In [28]:
print(datetime.utcfromtimestamp(json_df.server_timestamp.min()/1000).strftime('%Y-%m-%d %H:%M:%S'))

2023-04-06 18:36:32


In [29]:
print(datetime.utcfromtimestamp(json_df.server_timestamp.max()/1000).strftime('%Y-%m-%d %H:%M:%S'))

2023-06-25 22:31:12


In [30]:
def align_unix_convention(x):
    if not np.isnan(x.server_timestamp):
        return x.server_timestamp/1000 
    else:
        ts = x.timestamp
        above_12 = np.floor(np.log10(ts)-9)
        return ts/(10**(above_12))
    

In [31]:
print(json_df.head(1).apply(lambda x: align_unix_convention(x), axis=1))

0    1.659733e+09
dtype: float64


In [242]:
[print(val) for val in json_df.timestamp.sample(4).values]

nan
1660681782.869
nan
nan


[None, None, None, None]

In [243]:
[print(val) for val in json_df.server_timestamp.sample(4).values]

1683591728345.0
1682466396062.0
1685542526261.0
1684193345927.0


[None, None, None, None]

In [32]:
json_df['unix_timestamp_combined'] = json_df.apply(lambda x: align_unix_convention(x), axis=1)

In [33]:
json_df['timestamp_combined'] = json_df.apply(lambda x: datetime.utcfromtimestamp(x.unix_timestamp_combined), axis=1)

In [34]:
json_df['unix_timestamp_combined'].describe()

count    1.443400e+04
mean     1.681178e+09
std      8.302870e+06
min      1.659733e+09
25%      1.683592e+09
50%      1.684637e+09
75%      1.684984e+09
max      1.687732e+09
Name: unix_timestamp_combined, dtype: float64

In [35]:
print(json_df.timestamp_combined.min())

2022-08-05 21:00:24.835000


In [36]:
print(json_df.timestamp_combined.max())

2023-06-25 22:31:12.009000


In [37]:
json_df.sample(5)

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name,unix_timestamp_combined,timestamp_combined
10105,646e8b34603071cc16266bd6,,4488,441e555c-e08c-4a1c-aabe-b835808ab669,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,7.0,,,,,1684966000000.0,0.0,Equivacards,1684966000.0,2023-05-24 22:09:56.975
2853,645985ed9123dab334e8f560,,4484,90fe74a7-b1df-487e-a632-6aca593d7a18,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,2.0,,,,,1683589000000.0,1.0,Equivacards,1683589000.0,2023-05-08 23:29:49.042
6466,646571e0603071cc162633cf,,4488,830d054c-17bf-4f54-9355-8ac6f7e9e5f7,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,7.0,,,,,1684370000000.0,0.0,Equivacards,1684370000.0,2023-05-18 00:31:28.385
6801,64697b80603071cc16264933,,4489,6d751a06-6c8f-4e29-8d31-ad89773f331b,SKYE,EQUIVACARDS,2023-5-eqc-study,user_turn,2.0,"[{'label': 'green,3', 'color': 'green', 'value...",...,,,,,,1684634000000.0,1.0,,1684634000.0,2023-05-21 02:01:36.774
9976,646e894b603071cc16266bae,,4489,6d751a06-6c8f-4e29-8d31-ad89773f331b,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,4.0,,,,,1684966000000.0,1.0,Equivacards,1684966000.0,2023-05-24 22:01:47.521


## Review event distribution across field categories

In [38]:
json_df.groupby('activity').count()

Unnamed: 0_level_0,_id,timestamp,user_id,browser_session_id,product,bucket,event_name,best_play_length,best_play,card,...,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name,unix_timestamp_combined,timestamp_combined
activity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
EQUIVACARDS,14434,2175,14434,14434,14434,14434,14434,1124,1124,3538,...,7504,1074,325,325,296,12259,12248,5884,14434,14434


In [39]:
json_df.groupby('event_name').count()

Unnamed: 0_level_0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,best_play_length,best_play,card,...,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name,unix_timestamp_combined,timestamp_combined
event_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
deal_ended,329,50,329,329,329,329,329,0,0,0,...,0,0,0,0,0,279,277,0,329,329
game_state_changed,7223,1058,7223,7223,7223,7223,7223,0,0,0,...,7223,0,0,0,0,6165,6163,5884,7223,7223
initial_game_state,281,0,281,281,281,281,281,0,0,0,...,281,0,0,0,0,281,279,0,281,281
launched_equivacards,106,21,106,106,106,106,106,0,0,0,...,0,0,0,0,0,85,83,0,106,106
play_again_no,13,2,13,13,13,13,13,0,0,0,...,0,0,0,0,0,11,11,0,13,13
play_again_yes,242,31,242,242,242,242,242,0,0,0,...,0,0,0,0,0,211,211,0,242,242
play_not_allowed,938,227,938,938,938,938,938,0,0,938,...,0,0,0,0,0,711,711,0,938,938
user_can_win,208,31,208,208,208,208,208,0,0,0,...,0,0,0,0,0,177,176,0,208,208
user_drew_card,1074,180,1074,1074,1074,1074,1074,0,0,0,...,0,1074,0,0,0,894,894,0,1074,1074
user_lost,155,28,155,155,155,155,155,0,0,0,...,0,0,0,0,155,127,127,0,155,155


In [40]:
json_df.groupby('bucket').count()

Unnamed: 0_level_0,_id,timestamp,user_id,browser_session_id,product,activity,event_name,best_play_length,best_play,card,...,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name,unix_timestamp_combined,timestamp_combined
bucket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-08-eqc-study,2175,2175,2175,2175,2175,2175,2175,194,194,568,...,1058,180,63,63,40,0,0,0,2175,2175
2023-4-eqc-study,435,0,435,435,435,435,435,40,40,79,...,238,35,19,19,8,435,424,212,435,435
2023-5-eqc-study,11018,0,11018,11018,11018,11018,11018,824,824,2730,...,5765,794,229,229,232,11018,11018,5263,11018,11018
2023-6-eqc-study,806,0,806,806,806,806,806,66,66,161,...,443,65,14,14,16,806,806,409,806,806


In [41]:
json_df.groupby('activity')['user_id'].nunique()

activity
EQUIVACARDS    22
Name: user_id, dtype: int64

In [42]:
equiv_events_df = json_df[json_df.activity.isin(['EQUIVACARDS'])]

In [43]:
equiv_events_df.shape

(14434, 28)

In [44]:
equiv_events_df.columns

Index(['_id', 'timestamp', 'user_id', 'browser_session_id', 'product',
       'activity', 'bucket', 'event_name', 'best_play_length', 'best_play',
       'card', 'value', 'match_color', 'match_value', 'match_algebraic',
       'whose_turn', 'board', 'p1_hand', 'p2_ncards', 'n_cards_played',
       'changed_x_val', 'changed_pile_val', 'game_length', 'server_timestamp',
       'deck_version', 'game_name', 'unix_timestamp_combined',
       'timestamp_combined'],
      dtype='object')

## Flatten object columns

In [45]:
equiv_events_df.sample(5)

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,p2_ncards,n_cards_played,changed_x_val,changed_pile_val,game_length,server_timestamp,deck_version,game_name,unix_timestamp_combined,timestamp_combined
5792,6462c0039123dab334e976a2,,4485,caf0eb97-861f-4a73-9258-f4ad921d0817,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,1.0,,,,,1684193000000.0,1.0,Equivacards,1684193000.0,2023-05-15 23:28:03.043
7246,6469860182802790a65eb88d,,4489,6d751a06-6c8f-4e29-8d31-ad89773f331b,SKYE,EQUIVACARDS,2023-5-eqc-study,user_turn,4.0,"[{'label': 'blue,x+x', 'color': 'blue', 'value...",...,,,,,,1684637000000.0,1.0,,1684637000.0,2023-05-21 02:46:25.718
7094,6469850c82802790a65eb859,,4489,6d751a06-6c8f-4e29-8d31-ad89773f331b,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,2.0,,,,,1684637000000.0,1.0,Equivacards,1684637000.0,2023-05-21 02:42:20.482
13596,64775de982802790a65efdae,,4494,d9ffb922-67cd-458e-b4d0-e596a0cb10c8,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,2.0,,,,,1685544000000.0,0.0,Equivacards,1685544000.0,2023-05-31 14:47:05.525
2885,64598624603071cc16259c9b,,4484,90fe74a7-b1df-487e-a632-6aca593d7a18,SKYE,EQUIVACARDS,2023-5-eqc-study,user_played_card,,,...,,,,,,1683589000000.0,1.0,,1683589000.0,2023-05-08 23:30:44.208


In [46]:
equiv_events_df[equiv_events_df.best_play.notna()].sample(5).best_play.values

array([list([{'label': 'yellow,4', 'color': 'yellow', 'value': '4'}, {'label': 'green,4', 'color': 'green', 'value': '4'}, {'label': 'green,x+1', 'color': 'green', 'value': 'x+1'}, {'label': 'blue,2x', 'color': 'blue', 'value': '2x'}, {'label': 'red,x+1', 'color': 'red', 'value': 'x+1'}, {'label': 'red,x+2', 'color': 'red', 'value': 'x+2'}, {'label': 'blue,x+2', 'color': 'blue', 'value': 'x+2'}, {'label': 'blue,6', 'color': 'blue', 'value': '6'}, {'label': 'yellow,6', 'color': 'yellow', 'value': '6'}]),
       list([]),
       list([{'label': 'green,1', 'color': 'green', 'value': '1'}, {'label': 'green,x+2', 'color': 'green', 'value': 'x+2'}, {'label': 'green,7', 'color': 'green', 'value': '7'}, {'label': 'green,x+1', 'color': 'green', 'value': 'x+1'}, {'label': 'yellow,5', 'color': 'yellow', 'value': '5'}, {'label': 'red,9-x', 'color': 'red', 'value': '9-x'}, {'label': 'red,x+1', 'color': 'red', 'value': 'x+1'}]),
       list([{'label': 'red,4', 'color': 'red', 'value': '4'}, {'label'

In [47]:
equiv_events_df.best_play

0                                                      NaN
1        [{'label': 'red,9-x', 'color': 'red', 'value':...
2                                                      NaN
3                                                      NaN
4                                                      NaN
                               ...                        
14429                                                  NaN
14430                                                  NaN
14431                                                  NaN
14432                                                  NaN
14433                                                  NaN
Name: best_play, Length: 14434, dtype: object

`best_play` is a complex object and would require targeted processing to pull out value

Length of the best play and first move of the best play seem likely useful 

In [48]:
best_play_df = equiv_events_df.best_play.apply(pd.Series) 



In [49]:
best_play_df.columns = [f"best_play_turn_{item}" for item in best_play_df.columns]

In [50]:
best_play_df

Unnamed: 0,best_play_turn_0,best_play_turn_1,best_play_turn_2,best_play_turn_3,best_play_turn_4,best_play_turn_5,best_play_turn_6,best_play_turn_7,best_play_turn_8,best_play_turn_9,best_play_turn_10
0,,,,,,,,,,,
1,"{'label': 'red,9-x', 'color': 'red', 'value': ...","{'label': 'blue,5', 'color': 'blue', 'value': ...","{'label': 'blue,3', 'color': 'blue', 'value': ...",,,,,,,,
2,,,,,,,,,,,
3,,,,,,,,,,,
4,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
14429,,,,,,,,,,,
14430,,,,,,,,,,,
14431,,,,,,,,,,,
14432,,,,,,,,,,,


In [51]:
best_play_df[best_play_df.best_play_turn_0.notna()].head()

Unnamed: 0,best_play_turn_0,best_play_turn_1,best_play_turn_2,best_play_turn_3,best_play_turn_4,best_play_turn_5,best_play_turn_6,best_play_turn_7,best_play_turn_8,best_play_turn_9,best_play_turn_10
1,"{'label': 'red,9-x', 'color': 'red', 'value': ...","{'label': 'blue,5', 'color': 'blue', 'value': ...","{'label': 'blue,3', 'color': 'blue', 'value': ...",,,,,,,,
5,"{'label': 'white,x=3', 'color': 'white', 'valu...","{'label': 'red,x', 'color': 'red', 'value': 'x'}","{'label': 'red,9-x', 'color': 'red', 'value': ...","{'label': 'blue,x+x', 'color': 'blue', 'value'...","{'label': 'white,x=4', 'color': 'white', 'valu...","{'label': 'green,8', 'color': 'green', 'value'...",,,,,
15,"{'label': 'yellow,5', 'color': 'yellow', 'valu...",,,,,,,,,,
26,"{'label': 'white,x=3', 'color': 'white', 'valu...","{'label': 'white,x=4', 'color': 'white', 'valu...","{'label': 'green,8', 'color': 'green', 'value'...","{'label': 'blue,x+x', 'color': 'blue', 'value'...","{'label': 'red,x+x', 'color': 'red', 'value': ...","{'label': 'red,5', 'color': 'red', 'value': '5'}",,,,,
29,"{'label': 'blue,3', 'color': 'blue', 'value': ...","{'label': 'blue,1', 'color': 'blue', 'value': ...","{'label': 'blue,x+1', 'color': 'blue', 'value'...","{'label': 'yellow,x+1', 'color': 'yellow', 'va...",,,,,,,


In [52]:
best_play_df[best_play_df.best_play_turn_0.notna()].head()

Unnamed: 0,best_play_turn_0,best_play_turn_1,best_play_turn_2,best_play_turn_3,best_play_turn_4,best_play_turn_5,best_play_turn_6,best_play_turn_7,best_play_turn_8,best_play_turn_9,best_play_turn_10
1,"{'label': 'red,9-x', 'color': 'red', 'value': ...","{'label': 'blue,5', 'color': 'blue', 'value': ...","{'label': 'blue,3', 'color': 'blue', 'value': ...",,,,,,,,
5,"{'label': 'white,x=3', 'color': 'white', 'valu...","{'label': 'red,x', 'color': 'red', 'value': 'x'}","{'label': 'red,9-x', 'color': 'red', 'value': ...","{'label': 'blue,x+x', 'color': 'blue', 'value'...","{'label': 'white,x=4', 'color': 'white', 'valu...","{'label': 'green,8', 'color': 'green', 'value'...",,,,,
15,"{'label': 'yellow,5', 'color': 'yellow', 'valu...",,,,,,,,,,
26,"{'label': 'white,x=3', 'color': 'white', 'valu...","{'label': 'white,x=4', 'color': 'white', 'valu...","{'label': 'green,8', 'color': 'green', 'value'...","{'label': 'blue,x+x', 'color': 'blue', 'value'...","{'label': 'red,x+x', 'color': 'red', 'value': ...","{'label': 'red,5', 'color': 'red', 'value': '5'}",,,,,
29,"{'label': 'blue,3', 'color': 'blue', 'value': ...","{'label': 'blue,1', 'color': 'blue', 'value': ...","{'label': 'blue,x+1', 'color': 'blue', 'value'...","{'label': 'yellow,x+1', 'color': 'yellow', 'va...",,,,,,,


In [53]:
best_play_0_df = pd.json_normalize(best_play_df.best_play_turn_0,errors='ignore')

In [54]:
best_play_0_df

Unnamed: 0,label,color,value
0,,,
1,"red,9-x",red,9-x
2,,,
3,,,
4,,,
...,...,...,...
14429,,,
14430,,,
14431,,,
14432,,,


In [55]:
best_play_0_df.columns = [f"best_play_turn_0_{item}" for item in best_play_0_df.columns]

In [56]:
equiv_events_df[equiv_events_df.board.notna()].sample(5).board.values

array([list(['white.x=4', 'blue.6']), list(['white.x=3', 'blue.6']),
       list(['white.x=1', 'blue.8']), list(['white.x=3', 'red.2']),
       list(['white.x=3', 'green.1'])], dtype=object)

In [57]:
board_df = equiv_events_df.board.apply(pd.Series) 

In [58]:
board_df.columns = ["board_left_card", "board_right_card"]

In [59]:
board_df[board_df.board_right_card.notna()].sample(5)

Unnamed: 0,board_left_card,board_right_card
1151,white.x=2,blue.9-x
11403,white.x=1,yellow.7
3028,white.x=1,yellow.x
12740,white.x=1,yellow.7
14307,white.x=2,yellow.2


In [60]:
equiv_events_df[equiv_events_df.p1_hand.notna()].sample(5).p1_hand.values

array([list(['blue.2', 'blue.6']),
       list(['blue.x+1', 'blue.7', 'blue.8', 'green.3', 'yellow.x+x', 'red.3']),
       list(['blue.5', 'blue.8', 'green.x', 'yellow.x', 'yellow.3']),
       list([]), list(['blue.5', 'blue.6', 'green.6'])], dtype=object)

In [61]:
p1_hand_df = equiv_events_df.p1_hand.apply(pd.Series) 



In [62]:
p1_hand_df.columns = [f"p1_hand_card_{item}" for item in p1_hand_df.columns]

In [63]:
p1_hand_df['p1_hand_size'] = p1_hand_df.count(axis=1)

In [64]:
p1_hand_df[p1_hand_df.p1_hand_card_0.notna()].sample(5)

Unnamed: 0,p1_hand_card_0,p1_hand_card_1,p1_hand_card_2,p1_hand_card_3,p1_hand_card_4,p1_hand_card_5,p1_hand_card_6,p1_hand_card_7,p1_hand_card_8,p1_hand_card_9,p1_hand_card_10,p1_hand_size
3602,white.x=1,green.4,,,,,,,,,,2
621,blue.5,yellow.7,red.6,red.7,green.x,,,,,,,5
5005,green.1,green.6,yellow.x+x,yellow.8,red.5,red.x+x,,,,,,6
10369,green.9-x,green.1,green.3,yellow.x,yellow.9-x,yellow.x+x,,,,,,6
1079,white.x=3,white.x=4,blue.x,red.x,,,,,,,,4


In [65]:
equiv_flat_df = pd.concat([
    equiv_events_df.reset_index(drop=True), 
    best_play_df.reset_index(drop=True),
    best_play_0_df.reset_index(drop=True),
    board_df.reset_index(drop=True),
    p1_hand_df.reset_index(drop=True)
], axis=1)

In [66]:
equiv_flat_df.sample(5)

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,p1_hand_card_2,p1_hand_card_3,p1_hand_card_4,p1_hand_card_5,p1_hand_card_6,p1_hand_card_7,p1_hand_card_8,p1_hand_card_9,p1_hand_card_10,p1_hand_size
13665,64790fe29123dab334e9ef58,,4493,13b0e511-3e44-438e-ad55-ad094f4a79af,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,,,,,,,,,,0
8,62fbf25de764193979eeade0,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,,,,,,,,,,0
183,62fbfd31289aed6bd15ac580,1660682000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,game_state_changed,,,...,green.1,green.5,,,,,,,,4
8463,646d5d4f9123dab334e9b91d,,4492,0584a84c-c1af-4776-8535-332f346b8398,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,,,,,,,,,,1
12598,64767d0c82802790a65efb99,,4493,13b0e511-3e44-438e-ad55-ad094f4a79af,SKYE,EQUIVACARDS,2023-5-eqc-study,user_drew_card,,,...,,,,,,,,,,0


## Correct Connect the Drops labels

In [67]:
game_temp_df = equiv_flat_df.sort_values(by='timestamp_combined')
# [equiv_flat_df.event_name.isin(["launched_connect_the_drops", "launched_equivacards", "user_won", "user_lost"])]
# .sort_values(by='timestamp_combined')

In [68]:
equiv_flat_df

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,p1_hand_card_2,p1_hand_card_3,p1_hand_card_4,p1_hand_card_5,p1_hand_card_6,p1_hand_card_7,p1_hand_card_8,p1_hand_card_9,p1_hand_card_10,p1_hand_size
0,62ed84e87c108f0fa8a3f6ae,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,0
1,62ed84ef7c108f0fa8a3f6af,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",...,,,,,,,,,,0
2,62ed84ef843d8e07c20f6c32,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,,,,,,,0
3,62fbf2455c4e1a2091565a2b,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,0
4,62fbf24c5c4e1a2091565a2c,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14429,6498c02d82802790a65f14ec,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,,,,,,,,,,0
14430,6498c02e603071cc1626a4df,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,,,,,,,1
14431,6498c02f9123dab334e9ff5f,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,,,,,,,,,,0
14432,6498c03082802790a65f14ed,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,,,,,,,0


In [69]:
game_temp_df

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,p1_hand_card_2,p1_hand_card_3,p1_hand_card_4,p1_hand_card_5,p1_hand_card_6,p1_hand_card_7,p1_hand_card_8,p1_hand_card_9,p1_hand_card_10,p1_hand_size
0,62ed84e87c108f0fa8a3f6ae,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,0
1,62ed84ef7c108f0fa8a3f6af,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",...,,,,,,,,,,0
2,62ed84ef843d8e07c20f6c32,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,,,,,,,0
3,62fbf2455c4e1a2091565a2b,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,,,,,,,0
5,62fbf24c289aed6bd15ac546,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,6.0,"[{'label': 'white,x=3', 'color': 'white', 'val...",...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14429,6498c02d82802790a65f14ec,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,,,,,,,,,,0
14430,6498c02e603071cc1626a4df,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,,,,,,,1
14431,6498c02f9123dab334e9ff5f,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,,,,,,,,,,0
14432,6498c03082802790a65f14ed,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,,,,,,,0


In [70]:
game_temp_df = equiv_flat_df.sort_values(by='timestamp_combined')

In [71]:
game_temp_df['game_launch'] = game_temp_df.event_name.apply(lambda x: x in ['launched_connect_the_drops', 'launched_equivacards'])

In [72]:
game_temp_df['game_start'] = game_temp_df.event_name.apply(lambda x: x in ['launched_equivacards', 'initial_game_state', 'play_again_yes'])

In [73]:
game_temp_df['game_start_time'] = game_temp_df.apply(lambda x: x.timestamp_combined if x.game_start else None , axis=1 )

In [74]:
def correct_launch_activity(x):
    if x == 'launched_connect_the_drops':
        return "CONNECT_THE_DROPS"
    elif x ==  'launched_equivacards':
        return "EQUIVACARDS"

In [75]:
game_temp_df['corrected_activity'] = game_temp_df.event_name.apply(correct_launch_activity)

In [76]:
game_temp_df['game_end'] = game_temp_df.event_name.apply(lambda x: x in ['user_won', 'user_lost'])

In [77]:
game_temp_df['game_end_time'] = game_temp_df.apply(lambda x: x.timestamp_combined if x.game_end else None , axis=1 )

In [78]:
game_temp_df['user_turn_start'] = game_temp_df.event_name.apply(lambda x: x in ['user_turn'])

In [79]:
game_temp_df['user_turn_start_time'] = game_temp_df.apply(lambda x: x.timestamp_combined if x.user_turn_start else None , axis=1 )

In [80]:
game_temp_df['user_turn_end'] = game_temp_df.event_name.apply(lambda x: x in ['user_drew_card', 'user_won', 'user_lost'])

In [81]:
game_temp_df['user_turn_end_time'] = game_temp_df.apply(lambda x: x.timestamp_combined if x.user_turn_end else None , axis=1 )

In [82]:
game_temp_df['user_took_action'] = game_temp_df.event_name.apply(lambda x: x in ['user_played_card', 'user_drew_card', 'play_not_allowed'])

In [83]:
game_temp_df['user_action_time'] = game_temp_df.apply(lambda x: x.timestamp_combined if x.user_took_action else None , axis=1 )

In [84]:
game_temp_df['user_launch_index'] = game_temp_df.groupby('user_id').game_launch.cumsum()
game_temp_df['user_game_index'] = game_temp_df.groupby('user_id').game_start.cumsum()

In [85]:
game_temp_df['user_turn_start_index'] = game_temp_df.groupby(['user_id','user_game_index']).user_turn_start.cumsum()
game_temp_df['user_turn_end_index'] = game_temp_df.groupby(['user_id','user_game_index']).user_turn_end.cumsum()

In [86]:
game_temp_df['turn_id'] = game_temp_df.apply(lambda x: '-'.join([str(x.user_turn_start_index), str(x.user_turn_end_index)]), axis=1)

In [91]:
game_temp_df['comp_turn'] = game_temp_df.apply(lambda x: x.user_turn_start_index == x.user_turn_end_index, axis=1)
game_temp_df['user_turn'] = ~game_temp_df['comp_turn']

In [92]:
game_temp_df[game_temp_df.user_id == '4492'][['timestamp_combined','event_name', 'corrected_activity', 'game_end', 'game_start', 'user_launch_index', 'user_game_index', 'user_turn_start_time', 'user_turn_end_time', 'turn_id', 'user_turn_start_index', 'match_type', 'last_match_type', 'match_type_change', 'match_color', 'match_value', 'match_algebraic']].to_csv('./4492.csv')

In [93]:
game_temp_df[game_temp_df.user_id == '1336'][['timestamp_combined','event_name', 'user_game_index', 'user_turn_start', 'user_turn_end', 'user_turn_start_index', 'user_turn_end_index', 'turn_id', 'comp_turn', 'user_turn', 'game_end_time', 'game_start_time', 'user_turn_start_index']].to_csv('./1336.csv')

In [94]:
def correct_activity(x):
    if x == 'launched_connect_the_drops':
        return "CONNECT_THE_DROPS"
    elif x ==  'launched_equivacards':
        return "EQUIVACARDS"

In [95]:
game_temp_df['corrected_activity'] = game_temp_df.event_name.apply(lambda x: correct_activity(x))

In [231]:
game_temp_df

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,match_type,last_match_type,match_type_change,comp_turn,user_turn,match_cat_str,previous_match_color,previous_match_value,previous_match_algebraic,strict_switch
0,62ed84e87c108f0fa8a3f6ae,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,True,False,,,,,True
1,62ed84ef7c108f0fa8a3f6af,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",...,,,,False,True,,,,,True
2,62ed84ef843d8e07c20f6c32,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,,,,False,True,,,,,True
3,62fbf2455c4e1a2091565a2b,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,,,,True,False,,,,,True
5,62fbf24c289aed6bd15ac546,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,6.0,"[{'label': 'white,x=3', 'color': 'white', 'val...",...,,,,False,True,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14429,6498c02d82802790a65f14ec,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,value,,,False,True,value,,,,True
14430,6498c02e603071cc1626a4df,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,False,True,,,,,True
14431,6498c02f9123dab334e9ff5f,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,...,color,value,True,False,True,color,False,True,False,True
14432,6498c03082802790a65f14ed,,1355,41ffe670-101f-4dfa-b076-79ebc7880234,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,...,,,,False,True,,,,,True


In [233]:
game_temp_df.drop(['browser_session_id'], axis=1)

Unnamed: 0,_id,timestamp,user_id,product,activity,bucket,event_name,best_play_length,best_play,card,...,match_type,last_match_type,match_type_change,comp_turn,user_turn,match_cat_str,previous_match_color,previous_match_value,previous_match_algebraic,strict_switch
0,62ed84e87c108f0fa8a3f6ae,1.659733e+09,1336,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,,...,,,,True,False,,,,,True
1,62ed84ef7c108f0fa8a3f6af,1.659733e+09,1336,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",,...,,,,False,True,,,,,True
2,62ed84ef843d8e07c20f6c32,1.659733e+09,1336,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,,...,,,,False,True,,,,,True
3,62fbf2455c4e1a2091565a2b,1.660679e+09,1336,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,,...,,,,True,False,,,,,True
5,62fbf24c289aed6bd15ac546,1.660679e+09,1336,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,6.0,"[{'label': 'white,x=3', 'color': 'white', 'val...",,...,,,,False,True,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14429,6498c02d82802790a65f14ec,,1355,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,blue.3,...,value,,,False,True,value,,,,True
14430,6498c02e603071cc1626a4df,,1355,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,,...,,,,False,True,,,,,True
14431,6498c02f9123dab334e9ff5f,,1355,SKYE,EQUIVACARDS,2023-6-eqc-study,user_played_card,,,blue.7,...,color,value,True,False,True,color,False,True,False,True
14432,6498c03082802790a65f14ed,,1355,SKYE,EQUIVACARDS,2023-6-eqc-study,game_state_changed,,,,...,,,,False,True,,,,,True


In [96]:
game_by_launch_df = game_temp_df[game_temp_df['corrected_activity'].notna()][['user_id', "user_launch_index","corrected_activity"]]

In [97]:
game_by_launch_df.sample(5)

Unnamed: 0,user_id,user_launch_index,corrected_activity
6863,4489,2,EQUIVACARDS
7441,4490,2,EQUIVACARDS
1265,1341,1,EQUIVACARDS
8412,4492,3,EQUIVACARDS
9233,4488,4,EQUIVACARDS


In [98]:
corrected_activity_events_df = pd.merge(game_temp_df.drop('corrected_activity', axis=1), game_by_launch_df, on=["user_id", "user_launch_index"])

In [99]:
corrected_activity_events_df.groupby('corrected_activity')['user_id'].nunique()

corrected_activity
EQUIVACARDS    22
Name: user_id, dtype: int64

In [100]:
corrected_equivacards_events = corrected_activity_events_df[corrected_activity_events_df.corrected_activity=="EQUIVACARDS"]

In [101]:
assert corrected_equivacards_events[corrected_equivacards_events.user_id=='4492'].user_game_index.max()==52

In [102]:
assert corrected_equivacards_events[corrected_equivacards_events.user_id=='1336'].user_game_index.max()==9

In [103]:
corrected_equivacards_events.describe()

Unnamed: 0,timestamp,best_play_length,value,whose_turn,p2_ncards,n_cards_played,game_length,server_timestamp,deck_version,unix_timestamp_combined,p1_hand_size,user_launch_index,user_game_index,user_turn_start_index,user_turn_end_index
count,2175.0,1124.0,2275.0,1058.0,7504.0,1074.0,296.0,12259.0,12248.0,14434.0,14434.0,14434.0,14434.0,14434.0,14434.0
mean,1661567000.0,2.30427,4.414945,1.534026,4.832623,1.534451,7.050676,1684657000000.0,0.598873,1681178000.0,2.099349,3.479631,26.047319,2.828045,2.629555
std,721511.0,2.461808,2.298167,0.499077,2.218919,1.86077,6.666445,855624100.0,0.490147,8302870.0,2.465891,3.215192,22.337324,3.192817,3.215619
min,1659733000.0,0.0,1.0,1.0,0.0,0.0,1.0,1680806000000.0,0.0,1659733000.0,0.0,1.0,1.0,0.0,0.0
25%,1660785000.0,0.0,2.0,1.0,3.0,0.0,2.0,1684183000000.0,0.0,1683592000.0,0.0,2.0,7.0,1.0,0.0
50%,1661819000.0,1.0,4.0,2.0,5.0,1.0,5.0,1684886000000.0,1.0,1684637000.0,1.0,2.0,19.0,2.0,1.0
75%,1662078000.0,4.0,6.0,2.0,7.0,3.0,9.0,1685209000000.0,1.0,1684984000.0,4.0,4.0,40.0,4.0,4.0
max,1662688000.0,11.0,8.0,2.0,12.0,7.0,44.0,1687732000000.0,1.0,1687732000.0,11.0,19.0,94.0,22.0,23.0


In [104]:
corrected_equivacards_events.groupby('event_name').count()

Unnamed: 0_level_0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,best_play_length,best_play,card,...,user_game_index,user_turn_start_index,user_turn_end_index,turn_id,match_type,last_match_type,match_type_change,comp_turn,user_turn,corrected_activity
event_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
deal_ended,329,50,329,329,329,329,329,0,0,0,...,329,329,329,329,0,0,0,329,329,329
game_state_changed,7223,1058,7223,7223,7223,7223,7223,0,0,0,...,7223,7223,7223,7223,0,0,0,7223,7223,7223
initial_game_state,281,0,281,281,281,281,281,0,0,0,...,281,281,281,281,0,0,0,281,281,281
launched_equivacards,106,21,106,106,106,106,106,0,0,0,...,106,106,106,106,0,0,0,106,106,106
play_again_no,13,2,13,13,13,13,13,0,0,0,...,13,13,13,13,0,0,0,13,13,13
play_again_yes,242,31,242,242,242,242,242,0,0,0,...,242,242,242,242,0,0,0,242,242,242
play_not_allowed,938,227,938,938,938,938,938,0,0,938,...,938,938,938,938,0,0,0,938,938,938
user_can_win,208,31,208,208,208,208,208,0,0,0,...,208,208,208,208,0,0,0,208,208,208
user_drew_card,1074,180,1074,1074,1074,1074,1074,0,0,0,...,1074,1074,1074,1074,0,0,0,1074,1074,1074
user_lost,155,28,155,155,155,155,155,0,0,0,...,155,155,155,155,0,0,0,155,155,155


In [105]:
corrected_equivacards_events.user_game_index.max()

94

In [106]:
corrected_equivacards_events.groupby('event_name')['user_id'].nunique()

event_name
deal_ended              22
game_state_changed      22
initial_game_state      14
launched_equivacards    22
play_again_no            7
play_again_yes          21
play_not_allowed        21
user_can_win            22
user_drew_card          21
user_lost               21
user_played_card        21
user_turn               22
user_won                20
Name: user_id, dtype: int64

In [107]:
corrected_equivacards_events.dtypes

_id                    object
timestamp             float64
user_id                object
browser_session_id     object
product                object
                       ...   
last_match_type        object
match_type_change      object
comp_turn                bool
user_turn                bool
corrected_activity     object
Length: 78, dtype: object

In [108]:
corrected_equivacards_events.columns.sort_values()

Index(['_id', 'activity', 'best_play', 'best_play_length', 'best_play_turn_0',
       'best_play_turn_0_color', 'best_play_turn_0_label',
       'best_play_turn_0_value', 'best_play_turn_1', 'best_play_turn_10',
       'best_play_turn_2', 'best_play_turn_3', 'best_play_turn_4',
       'best_play_turn_5', 'best_play_turn_6', 'best_play_turn_7',
       'best_play_turn_8', 'best_play_turn_9', 'board', 'board_left_card',
       'board_right_card', 'browser_session_id', 'bucket', 'card',
       'changed_pile_val', 'changed_x_val', 'comp_turn', 'corrected_activity',
       'deck_version', 'event_name', 'game_end', 'game_end_time',
       'game_launch', 'game_length', 'game_name', 'game_start',
       'game_start_time', 'last_match_type', 'match_algebraic', 'match_color',
       'match_type', 'match_type_change', 'match_value', 'n_cards_played',
       'p1_hand', 'p1_hand_card_0', 'p1_hand_card_1', 'p1_hand_card_10',
       'p1_hand_card_2', 'p1_hand_card_3', 'p1_hand_card_4', 'p1_hand_card_5

In [109]:
corrected_equivacards_events.shape

(14434, 78)

In [110]:
corrected_equivacards_events.shape

(14434, 78)

In [111]:
corrected_equivacards_events.columns

Index(['_id', 'timestamp', 'user_id', 'browser_session_id', 'product',
       'activity', 'bucket', 'event_name', 'best_play_length', 'best_play',
       'card', 'value', 'match_color', 'match_value', 'match_algebraic',
       'whose_turn', 'board', 'p1_hand', 'p2_ncards', 'n_cards_played',
       'changed_x_val', 'changed_pile_val', 'game_length', 'server_timestamp',
       'deck_version', 'game_name', 'unix_timestamp_combined',
       'timestamp_combined', 'best_play_turn_0', 'best_play_turn_1',
       'best_play_turn_2', 'best_play_turn_3', 'best_play_turn_4',
       'best_play_turn_5', 'best_play_turn_6', 'best_play_turn_7',
       'best_play_turn_8', 'best_play_turn_9', 'best_play_turn_10',
       'best_play_turn_0_label', 'best_play_turn_0_color',
       'best_play_turn_0_value', 'board_left_card', 'board_right_card',
       'p1_hand_card_0', 'p1_hand_card_1', 'p1_hand_card_2', 'p1_hand_card_3',
       'p1_hand_card_4', 'p1_hand_card_5', 'p1_hand_card_6', 'p1_hand_card_7',
      

In [112]:
corrected_equivacards_events[corrected_activity_events_df.user_id == '4492']

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,user_game_index,user_turn_start_index,user_turn_end_index,turn_id,match_type,last_match_type,match_type_change,comp_turn,user_turn,corrected_activity
7825,646a7b2682802790a65eb8fc,,4492,1271bd41-2272-479c-9319-231403e8798a,SKYE,EQUIVACARDS,2023-5-eqc-study,launched_equivacards,,,...,1,0,0,0-0,,,,True,False,EQUIVACARDS
7826,646a7b3a82802790a65eb8fd,,4492,1271bd41-2272-479c-9319-231403e8798a,SKYE,EQUIVACARDS,2023-5-eqc-study,initial_game_state,,,...,2,0,0,0-0,,,,True,False,EQUIVACARDS
7827,646a7b3a9123dab334e9a406,,4492,1271bd41-2272-479c-9319-231403e8798a,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,2,0,0,0-0,,,,True,False,EQUIVACARDS
7828,646a7b3e9123dab334e9a407,,4492,1271bd41-2272-479c-9319-231403e8798a,SKYE,EQUIVACARDS,2023-5-eqc-study,user_can_win,,,...,2,0,0,0-0,,,,True,False,EQUIVACARDS
7829,646a7b3e603071cc16264a25,,4492,1271bd41-2272-479c-9319-231403e8798a,SKYE,EQUIVACARDS,2023-5-eqc-study,user_turn,7.0,"[{'label': 'blue,1', 'color': 'blue', 'value':...",...,2,1,0,1-0,,,,False,True,EQUIVACARDS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12235,6473b41e9123dab334e9e018,,4492,1a499333-2803-4066-985d-b699227eddee,SKYE,EQUIVACARDS,2023-5-eqc-study,user_played_card,,,...,52,2,1,2-1,color,,,False,True,EQUIVACARDS
12236,6473b41e82802790a65ef5dd,,4492,1a499333-2803-4066-985d-b699227eddee,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,52,2,1,2-1,,,,False,True,EQUIVACARDS
12237,6473b41e9123dab334e9e019,,4492,1a499333-2803-4066-985d-b699227eddee,SKYE,EQUIVACARDS,2023-5-eqc-study,user_played_card,,,...,52,2,1,2-1,color,color,False,False,True,EQUIVACARDS
12238,6473b41f82802790a65ef5de,,4492,1a499333-2803-4066-985d-b699227eddee,SKYE,EQUIVACARDS,2023-5-eqc-study,game_state_changed,,,...,52,2,1,2-1,,,,False,True,EQUIVACARDS


In [244]:
corrected_equivacards_events.sample(1)

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,user_game_index,user_turn_start_index,user_turn_end_index,turn_id,match_type,last_match_type,match_type_change,comp_turn,user_turn,corrected_activity
4023,6459968f82802790a65e0d0c,,4491,924b76b5-ac5d-434c-bf41-1d4673dbd2b0,SKYE,EQUIVACARDS,2023-5-eqc-study,user_played_card,,,...,22,2,1,2-1,value,,,False,True,EQUIVACARDS


In [249]:
match_columns = [f'match_{val}' for val in ['color', 'value', 'algebraic']]

In [259]:
columns= ['user_id', 'user_game_index', 'user_turn_start_index', 'event_name', 'user_action_time', 'user_turn_start_time', 'changed_x_val', 'changed_pile_val'] + match_columns
temp_df = corrected_equivacards_events[(corrected_equivacards_events.user_id =='4491') 
#                                        &
#                             (corrected_equivacards_events.event_name=='user_played_card')
                                      ][columns]
temp_df.user_id = 'abc'
temp_df.to_csv('analysis/tests/data/abc.csv')

In [260]:
example_df = pd.read_csv("./analysis/tests/data/abc.csv")

In [263]:
example_df[['match_color', 'match_value', 'match_algebraic']]

Unnamed: 0,match_color,match_value,match_algebraic
0,,,
1,,,
2,,,
3,,,
4,,,
...,...,...,...
449,True,False,False
450,,,
451,True,False,False
452,,,


In [271]:
type(example_df['match_color'][0])

float

In [269]:
example_df.apply(lambda x: x[['match_color', 'match_value', 'match_algebraic']].values, axis=1)

0           [nan, nan, nan]
1           [nan, nan, nan]
2           [nan, nan, nan]
3           [nan, nan, nan]
4           [nan, nan, nan]
               ...         
449    [True, False, False]
450         [nan, nan, nan]
451    [True, False, False]
452         [nan, nan, nan]
453         [nan, nan, nan]
Length: 454, dtype: object

In [None]:
1,0,1

In [113]:
corrected_equivacards_events[corrected_activity_events_df.user_id == '4492'][['event_name', 'corrected_activity', 'game_end', 'game_start', 'user_launch_index', 'user_game_index', 'match_type']].to_csv('4492.csv')

# Calc number cards

In [114]:
example_df = corrected_equivacards_events[corrected_activity_events_df.user_id.isin(
    [
        '1336'
#         , '4492'
    ])].copy()

In [115]:
corrected_equivacards_events.columns

Index(['_id', 'timestamp', 'user_id', 'browser_session_id', 'product',
       'activity', 'bucket', 'event_name', 'best_play_length', 'best_play',
       'card', 'value', 'match_color', 'match_value', 'match_algebraic',
       'whose_turn', 'board', 'p1_hand', 'p2_ncards', 'n_cards_played',
       'changed_x_val', 'changed_pile_val', 'game_length', 'server_timestamp',
       'deck_version', 'game_name', 'unix_timestamp_combined',
       'timestamp_combined', 'best_play_turn_0', 'best_play_turn_1',
       'best_play_turn_2', 'best_play_turn_3', 'best_play_turn_4',
       'best_play_turn_5', 'best_play_turn_6', 'best_play_turn_7',
       'best_play_turn_8', 'best_play_turn_9', 'best_play_turn_10',
       'best_play_turn_0_label', 'best_play_turn_0_color',
       'best_play_turn_0_value', 'board_left_card', 'board_right_card',
       'p1_hand_card_0', 'p1_hand_card_1', 'p1_hand_card_2', 'p1_hand_card_3',
       'p1_hand_card_4', 'p1_hand_card_5', 'p1_hand_card_6', 'p1_hand_card_7',
      

In [182]:
columns = ['user_game_index', 'user_turn_start_index', 
           'event_name', 'match_color', 'match_value', 
           'match_algebraic', 'best_play_length', 
           'p1_hand_size', 'p1_hand', 'best_play' ]
example_df[(example_df.user_id == '1336') & (example_df.user_game_index == 7)][columns].to_csv('example_events.csv')

In [117]:
example_df[['user_launch_index', 'user_game_index', 'turn_id', 'event_name','user_id', 'board_right_card', 'board_left_card','p1_hand', 'card', 'value', 'match_color','match_value', 'match_algebraic']].to_csv('example_events.csv')

In [179]:
example_df[(example_df.user_id == '1336') & (example_df.user_game_index == 7)].to_csv('example_events.csv')

In [119]:
example_df.groupby(['user_id', 'user_game_index', 'turn_id', 'user_turn', 'event_name']).event_name.count().to_csv('example_by_turn.csv')

In [120]:
example_df.groupby(['user_id', 'user_game_index', 'event_name']).event_name.count().to_csv('example_by_game.csv')

## Turn Metrics

In [121]:
example_df[(example_df.user_id == '1336') & (example_df.user_game_index == 5) & (example_df.user_turn_start_index == 1)][['user_turn_start_time', 'user_action_time']]

Unnamed: 0,user_turn_start_time,user_action_time
82,2022-08-16 20:22:55.091,NaT
83,NaT,NaT
84,NaT,2022-08-16 20:22:58.576
85,NaT,NaT
86,NaT,2022-08-16 20:22:59.561
87,NaT,NaT
88,NaT,2022-08-16 20:23:01.144
89,NaT,NaT
90,NaT,2022-08-16 20:23:02.376
91,NaT,NaT


In [142]:
# 'num_of_cards_played_in_turn'
num_of_cards_played__df = example_df[example_df.event_name=='user_played_card'].groupby(['user_id', 'user_game_index', 'user_turn_start_index']).card.count().reset_index()
num_of_cards_played__df.head()

Unnamed: 0,user_id,user_game_index,user_turn_start_index,card
0,1336,2,1,2
1,1336,2,2,1
2,1336,4,2,2
3,1336,4,3,2
4,1336,4,4,1


In [124]:
# example_df['category_match_switches'] = 

example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).match_type_change.sum().reset_index().astype(int)

Unnamed: 0,user_id,user_game_index,user_turn_start_index,match_type_change
0,1336,1,0,0
1,1336,1,1,0
2,1336,2,0,0
3,1336,2,1,0
4,1336,2,2,0
5,1336,2,3,0
6,1336,3,0,0
7,1336,3,1,0
8,1336,4,0,0
9,1336,4,1,0


In [125]:
example_df

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,user_game_index,user_turn_start_index,user_turn_end_index,turn_id,match_type,last_match_type,match_type_change,comp_turn,user_turn,corrected_activity
0,62ed84e87c108f0fa8a3f6ae,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,1,0,0,0-0,,,,True,False,EQUIVACARDS
1,62ed84ef7c108f0fa8a3f6af,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,3.0,"[{'label': 'red,9-x', 'color': 'red', 'value':...",...,1,1,0,1-0,,,,False,True,EQUIVACARDS
2,62ed84ef843d8e07c20f6c32,1.659733e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,deal_ended,,,...,1,1,0,1-0,,,,False,True,EQUIVACARDS
3,62fbf2455c4e1a2091565a2b,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,launched_equivacards,,,...,2,0,0,0-0,,,,True,False,EQUIVACARDS
4,62fbf24c289aed6bd15ac546,1.660679e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_turn,6.0,"[{'label': 'white,x=3', 'color': 'white', 'val...",...,2,1,0,1-0,,,,False,True,EQUIVACARDS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
428,62fbff3d5c4e1a2091565ab5,1.660682e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,game_state_changed,,,...,9,1,1,1-1,,,,True,False,EQUIVACARDS
429,62fbff40289aed6bd15ac5cf,1.660682e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,game_state_changed,,,...,9,1,1,1-1,,,,True,False,EQUIVACARDS
430,62fbff42e764193979eeae75,1.660682e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,game_state_changed,,,...,9,1,1,1-1,,,,True,False,EQUIVACARDS
431,62fbff45e764193979eeae76,1.660682e+09,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,game_state_changed,,,...,9,1,1,1-1,,,,True,False,EQUIVACARDS


In [126]:
example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).match_type.first()

user_id  user_game_index  user_turn_start_index
1336     1                0                         None
                          1                         None
         2                0                         None
                          1                        color
                          2                        color
                          3                         None
         3                0                         None
                          1                         None
         4                0                         None
                          1                         None
                          2                        value
                          3                        value
                          4                         None
                          5                        color
         5                0                         None
                          1                        color
                          2             

In [127]:
example_df[(example_df.user_id == '1336') & (example_df.user_game_index == 2) & (example_df.user_turn_start_index == 1)][['user_turn_start_time', 'user_action_time']]

Unnamed: 0,user_turn_start_time,user_action_time
4,2022-08-16 19:38:52.663,NaT
5,NaT,NaT
6,NaT,2022-08-16 19:39:01.933
7,NaT,NaT
8,NaT,2022-08-16 19:39:08.853
9,NaT,NaT
10,NaT,2022-08-16 19:39:17.180
11,NaT,NaT
12,NaT,NaT
13,NaT,NaT


In [128]:
example_df[(example_df.user_id == '1336') & (example_df.user_game_index == 2) & (example_df.user_turn_start_index == 1)][['user_id', 'user_turn_start_time', 'user_action_time']].groupby(['user_id']).apply(lambda x: (x.user_action_time.min() - x.user_turn_start_time.min()).total_seconds())

user_id
1336    9.27
dtype: float64

In [129]:
(pd.Timestamp('2022-08-16 19:39:01.933') - pd.Timestamp('2022-08-16 19:38:52.663')).total_seconds()

9.27

In [130]:
# example_df['time_to_first_move_seconds'] = 
example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).apply(lambda x: (x.user_action_time.min() - x.user_turn_start_time.min()).total_seconds())

user_id  user_game_index  user_turn_start_index
1336     1                0                            NaN
                          1                            NaN
         2                0                            NaN
                          1                          9.270
                          2                          6.912
                          3                            NaN
         3                0                            NaN
                          1                            NaN
         4                0                            NaN
                          1                        174.426
                          2                          6.541
                          3                         24.233
                          4                          8.316
                          5                          4.171
         5                0                            NaN
                          1                          3.485
        

In [131]:
# example_df['time_to_first_move_seconds'] = 
example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).apply(lambda x: (x.user_action_time.min() - x.user_turn_start_time.min()).microseconds)

user_id  user_game_index  user_turn_start_index
1336     1                0                             NaN
                          1                             NaN
         2                0                             NaN
                          1                        270000.0
                          2                        912000.0
                          3                             NaN
         3                0                             NaN
                          1                             NaN
         4                0                             NaN
                          1                        426000.0
                          2                        541000.0
                          3                        233000.0
                          4                        316000.0
                          5                        171000.0
         5                0                             NaN
                          1                        4

In [132]:
# example_df['turn_time_seconds'] = 
example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).apply(lambda x: (x.user_turn_end_time.max() - x.user_turn_start_time.min()).microseconds/100000)

user_id  user_game_index  user_turn_start_index
1336     1                0                         NaN
                          1                         NaN
         2                0                         NaN
                          1                        5.17
                          2                        0.98
                          3                         NaN
         3                0                         NaN
                          1                         NaN
         4                0                         NaN
                          1                        4.26
                          2                        2.01
                          3                        9.49
                          4                        8.44
                          5                        2.68
         5                0                         NaN
                          1                        2.85
                          2                        3.71


In [133]:
# example_df['turn_time_seconds'] = 


In [134]:
# example_df['num_of_cards_played'] = 
example_df.groupby(['user_id', 'user_game_index', 'user_turn_start_index']).apply(lambda x: x.user_turn_start_time.min() )

user_id  user_game_index  user_turn_start_index
1336     1                0                                           NaT
                          1                       2022-08-05 21:00:31.802
         2                0                                           NaT
                          1                       2022-08-16 19:38:52.663
                          2                       2022-08-16 19:39:24.461
                          3                       2022-08-16 19:39:50.015
         3                0                                           NaT
                          1                       2022-08-16 19:40:05.669
         4                0                                           NaT
                          1                       2022-08-16 20:17:15.966
                          2                       2022-08-16 20:20:28.584
                          3                       2022-08-16 20:21:14.411
                          4                       2022-08-16 20:

In [173]:
played_df = example_df[example_df.event_name=='user_played_card'].copy()

In [174]:
played_df

Unnamed: 0,_id,timestamp,user_id,browser_session_id,product,activity,bucket,event_name,best_play_length,best_play,...,user_game_index,user_turn_start_index,user_turn_end_index,turn_id,match_type,last_match_type,match_type_change,comp_turn,user_turn,corrected_activity
6,62fbf256e764193979eeaddf,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,2,1,0,1-0,color,,,False,True,EQUIVACARDS
8,62fbf25de764193979eeade0,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,2,1,0,1-0,color,color,False,False,True,EQUIVACARDS
16,62fbf2735c4e1a2091565a2e,1660679000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,2,2,1,2-1,color,,,False,True,EQUIVACARDS
46,62fbfc12e764193979eeadef,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,2,1,2-1,value,,,False,True,EQUIVACARDS
48,62fbfc18e764193979eeadf0,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,2,1,2-1,value,value,False,False,True,EQUIVACARDS
56,62fbfc525c4e1a2091565a3c,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,3,2,3-2,value,,,False,True,EQUIVACARDS
58,62fbfc585c4e1a2091565a3d,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,3,2,3-2,value,value,False,False,True,EQUIVACARDS
64,62fbfc67289aed6bd15ac55a,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,4,3,4-3,,,,False,True,EQUIVACARDS
71,62fbfc715c4e1a2091565a42,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,5,4,5-4,color,,,False,True,EQUIVACARDS
73,62fbfc72289aed6bd15ac55c,1660681000.0,1336,Not provided,SKYE,EQUIVACARDS,2022-08-eqc-study,user_played_card,,,...,4,5,4,5-4,color,color,False,False,True,EQUIVACARDS


In [148]:
match_types = ['color', 'value', 'algebraic']
for match_type in match_types:
    example_df[f'previous_match_{match_type}'] = game_temp_df[game[f'match_{match_type}'].shift(1)

In [167]:
def strict_switch(row):
    if row['match_color'] == np.nan:
        return np.nan
    is_strict = np.nan
    match_types = ['color', 'value', 'algebraic']
    is_strict = True
    for match_type in match_types:
        if row[f'match_{match_type}']:
            if row[f'match_{match_type}'] == row[f'previous_match_{match_type}']:
                is_strict = False
    return is_strict

In [168]:
game_temp_df['strict_switch'] = game_temp_df.apply(strict_switch, axis=1)

In [169]:
game_temp_df[['event_name','match_color', 'match_value', 'match_algebraic', 'previous_match_color', 'previous_match_value', 'previous_match_algebraic','strict_switch']]

Unnamed: 0,event_name,match_color,match_value,match_algebraic,previous_match_color,previous_match_value,previous_match_algebraic,strict_switch
0,launched_equivacards,,,,,,,True
1,user_turn,,,,,,,True
2,deal_ended,,,,,,,True
3,launched_equivacards,,,,,,,True
5,user_turn,,,,,,,True
...,...,...,...,...,...,...,...,...
14429,user_played_card,False,True,False,,,,True
14430,game_state_changed,,,,,,,True
14431,user_played_card,True,False,False,False,True,False,True
14432,game_state_changed,,,,,,,True


In [145]:
def ambiguous_switch(row):
    match_str = ""
    match_types = ['color', 'value', 'algebraic']
    for match_type in match_types:
        if row[f'match_{match_type}'] is True:
            match_str += match_type
    return match_str

In [145]:
def strict_repeat(row):
    match_str = ""
    match_types = ['color', 'value', 'algebraic']
    for match_type in match_types:
        if row[f'match_{match_type}'] is True:
            match_str += match_type
    return match_str

In [90]:
game_temp_df['match_type_change'] = game_temp_df.apply(lambda x: x.match_type != x.last_match_type if type(x.last_match_type) == str else None, axis=1)

In [135]:
example_df.user_turn_end_time.max() 
# - example_df.user_turn_end_time.min()

Timestamp('2022-08-16 20:34:13.679000')

In [136]:
example_df.user_turn_end_time.min()

Timestamp('2022-08-16 19:39:17.180000')

In [137]:
example_df.user_turn_end_time.max()

Timestamp('2022-08-16 20:34:13.679000')

In [138]:
# example_df['num_of_cards_played'] = 
example_df.groupby(['user_id', 'user_game_index', 'turn_id', 'user_turn', 'event_name']).card.count()

user_id  user_game_index  turn_id  user_turn  event_name          
1336     1                0-0      False      launched_equivacards    0
                          1-0      True       deal_ended              0
                                              user_turn               0
         2                0-0      False      launched_equivacards    0
                          1-0      True       deal_ended              0
                                                                     ..
         9                1-0      True       user_played_card        5
                                              user_turn               0
                          1-1      False      game_state_changed      0
                                              user_drew_card          0
                          1-2      True       user_lost               0
Name: card, Length: 210, dtype: int64

In [184]:
test_1337_df = pd.read_csv('./data/1336_7_test_example.csv')
test_1337_df

Unnamed: 0,user_id,user_game_index,user_turn_start_index,time_to_first_move_seconds,num_of_cards_played_in_turn,strict_switch,ambiguous_switch,strict_repeat,first_match_in_turn,x_variable_switches,num_cards_in_hand_at_start_of_turn,max_possible_cards_playable,num_play_not_allowed,total_turn_time
0,1336,7,1,5,3,0,0,0,color,2,7,1,6,30
1,1336,7,2,2,0,0,0,0,,0,5,0,6,15
2,1336,7,3,1,5,3,0,1,color,0,6,6,0,25
3,1336,7,4,2,0,0,0,0,,0,2,0,2,5


In [210]:
def test_compare_column(df, answer_df, columns):
    assert df.shape[1]==answer_df.shape[1], 'unequal number of turns'
    for column in columns:
        assert list(df[column].values) == list(answer_df[column].values), f"column:{column} does not match"

In [211]:
compare_columns = test_1337_df.columns
test_compare_column(test_1337_df, test_1337_df, compare_columns)

# Calc options

In [None]:
calc_df = example_df[
    example_df.event_name=='game_state_changed'
][[
    'user_launch_index', 'user_game_index','turn_id', 
    'user_turn', 'event_name','user_id', 'board_right_card', 'board_left_card', 'p1_hand_card_0',
    'p1_hand_card_1', 'p1_hand_card_2', 'p1_hand_card_3', 'p1_hand_card_4', 
    'p1_hand_card_5','p1_hand_card_6', 'p1_hand_card_7', 'p1_hand_card_8', 'p1_hand_card_9',
       'p1_hand_card_10', 'card', 'value', 'match_color','match_value', 'match_algebraic']].copy()

In [None]:
p1_card_columns = [column for column in calc_df.columns if 'p1_hand_card' in column]
p1_card_columns

In [None]:
for p1_column in p1_card_columns:
    

In [None]:
calc_df.to_csv('example_events.csv')

In [None]:
def eval_card(x,expr):
    if expr=='1':
        return(1)
    elif expr=='2':
        return(2)
    elif expr=='3':
        return(3)
    elif expr=='4':
        return(4)
    elif expr=='5':
        return(5)
    elif expr=='6':
        return(6)
    elif expr=='7':
        return(7)
    elif expr=='8':
        return(8)
    elif expr=='x':
        return(x)
    elif expr=='x+x':
        return(x+x)
    elif expr=='x+1':
        return(x+1)
    elif expr=='x+2':
        return(x+2)
    elif expr=='9-x':
        return(9-x)
    elif expr=='2x':
        return(2*x)
    elif '=' in expr:
        return False
    print(f'unexpected card expression {expr}')

In [None]:
calc_df['board.x_val'] = calc_df.apply(lambda x: int(x.board_left_card[-1:]), axis=1)

In [None]:
calc_df['board.color'] = calc_df.apply(lambda x: (x.board_right_card.split(".")[0]), axis=1)

In [None]:
calc_df['board.right_val'] = calc_df.apply(lambda x: (x.board_right_card.split(".")[1]), axis=1)

In [None]:
calc_df['board.right_val'] = calc_df.apply(lambda x: (x.board_right_card.split(".")[1]), axis=1)

In [None]:
calc_df['aval_value'] = calc_df.apply(lambda x: eval_card(int(x['board.x_val']), x['board.right_val']), axis =1)

In [None]:
for column in p1_card_columns:
    calc_df[f'{column}_can_match_color'] = calc_df.apply(lambda x: x['board.color'] in str(x[column]), axis =1)
    calc_df[f'{column}_can_match_value'] = calc_df.apply(lambda x: x['board.right_val'] in str(x[column]), axis =1)
    

In [None]:
for column in p1_card_columns:
    calc_df[f'{column}_eval_value'] = calc_df.apply(lambda x: eval_card(int(x['board.x_val']), x[column].split(".")[1]) if type(x[column])==str else None, axis =1)
    

In [None]:
for column in p1_card_columns:
    calc_df[f'{column}_can_match_algebraic'] = calc_df.apply(
        lambda x: str(x[f'{column}_eval_value']) == str(x['aval_value'])  if type(x[column])==str else None, axis =1)
    

In [None]:
can_match_columns={}
for match_type in ['algebraic', 'color', 'value']:
    can_match_columns[match_type] = [ column+ f'_can_match_{match_type}' for column in p1_card_columns]
    calc_df[f'num_can_match_{match_type}'] = calc_df[can_match_columns[match_type]].sum(axis=1)

In [None]:
pd.merge(example_df, calc_df, how='outer').to_csv('./example_events_with_options.csv')

In [None]:
####

## Review of game time and distribution of events per game 

In [None]:
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap, scale_x_date, geom_line,facet_grid, theme, element_text, labs, element_blank, ggtitle, geom_bar

In [None]:
(ggplot(
   corrected_equivacards_events, aes('timestamp_combined', 'factor(user_id)', color= 'factor(user_id)'))
+ geom_point(show_legend=False)
 + geom_line(show_legend=False)
  + labs(x= "date", y="User Id")
 + ggtitle("Game by user_id vs date")
 + theme(figure_size=(6, 4), axis_text_x=element_text(rotation=90, hjust=1)) 
)

In [None]:
(ggplot(
   corrected_equivacards_events, aes('user_game_index', fill= 'factor(event_name)'))
+ geom_bar(stat='count', position= 'stack')
 + facet_grid('user_id ~', scales ='free', )
 + theme(figure_size=(4, 10), axis_text_x=element_text(rotation=90, hjust=1), strip_text_y = element_text(angle = 0)) 
 + labs(x= "User Game Index", y="Event count")
 + ggtitle("Game events by user_id")
)

## Cursory comparison of best move 0 to taken move 0

In [None]:
corrected_equivacards_events['previous_best_move_0'] = corrected_equivacards_events.groupby('user_id').best_play_turn_0_label.shift(2).apply(lambda x: x.replace(',', '.') if type(x)==str else None ) 
corrected_equivacards_events['made_best_move_0'] = corrected_equivacards_events.apply(lambda x: x.previous_best_move_0==x.card, axis =1) 

In [None]:
corrected_equivacards_events[['event_name','card', 'best_play_turn_0_label', 'previous_best_move_0', 'made_best_move_0']].head(15)

In [None]:
corrected_equivacards_events[corrected_equivacards_events.event_name=='user_turn'].groupby(['user_id', 'made_best_move_0'])['$oid'].count()

In [None]:
corrected_equivacards_events[corrected_equivacards_events.event_name.isin(['user_won', 'user_lost'])].groupby(['user_id', 'event_name'])['$oid'].count()

In [None]:
corrected_equivacards_events[corrected_equivacards_events.event_name.isin(['user_won', 'user_lost'])].groupby(['event_name'])['$oid'].count()

In [None]:
corrected_equivacards_events[corrected_equivacards_events.event_name.isin(['user_turn'])].groupby(['user_game_index'])['$oid'].count()

In [None]:
corrected_equivacards_events.groupby('deck_version').count()