In [1]:
import os
import re
import json

import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

import random
random.seed(28)
np.random.seed(28)

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


# Any results you write to the current directory are saved as output.
from time import time
from tqdm import tqdm_notebook as tqdm

# Notebook-wide pandas display settings.
pd.set_option('display.max_columns', 1000)
# Use the fully-qualified option key: the bare pattern 'max_rows' also matches
# 'styler.render.max_rows' in newer pandas releases, where set_option raises
# OptionError because the pattern is ambiguous.
pd.set_option('display.max_rows', 500)
pd.options.display.precision = 15
# NOTE: this re-seeds NumPy's global RNG and overrides the earlier
# np.random.seed(28) call; 42 is the seed that is actually in effect.
np.random.seed(42)

import warnings
warnings.filterwarnings("ignore")

In [2]:
def read_data(data_dir_path=None):
    """Load the five competition CSV files and return them as DataFrames.

    Parameters
    ----------
    data_dir_path : str or None, optional
        Directory that contains the CSV files. When ``None`` (the default)
        the Kaggle input directory is used if it exists, otherwise
        ``'../data/raw/'``. A trailing slash is not required.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test, train_labels, specs, sample_submission)``, in that
        order.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when one of the files is missing.
    """
    if data_dir_path is None:
        kaggle_dir = '/kaggle/input/data-science-bowl-2019/'
        data_dir_path = kaggle_dir if os.path.exists(kaggle_dir) else '../data/raw/'

    # (file name on disk, label used in the shape message). The labels are
    # kept exactly as the progress messages have always been printed.
    files = [
        ('train.csv', 'Training.csv'),
        ('test.csv', 'Test.csv'),
        ('train_labels.csv', 'Train_labels.csv'),
        ('specs.csv', 'Specs.csv'),
        ('sample_submission.csv', 'Sample_submission.csv'),
    ]

    frames = []
    for file_name, label in files:
        print('Reading {} file....'.format(file_name))
        # os.path.join works whether or not the directory ends with a slash.
        frame = pd.read_csv(os.path.join(data_dir_path, file_name))
        print('{} file have {} rows and {} columns'.format(label, frame.shape[0], frame.shape[1]))
        frames.append(frame)

    return tuple(frames)

In [3]:
# Load the five CSV tables returned by read_data() into notebook-level variables.
raw_train, raw_test, train_labels, specs, sample_submission = read_data()

Reading train.csv file....
Training.csv file have 11341042 rows and 11 columns
Reading test.csv file....
Test.csv file have 1156414 rows and 11 columns
Reading train_labels.csv file....
Train_labels.csv file have 17690 rows and 7 columns
Reading specs.csv file....
Specs.csv file have 386 rows and 3 columns
Reading sample_submission.csv file....
Sample_submission.csv file have 1000 rows and 2 columns


In [4]:
# Installation id kept for reference; it is not used by the statements in this cell.
ins_id = '0006a69f'
# Restrict the training events to rows whose event_code is 2030.
temp_df = raw_train.loc[raw_train['event_code'].eq(2030)]

# Distinct game_session ids among the remaining rows of type "Game".
temp_df.loc[temp_df['type'] == "Game", 'game_session'].unique().tolist()

['f11eb823348bfa23',
 '07bacda7f9437b38',
 '356348f67a1dfb26',
 '0336db4dee65ad4b',
 '3422611c17545edd',
 '80d34a30c2998653',
 'ebd994d6a6c361b6',
 '1623f262016185e4',
 'cfc0fcbfcace6a52',
 'c884c7fe56b388f1',
 'faed1805baea37b0',
 '8043825259dc7ddd',
 '744663d06c3d0b65',
 '6693061f235f04a7',
 'b478a149cebc4934',
 'fa7e513faec3d0d8',
 '60a7cbbd3cdb859f',
 '203959bf44d06632',
 'e6a6a262a8243ff7',
 '1c691f67ef07d047',
 '2db0bef8a38a7dcb',
 'fa6560f04b64714c',
 'd78d2fa8766987f0',
 'c745a40b98d9efa1',
 'fb13c8a7665bd25c',
 '4da1abfca39656d4',
 '6c2079786aaa2003',
 'ab5231858a5be1a5',
 '051fe5b27fca1e42',
 '8ba64586223f05ef',
 'b2705e46e281e578',
 '072a89143ffc75e2',
 'bac45a29b0458e58',
 'ec932bb0eed268f4',
 '9fbe407b1f1fc194',
 '7073b1c632c48f1c',
 '356cbc0beea48039',
 '9613bf18ec6da3e3',
 '8d6a13bd3f7f2144',
 'eee67f81b133d003',
 '4ce95ea2356a0111',
 '4487bf5a9bcd260a',
 'b5cf00f75d5480ea',
 '34a28a2552afaf54',
 '8a41b4a2f1c612da',
 '5d31279cb9ffbeb3',
 'f09c24a661fe179d',
 'e961588a9db

In [5]:
# for i, session in temp_df.groupby('game_session'):
#     if session['type'].unique()[0]=='Game':
#         num_correct = session['event_data'].str.contains('"correct":true').astype(int).sum()
#         num_incorrect = session['event_data'].str.contains('"correct":false').astype(int).sum()
#         game_accuracy = num_correct/(num_correct+num_incorrect) if (num_correct+num_incorrect)!=0 else -1
#         print(i)
#         print('event_count', session['event_count'].iloc[-1])
#         print(num_correct, num_incorrect, game_accuracy)
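#         print('1セッションにおける平均セッション長', session['game_time'].diff().mean())  # label (EN): "average session length within one session"
#         print('1セッションにおけるセッション長の標準偏差', session['game_time'].diff().std())  # label (EN): "standard deviation of session length within one session"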
#         print()

In [6]:
# Pick a single game session out of temp_df for closer inspection.
session_1 = '4bca13a9e469b70a'
session1_df = temp_df.loc[temp_df['game_session'].eq(session_1)]

# session1_df

In [7]:
# Highest "round" value found in this session's event payloads.
# - Raw-string pattern avoids the invalid "\w" escape of a plain string literal.
# - (\d+) captures the whole number; a single-character capture would
#   truncate any round >= 10 to its first digit.
# - Casting to float makes max() numeric instead of a string comparison;
#   payloads without a "round" key become NaN and are skipped by max().
session1_df['event_data'].str.extract(r'"round":(\d+)', expand=True).astype(float).max()

0    2.0
dtype: float64

In [None]:
# Print the total number of "misses" for each of the first 10 Game sessions
# found in temp_df.
# Dedicated loop names are used so the notebook-level `session_1` /
# `session1_df` selected in an earlier cell are not overwritten by this loop.
game_session_ids = temp_df.loc[temp_df['type'] == "Game", 'game_session'].unique().tolist()[:10]
for game_session_id in game_session_ids:
    game_session_df = temp_df[temp_df['game_session'] == game_session_id]
    # (\d+) captures the full count; a one-character capture would truncate
    # values >= 10. Payloads without a "misses" key yield NaN and are dropped.
    misses = game_session_df['event_data'].str.extract(r'"misses":(\d+)', expand=False)
    # Convert to int before summing: summing the extracted strings would
    # concatenate them instead of adding the counts.
    print(misses.dropna().astype(int).sum())

In [47]:
# Print every event payload of session1_df, numbered from 0, with a blank
# line after each entry.
for idx, payload in enumerate(session1_df['event_data']):
    print(f'{idx}: {payload}', end='\n\n')

0: {"duration":33119,"misses":0,"round":1,"event_count":19,"game_time":41038,"event_code":2030}

1: {"duration":24918,"misses":0,"round":2,"event_count":31,"game_time":65956,"event_code":2030}



In [42]:
'''
0: {"version":"1.0","round":0,"event_count":1,"game_time":0,"event_code":2000}

1: {"coordinates":{"x":461,"y":631,"stage_width":1015,"stage_height":762},"round":0,"event_count":2,"game_time":1782,"event_code":4010}

2: {"houses":[4,3,2],"dinosaurs":[{"name":"stacey","size":4},{"name":"buddy","size":3},{"name":"petey","size":2}],"round":1,"event_count":3,"game_time":3415,"event_code":2020}

3: {"media_type":"animation","total_duration":4100,"description":"These dinosaurs want your help getting sorted into the right houses.","identifier":"intro_1331REVA","round":1,"event_count":4,"game_time":3415,"event_code":3010}

4: {"coordinates":{"x":261,"y":634,"stage_width":1015,"stage_height":762},"round":1,"event_count":5,"game_time":4565,"event_code":4070}

5: {"coordinates":{"x":21,"y":571,"stage_width":1015,"stage_height":762},"round":1,"event_count":6,"game_time":6674,"event_code":4070}

6: {"coordinates":{"x":93,"y":626,"stage_width":1015,"stage_height":762},"round":1,"event_count":7,"game_time":6974,"event_code":4070}

7: {"media_type":"animation","duration":4159,"description":"These dinosaurs want your help getting sorted into the right houses.","identifier":"intro_1331REVA","round":1,"event_count":8,"game_time":7574,"event_code":3110}

8: {"media_type":"animation","total_duration":4000,"description":"Drag them to the correct house from tallest to shortest!","identifier":"dragDesc_3102","round":1,"event_count":9,"game_time":7574,"event_code":3010}

9: {"media_type":"animation","duration":4017,"description":"Drag them to the correct house from tallest to shortest!","identifier":"dragDesc_3102","round":1,"event_count":10,"game_time":11591,"event_code":3110}

10: {"dinosaur":"stacey","size":4,"dinosaurs":[{"name":"none","size":0},{"name":"buddy","size":3},{"name":"petey","size":2}],"coordinates":{"x":326,"y":731,"stage_width":1015,"stage_height":762},"round":1,"event_count":11,"game_time":12891,"event_code":4030}

11: {"dinosaur":"stacey","house":{"size":4,"position":1},"size":4,"correct":true,"dinosaurs":[{"name":"none","size":0},{"name":"buddy","size":3},{"name":"petey","size":2}],"coordinates":{"x":440,"y":414,"stage_width":1015,"stage_height":762},"duration":799,"dinosaurs_placed":[{"size":4,"name":"stacey"},{"size":0,"name":"none"},{"size":0,"name":"none"}],"round":1,"event_count":12,"game_time":13691,"event_code":4020}

12: {"coordinates":{"x":421,"y":386,"stage_width":1015,"stage_height":762},"round":1,"event_count":13,"game_time":14524,"event_code":4070}

13: {"coordinates":{"x":420,"y":364,"stage_width":1015,"stage_height":762},"round":1,"event_count":14,"game_time":15208,"event_code":4070}

14: {"coordinates":{"x":583,"y":588,"stage_width":1015,"stage_height":762},"round":1,"event_count":15,"game_time":15824,"event_code":4070}

15: {"dinosaur":"buddy","size":3,"dinosaurs":[{"name":"none","size":0},{"name":"none","size":0},{"name":"petey","size":2}],"coordinates":{"x":581,"y":731,"stage_width":1015,"stage_height":762},"round":1,"event_count":16,"game_time":16141,"event_code":4030}

16: {"dinosaur":"buddy","house":{"size":2,"position":3},"size":3,"correct":false,"dinosaurs":[{"name":"none","size":0},{"name":"none","size":0},{"name":"petey","size":2}],"coordinates":{"x":838,"y":418,"stage_width":1015,"stage_height":762},"duration":800,"dinosaurs_placed":[{"size":4,"name":"stacey"},{"size":0,"name":"none"},{"size":3,"name":"buddy"}],"round":1,"event_count":17,"game_time":16941,"event_code":4020}

17: {"coordinates":{"x":848,"y":371,"stage_width":1015,"stage_height":762},"round":1,"event_count":18,"game_time":18591,"event_code":4070}

18: {"coordinates":{"x":855,"y":367,"stage_width":1015,"stage_height":762},"round":1,"event_count":19,"game_time":19183,"event_code":4070}

19: {"dinosaurs":[{"name":"none","size":0},{"name":"none","size":0},{"name":"petey","size":2}],"houses":[{"size":4,"dinosaur":"stacey"},{"size":3,"dinosaur":"none"},{"size":2,"dinosaur":"buddy"}],"dinosaurs_placed":[{"size":4,"name":"stacey"},{"size":0,"name":"none"},{"size":3,"name":"buddy"}],"round":1,"event_count":20,"game_time":21347,"event_code":4090}

20: {"media_type":"animation","total_duration":4100,"description":"These dinosaurs want your help getting sorted into the right houses.","identifier":"intro_1331REVA","round":1,"event_count":21,"game_time":21347,"event_code":3010}

21: {"coordinates":{"x":846,"y":375,"stage_width":1015,"stage_height":762},"round":1,"event_count":22,"game_time":23367,"event_code":4070}

22: {"media_type":"animation","duration":4095,"description":"These dinosaurs want your help getting sorted into the right houses.","identifier":"intro_1331REVA","round":1,"event_count":23,"game_time":25442,"event_code":3110}

23: {"media_type":"animation","total_duration":4000,"description":"Drag them to the correct house from tallest to shortest!","identifier":"dragDesc_3102","round":1,"event_count":24,"game_time":25442,"event_code":3010}

24: {"dinosaur":"petey","size":2,"dinosaurs":[{"name":"none","size":0},{"name":"none","size":0},{"name":"none","size":0}],"coordinates":{"x":837,"y":731,"stage_width":1015,"stage_height":762},"round":1,"event_count":25,"game_time":25808,"event_code":4030}

25: {"dinosaur":"petey","house":{"size":3,"position":2},"size":2,"correct":false,"dinosaurs":[{"name":"none","size":0},{"name":"none","size":0},{"name":"none","size":0}],"coordinates":{"x":657,"y":381,"stage_width":1015,"stage_height":762},"duration":1250,"dinosaurs_placed":[{"size":4,"name":"stacey"},{"size":2,"name":"petey"},{"size":3,"name":"buddy"}],"round":1,"event_count":26,"game_time":27058,"event_code":4020}

26: {"media_type":"animation","duration":2100,"description":"Drag them to the correct house from tallest to shortest!","identifier":"dragDesc_3102","round":1,"event_count":27,"game_time":27542,"event_code":3110}

27: {"media_type":"animation","total_duration":7133,"description":"Hmm, you're getting close but the dinosaurs aren't all in the right houses yet. Try again!","identifier":"tryAgain0_1333","round":1,"event_count":28,"game_time":27542,"event_code":3020}

28: {"houses":[4,3,2],"dinosaurs":[{"name":"none","size":0},{"name":"buddy","size":3},{"name":"petey","size":2}],"dinosaurs_placed":[{"size":4,"name":"stacey"},{"size":0,"name":"none"},{"size":0,"name":"none"}],"round":1,"event_count":29,"game_time":27542,"event_code":2025}

29: {"coordinates":{"x":825,"y":697,"stage_width":1015,"stage_height":762},"round":1,"event_count":30,"game_time":29917,"event_code":4070}

30: {"coordinates":{"x":828,"y":688,"stage_width":1015,"stage_height":762},"round":1,"event_count":31,"game_time":30967,"event_code":4070}

31: {"coordinates":{"x":833,"y":679,"stage_width":1015,"stage_height":762},"round":1,"event_count":32,"game_time":31717,"event_code":4070}

32: {"coordinates":{"x":409,"y":396,"stage_width":1015,"stage_height":762},"round":1,"event_count":33,"game_time":33034,"event_code":4070}

33: {"coordinates":{"x":2,"y":7,"stage_width":1015,"stage_height":762},"round":1,"event_count":34,"game_time":34584,"event_code":4070}

34: {"media_type":"animation","duration":7158,"description":"Hmm, you're getting close but the dinosaurs aren't all in the right houses yet. Try again!","identifier":"tryAgain0_1333","round":1,"event_count":35,"game_time":34700,"event_code":3120}
'''
# Ending the cell with print('') makes the last expression evaluate to None,
# so the reference string literal above is not echoed as the cell result.
print('')




In [9]:
# Pick a second session out of temp_df for inspection.
session_2 = '901acc108f55a5a1'
session2_df = temp_df.loc[temp_df['game_session'].eq(session_2)]

In [10]:
# Display the rows selected for session_2.
# NOTE(review): temp_df is built with event_code == 2030, yet the saved output
# of this cell lists other event codes (2000, 2025, 3010, ...). The output
# looks like it came from a different temp_df (out-of-order run) - re-run the
# notebook top to bottom to confirm.
session2_df

Unnamed: 0,event_id,game_session,timestamp,event_data,installation_id,event_count,event_code,game_time,title,type,world
2185,3bfd1a65,901acc108f55a5a1,2019-08-06T05:22:01.344Z,"{""version"":""1.0"",""event_count"":1,""game_time"":0...",0006a69f,1,2000,0,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2186,db02c830,901acc108f55a5a1,2019-08-06T05:22:01.400Z,"{""event_count"":2,""game_time"":37,""event_code"":2...",0006a69f,2,2025,37,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2187,a1e4395d,901acc108f55a5a1,2019-08-06T05:22:01.403Z,"{""description"":""Pull three mushrooms out of th...",0006a69f,3,3010,37,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2188,a52b92d5,901acc108f55a5a1,2019-08-06T05:22:05.242Z,"{""description"":""Pull three mushrooms out of th...",0006a69f,4,3110,3901,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2189,a1e4395d,901acc108f55a5a1,2019-08-06T05:22:05.244Z,"{""description"":""To pick a mushroom, pull it ou...",0006a69f,5,3010,3901,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2190,28ed704e,901acc108f55a5a1,2019-08-06T05:22:07.812Z,"{""height"":4,""coordinates"":{""x"":329,""y"":550,""st...",0006a69f,6,4025,6475,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2191,a52b92d5,901acc108f55a5a1,2019-08-06T05:22:07.812Z,"{""description"":""To pick a mushroom, pull it ou...",0006a69f,7,3110,6475,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2192,9d29771f,901acc108f55a5a1,2019-08-06T05:22:07.816Z,"{""description"":""That's one!"",""identifier"":""Dot...",0006a69f,8,3021,6475,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2193,c74f40cd,901acc108f55a5a1,2019-08-06T05:22:08.427Z,"{""description"":""That's one!"",""identifier"":""Dot...",0006a69f,9,3121,7084,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
2194,28ed704e,901acc108f55a5a1,2019-08-06T05:22:09.742Z,"{""height"":1,""coordinates"":{""x"":176,""y"":526,""st...",0006a69f,10,4025,8400,Mushroom Sorter (Assessment),Assessment,TREETOPCITY
