In [1]:
import numpy as np
import pandas as pd
import seaborn as sb
import statsmodels
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os

from fourinarowfunctions import *

# Directory where your data is stored. Later cells build file paths by plain
# string concatenation (direc + filename), so keep the trailing slash.
direc = '../data/'

In [2]:
def should_be_equal(name, actual, expected, warnings):
    """Append a warning to ``warnings`` when ``actual`` differs from ``expected``.

    Parameters
    ----------
    name : str
        Human-readable label of the quantity being checked; used as the
        start of the warning message.
    actual, expected
        Observed and required values; compared with ``!=``.
    warnings : list
        Collected warning messages; mutated in place. Nothing is returned.
    """
    mismatch = expected != actual
    if not mismatch:
        return
    message = f"{name} was expected to be {expected} but was {actual}"
    warnings.append(message)

def check_event_counts(data, num_games):
    """Cross-check how often each event type occurs in one event log.

    Parameters
    ----------
    data : iterable of dict
        Events; each must provide an ``'event_type'`` entry.
    num_games : int
        Number of games the log is expected to contain.

    Returns
    -------
    list of str
        One message per failed consistency check (empty when all pass).
    """
    event_types = [e['event_type'] for e in data]
    labels, tallies = np.unique(event_types, return_counts=True)
    # Event types that never occur count as 0 instead of raising KeyError.
    event_count_dict = defaultdict(int, dict(zip(labels, tallies)))

    outcome_total = (event_count_dict['opponent win']
                     + event_count_dict['user win']
                     + event_count_dict['draw'])
    # (label, actual, expected) triples, evaluated in order.
    checks = [
        ("Number of game starts", event_count_dict['start game'], num_games),
        ("Number of game endings", event_count_dict['end game'], num_games),
        ("User moves", event_count_dict['user move'], event_count_dict['your turn']),
        ("Opponent moves", event_count_dict['opponent move'], event_count_dict['waiting for opponent']),
        ("Game outcomes", outcome_total, event_count_dict['start game']),
    ]

    warnings = []
    for label, actual, expected in checks:
        should_be_equal(label, actual, expected, warnings)
    return warnings

In [3]:
# Name of your data file
filename = direc + 'trialdata.csv'
data_dict = load_data(filename)

# Parse every participant's event log. Participants for whom get_parsed_data
# raises an AssertionError are reported and left out of parsed_dict.
parsed_dict = {}
for username, data in data_dict.items():
    try:
        parsed_dict[username] = get_parsed_data(data)
    except AssertionError as e:
        print(f"Incomplete {username}: {e}")

# Materialise the keys as a list: a dict_keys view cannot be indexed, and
# downstream code accesses usernames by position (usernames[index]).
usernames = list(parsed_dict.keys())
print(f"{len(usernames)} users with complete data")

drop test_im:R_2YspUds9g1Pyp6J
drop test_im:R_1LYfkmETpZRD3kn
drop debug:R_3G7Yq6aginiNva2
drop test_im2:R_2YspUds9g1Pyp6J
drop im_full_test:assign_full_test
drop test_im3:R_2YspUds9g1Pyp6J
drop null:R_emsNDLsvWMLPb7X
drop test_im_ff:test_FF_win
drop test_im_edge:test_edge_win
drop twindemo:noas
drop null:R_2qCyb2CeLwcRxH2
drop null:R_25HjvhOMhc79iaa
drop debugYZNI1G:debugDVJFSO
drop test_BO_11:R_1eXnot6X58rnPMv
drop null:R_2zhw77FMGEb36zK
drop debugOCOGDF:debug2TDLTY
drop test_im_ff2:test_FF2_win
drop null:R_2aV9fwk4CR3GjBg
drop debugQWOQ1C:debugOQ8IEY
drop CPTest:R_1QxPuBVlzMbldXa
drop CPTest:R_1FsoM2AkWpLn1Qw
drop im_full_test:ssign_full_test
drop just_a_test_:test
drop debugfull:noas
drop null:R_vou3qrtMy6aHysp
drop debugscreenshot:noas
Loaded 192 participants
Incomplete A108:R_2TnOx7dVPSonpTg: user only finished 27 games
Incomplete A123:R_2viEesh2c9XCBMe: user only finished 27 games
Incomplete A120:R_PONVLZnwDK7wSpr: user only finished 27 games
Incomplete 115:R_241tt6O6tVQs9NR: us

In [4]:
# Load the other data files and attach each subject's age information.
# NOTE: "reactiontimes.csv" is written by the "create reaction times csv" cell
# further down; on a fresh checkout that cell has to run before this one.
bins = [7, 12, 18, 26]
age = pd.read_csv(direc + "SubjectAge.csv")
age['ageBins'] = pd.cut(age['FlooredAge'], bins)


def _merge_with_age(frame, id_column):
    """Derive StudyID (text before the first ':' of ``id_column``) and outer-merge with ``age``."""
    frame['StudyID'] = frame[id_column].str.split(':').str[0]
    return pd.merge(age, frame, on='StudyID', how='outer')


pars = _merge_with_age(pd.read_csv(direc + "params_with_metrics.csv", index_col=None), 'subject')
# pars = pars[pars['iteration'].notna()]

df_rt = _merge_with_age(pd.read_csv(direc + "reactiontimes.csv"), 'User')
# df_rt.to_csv(direc + 'df_rt.csv', index = False)

In [5]:
# Create the reaction-times csv: one row per non-practice user move.
rt_header = "User,GameNr,Category,Level,MoveNr,RT,UserMoves,OpponentMoves,Result\n"
with open(direc + "reactiontimes.csv", "w") as outfile:
    outfile.write(rt_header)
    for username in usernames:
        user_events = data_dict[username]

        # Pass 1: count moves per game and record each game's result.
        game_data = {}
        for event in user_events:
            kind = event["event_type"]
            if kind == "start game":
                user_moves = opponent_moves = 0
            elif kind == "user move":
                user_moves += 1
            elif kind == "opponent move":
                opponent_moves += 1
            elif kind == "end game":
                # Practice rounds are overwritten because the "real" rounds
                # reuse the same game_num.
                info = event["event_info"]
                game_data[info["game_num"]] = {
                    "user_moves": user_moves,
                    "opponent_moves": opponent_moves,
                    "outcome": info["result"],
                }

        # Pass 2: walk the events again and emit one csv line per user move.
        is_practice = False
        for event in user_events:
            kind = event["event_type"]
            if kind == "start game":
                ei = event["event_info"]
                game_nr = ei["game_num"]
                move_nr = 0
                is_practice = ei["is_practice"]
                game_info = ",".join(str(x) for x in (game_nr, ei["category"], ei["level"]))
            elif kind == "your turn":
                # Remember when the user was prompted; the RT is measured from here.
                prompt_time = int(event["event_time"])
            elif kind == "user move" and not is_practice:
                move_time = int(event["event_time"])
                move_nr += 1
                gd = game_data[game_nr]
                # Event times are divided by 1000 before being written as RT.
                fields = [
                    username,
                    game_info,
                    str(move_nr),
                    str((move_time - prompt_time) / 1000),
                    str(gd["user_moves"]),
                    str(gd["opponent_moves"]),
                    gd["outcome"],
                ]
                outfile.write(",".join(fields) + "\n")

In [6]:
# Create the durations csv: one row per user with total / main-task duration.
with open(direc + "gameDurations.csv", "w") as outfile:
    outfile.write("User,TotalDurationSeconds,MainDurationSeconds,GamesPlayed\n")
    for username in usernames:
        events = data_dict[username]
        games_played, startTime, endTime = 0, 0, 0
        for event in events:
            kind = event["event_type"]
            if kind == "start game":
                if event["event_info"]["game_num"] == 0:
                    # Practice and main task both have a game 0; the main task
                    # comes last, so its start time is the one that is kept.
                    startTime = event["event_time"]
            elif kind == "end game":
                # Overwritten on every game end, so the last one wins.
                endTime = event["event_time"]
                games_played = event["event_info"]["game_num"] + 1  # 0-indexed

        # Total duration is measured from the very first logged event.
        if len(events) >= 1:
            total_duration = str((endTime - events[0]["event_time"]) / 1000)
        else:
            total_duration = "-"
        main_duration = str((endTime - startTime) / 1000)
        row = [username, total_duration, main_duration, str(games_played)]
        outfile.write(",".join(row) + "\n")

In [None]:
# Sanity-check every participant with complete data.
# The second argument of check_event_counts is the number of games that ought
# to exist for this user. Any inconsistency in the event counts is printed as
# a warning next to the username. If the assertion below fails, something is
# wrong with your data (practice trials included).
had_warnings = False
for username in usernames:
    # Iterate over users, not over the events of whichever user happened to be
    # loaded last (the previous range(len(data)) loop did the latter).
    user_events = data_dict[username]
    warnings = check_event_counts(user_events, 37)
    if warnings:
        had_warnings = True
        print(username + " " + "; ".join(warnings))
    for bp, wp, m, c, rt in get_parsed_data(user_events):
        # presumably bp / wp encode the two players' pieces with '1' per piece
        # (TODO confirm); the piece counts may differ by at most one. The old
        # check compared bp with itself and could never fail.
        assert len(bp.split('1')) - len(wp.split('1')) in [0, 1], \
            f"{username}: inconsistent piece counts"
print("Some things didn't add up!" if had_warnings else "Everything looks ok")

In [None]:
# One histogram of 'category' per subject, with enlarged tick labels and titles.
# NOTE(review): dataFrame is not created in any cell visible here; confirm
# where it is defined before running on a fresh kernel.
hist_options = dict(range=[1,20], figsize=(50, 30), xlabelsize=30, ylabelsize=30)
hists = dataFrame['category'].hist(by=dataFrame['subject'], **hist_options)
for subject_ax in hists.ravel():
    subject_ax.title.set_size(32)


In [None]:
# Win rate as a function of opponent category, averaged over subjects.
plt.figure()
dataFrame['count'] = 1
dataFrame['win'] = (dataFrame['outcome'] == '1').astype(int)
dataFrame['category'] = pd.to_numeric(dataFrame['category'])
# numeric_only=True: since pandas 2.0 the groupby reductions no longer drop
# non-numeric columns silently, and mean() raises TypeError on string columns.
dataWinCount = dataFrame.groupby(['subject', 'category'], as_index=False).sum(numeric_only=True)
# Per subject and category: wins divided by number of games.
dataWinCount['winRate'] = dataWinCount['win']/dataWinCount['count']
# Average the per-subject rates within each category.
dataWinCount = dataWinCount.groupby(['category'], as_index=False).mean(numeric_only=True)

plt.plot(dataWinCount['category'],dataWinCount['winRate'] )

In [None]:
# Bar chart of the per-category win rate stored in dataWinCount (no error bars).
ax = sb.barplot(data=dataWinCount, x="category", y="winRate", color="#3182bd", ci=None)
ax.set_ylabel("Win rate")
ax.set_xlabel("Category")

In [None]:
# Preview the first rows of the per-category win-rate table.
dataWinCount.head()

In [None]:
# Attach age information to every game and build per-subject summaries.
dataFrame['subjectNr'] = dataFrame['subject'].str.split(':').str[0]
age['subjectNr'] = age['StudyID']
df_age_cat = pd.merge(age, dataFrame, on='subjectNr', how='outer')
# Keep only rows that actually carry game data.
df_age_cat = df_age_cat[df_age_cat['gameNumber'].notna()]
# numeric_only=True: since pandas 2.0 mean()/median() no longer drop
# non-numeric columns silently and raise TypeError on string columns.
# NOTE(review): these summaries are computed before the practice games are
# removed below, so they include practice games - confirm this is intended.
df_age_winrate = df_age_cat.groupby(['subjectNr'], as_index=False).mean(numeric_only=True)
df_age_medcat = df_age_cat.groupby(['subjectNr'], as_index=False).median(numeric_only=True)
df_age_maxcat = df_age_cat.groupby(['subjectNr'], as_index=False).max()

# Get all games numbered 0 or 1 (practice and task)
first_games = df_age_cat[pd.to_numeric(df_age_cat['gameNumber']) < 2]
# First games has 4 games per subject: 2 practice 2 task
counter = range(int(len(first_games) / 4))
# Make a list of every 1st and 2nd game (practice games), so exclude every 3rd and 4th (task games)
indices = [x * 4 for x in counter]
indices.extend([x * 4 + 1 for x in counter])
indices.sort()
# Select all the practice games from the df so we can get a list of their indices
practice_games = first_games.iloc[indices]
# Remove the practice indices from df_age_cat. What's left are only task games.
df_age_cat = df_age_cat.drop(practice_games.index)

df_age_cat

In [None]:
# Bar chart of the mean of 'win' per subject (no error bars).
ax = sb.barplot(data=dataFrame, x="subject", y="win", color="#3182bd", ci=None)
ax.set_ylabel("Total win rate")
ax.set_xlabel("Subject")


In [None]:
# Opponent category as a function of game number (ci=68 requests 68% intervals).
win_age = sb.pointplot(data=df_age_cat, x="gameNumber", y="category", ci=68)
win_age.set_ylabel("Opponent category", fontsize=20)
win_age.set_xlabel("Game number", fontsize=20)

# Bin subjects by floored age, then show the rows whose index label is a
# multiple of 30 (0, 30, ..., 570) and still present in df_age_cat.
bins = [7, 12, 18, 26]
df_age_cat['ageBins'] = pd.cut(df_age_cat['FlooredAge'], bins)
sample_labels = list(range(0, 600, 30))
df_age_cat.loc[df_age_cat.index.intersection(sample_labels), :]

In [None]:
# Opponent category per game number, one semi-transparent curve per age bin.
# df_age_cat2 is an alias (not a copy), so the numeric conversion below also
# applies to df_age_cat.
df_age_cat2 = df_age_cat
df_age_cat2["gameNumber"] = pd.to_numeric(df_age_cat2["gameNumber"])
# plot_kws is not a pointplot parameter: recent seaborn forwards unknown
# keywords to matplotlib's Line2D and fails on it. Transparency is applied
# explicitly with plt.setp below instead.
win_age = sb.pointplot(x="gameNumber", y="category", data=df_age_cat2, ci=68, hue="ageBins", dodge=True)
plt.setp(win_age.collections, alpha=.5) #for the markers
plt.setp(win_age.lines, alpha=.5)
plt.ylabel("Opponent category", fontsize=20)
plt.xlabel("Game number", fontsize=20)
plt.legend(loc='upper left', frameon=False)

In [None]:
# Wider, fully opaque version of the per-age-bin curve using the Set2 palette.
plt.figure(figsize=(10,5))
# df_age_cat2 is an alias (not a copy), so the numeric conversion below also
# applies to df_age_cat.
df_age_cat2 = df_age_cat
df_age_cat2["gameNumber"] = pd.to_numeric(df_age_cat2["gameNumber"])
# plot_kws is not a pointplot parameter: recent seaborn forwards unknown
# keywords to matplotlib's Line2D and fails on it. Opacity is set explicitly
# with plt.setp below instead.
win_age = sb.pointplot(x="gameNumber", y="category", data=df_age_cat2, ci=68,
                       hue="ageBins", dodge=True, palette=sb.color_palette("Set2"),
                       legend=False)
plt.setp(win_age.collections, alpha=1) #for the markers
plt.setp(win_age.lines, alpha=1)
plt.ylabel("Opponent category", fontsize=20)
plt.xlabel("Game number", fontsize=20)
plt.legend(loc='upper left', frameon=False)

In [None]:
# Average the parameter table per subject and regress feature drop rate on age.
# numeric_only=True: since pandas 2.0 mean() no longer drops non-numeric
# columns silently and raises TypeError on string columns such as 'subject'.
pars_mean = pars.groupby(['StudyID'], as_index=False).mean(numeric_only=True)
win_age = sb.regplot(x="ExactAge", y="FeatureDropRate", data=pars_mean, ci =68, color = "green")
plt.ylabel("Feature drop rate", fontsize=20)
plt.xlabel("Age", fontsize=20)


In [None]:
# Regression of each subject's median category (df_age_medcat) on exact age.
win_age = sb.regplot(data=df_age_medcat, x="ExactAge", y="category", ci=68)
win_age.set_ylabel("Median category", fontsize=20)
win_age.set_xlabel("Age", fontsize=20)


In [None]:
# Regression of each subject's maximum category (df_age_maxcat) on exact age.
win_age = sb.regplot(data=df_age_maxcat, x="ExactAge", y="category", color="Darkblue", ci=68)
win_age.set_ylabel("Max category", fontsize=20)
win_age.set_xlabel("Age", fontsize=20)

In [None]:
# Regression of each subject's mean 'win' value (df_age_winrate) on exact age.
win_age = sb.regplot(data=df_age_winrate, x="ExactAge", y="win", color='Purple', ci=68)
win_age.set_ylabel("Win rate", fontsize=20)
win_age.set_xlabel("Age", fontsize=20)

In [None]:
#plot winrate as function of category
dataFrame['count'] = 1
dataFrame['win'] = (dataFrame['outcome'] == '1').astype(int)
dataFrame['category'] = pd.to_numeric(dataFrame['category'])
dataWinCount = dataFrame.groupby(['subject', 'category'], as_index=False).sum()
dataWinCount['winRate'] = dataWinCount['win']/dataWinCount['count']
dataWinCount = dataWinCount.groupby(['category'], as_index=False).mean()
print(dataWinCount)
dataWinCount['winRate'].plot.line()

In [None]:
# Collect the first line of every <subject>/params<k>.csv file into a single
# paramsMatrix.csv with rows of the form "<subject>,<k>,<first line>".
# NOTE(review): inputDir is an absolute, machine-specific path; point it at
# your own fitting-results folder before running.
inputDir = '/Users/ilima/Google Drive/Projects/Cate Hartley Weiji/4inarow_fitting_results'
with open(inputDir + '/paramsMatrix.csv', 'w') as outfile:
    for subject in range(48):
        for paramnr in range(5):
            params_path = inputDir + '/' + str(subject + 1) + '/' + 'params' + str(paramnr+1) + '.csv'
            # Read first so that a missing file does not leave a dangling
            # "<subject>,<k>," prefix in the output.
            with open(params_path) as infile:
                line = infile.readline()
            # Normalise the line ending: a params file without a trailing
            # newline (or an empty one) would otherwise merge with the next row.
            outfile.write(str(subject+1) + ',' + str(paramnr+1) + ',' + line.rstrip('\n') + '\n')