# Imports

In [None]:
import numpy as np
import os
import math
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo
pyo.init_notebook_mode()
from plotly.subplots import make_subplots
import csv
import json
from scipy.stats import wasserstein_distance
import statistics
from tqdm import tqdm
import shutil
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn import svm
import matplotlib.pyplot as plt
import re

# Data Loading Functions

In [None]:
def read_stats_data(name, mechanics, folder):
    """Load per-playtrace statistics from the JSON files of one agent or user.

    Every file in *folder* whose file name contains *name* is read. A file may
    hold either a list of playtraces or a dict with the list under "results".

    Args:
        name: Substring that selects the files to read.
        mechanics: Mechanic names; each becomes one column of the result.
        folder: Directory holding the JSON result files.

    Returns:
        A DataFrame with one row per playtrace: one column per mechanic
        (0.0 when the playtrace did not report it), "EndTurn" taken from
        ``levelReport.turnsTaken`` and "MapNumber".

    Raises:
        IndexError: If a playtrace has no "map_number" and the file name
            contains no digits to fall back on.
    """
    data = {mech: [] for mech in mechanics}
    data["EndTurn"] = []
    data["MapNumber"] = []
    # os.listdir order is arbitrary; sort so the row order is reproducible.
    file_names = sorted(f for f in os.listdir(folder) if name in f)
    for fn in file_names:
        with open(os.path.join(folder, fn)) as f:
            json_data = json.load(f)
        if "results" in json_data:
            json_data = json_data["results"]
        for playtrace in json_data:
            frequencies = playtrace["frequencies"]
            for mech in mechanics:
                data[mech].append(frequencies.get(mech, 0.0))
            data["EndTurn"].append(playtrace["levelReport"]["turnsTaken"])
            if "map_number" in playtrace:
                data["MapNumber"].append(int(playtrace["map_number"]))
            else:
                # Fall back to the first run of digits in the file name.
                data["MapNumber"].append(int(re.findall(r"\d+", fn)[0]))
    return pd.DataFrame(data)

def build_users_data(folder, mechanics, users):
    """Stack the playtrace statistics of several users into one frame.

    Args:
        folder: Directory holding the users' JSON result files.
        mechanics: Mechanic names forwarded to ``read_stats_data``.
        users: Non-empty sequence of user identifiers.

    Returns:
        The concatenated per-user frames, indexed by the user identifier
        (index name "id"); a user with several playtraces owns several rows.
    """
    def tagged_frame(user):
        # One user's playtraces, with the owner recorded in an 'id' column.
        frame = read_stats_data(user, mechanics, folder)
        frame['id'] = user
        return frame

    combined = tagged_frame(users[0])
    for other in users[1:]:
        combined = pd.concat([combined, tagged_frame(other)])
    combined.set_index('id', inplace=True)
    return combined

def read_questionaire(users, classes, scores, folder):
    """Compute every user's average questionnaire score per class.

    Args:
        users: User identifiers; ``<folder>/<user>.json`` is read for each.
        classes: Mapping of class name to the question keys it averages.
        scores: Mapping of answer text to its numeric score.
        folder: Directory holding the users' JSON files.

    Returns:
        A DataFrame indexed by user id (index name "id") with one column per
        class. A user whose file has no "Q1" entry gets 0.0 in every class,
        so every user has a row and callers can filter on the score sum.
    """
    data = {key: [] for key in classes}
    data['id'] = []
    for user in users:
        with open(os.path.join(folder, user + ".json")) as f:
            json_data = json.load(f)
        answered = "Q1" in json_data
        data['id'].append(user)
        for key, questions in classes.items():
            if not answered:
                # Keep the columns the same length as 'id'; without this
                # placeholder the DataFrame construction below fails.
                data[key].append(0.0)
                continue
            total = 0.0
            for q in questions:
                total += scores[json_data[q]]
            data[key].append(total / len(questions))
    df = pd.DataFrame(data)
    df.set_index('id', inplace=True)
    return df

# Helper Functions

In [None]:
def get_mechanics(folders):
    """Collect the name of every mechanic reported in any playtrace.

    Args:
        folders: Directories to scan; every file ending in ".json" is read.
            A file may hold a list of playtraces or a dict with the list
            under "results".

    Returns:
        The distinct keys of all ``playtrace["frequencies"]`` dicts, sorted so
        that the feature/column order is the same on every run.
    """
    uniques = set()
    for folder in folders:
        # endswith (not substring) so backups such as "x.json.bak" are skipped.
        filenames = [f for f in os.listdir(folder) if f.endswith(".json")]
        for fn in filenames:
            with open(os.path.join(folder, fn)) as f:
                json_data = json.load(f)
            if "results" in json_data:
                json_data = json_data["results"]
            for playtrace in json_data:
                uniques.update(playtrace["frequencies"])
    return sorted(uniques)

def get_users(folders):
    """List the user identifiers found in the given folders.

    Args:
        folders: Directories to scan for files ending in ".json".

    Returns:
        The distinct file-name stems (text before the first "."), sorted so
        the user order is the same on every run.
    """
    uniques = set()
    for folder in folders:
        for fn in os.listdir(folder):
            # endswith (not substring) so "x.json.bak" is not taken for a user.
            if fn.endswith(".json"):
                uniques.add(fn.split(".")[0])
    return sorted(uniques)

def get_majority_voting(clf, data):
    """Predict one label per group of three consecutive samples by majority.

    Args:
        clf: Object exposing ``predict(data)`` that returns a NumPy array
            whose length is a multiple of three.
        data: Samples handed to ``clf.predict``.

    Returns:
        Array with the most frequent predicted label of each group of three;
        on a tie the smallest label wins (``np.unique`` sorts the values and
        ``argmax`` keeps the first maximum).
    """
    ballots = clf.predict(data).reshape((-1, 3))

    def winner(ballot):
        candidates, tally = np.unique(ballot, return_counts=True)
        return candidates[np.argmax(tally)]

    return np.array([winner(ballot) for ballot in ballots])

def divide_data_set(input_data, output_data, percentage):
    """Randomly split samples and labels into a training and validation part.

    Args:
        input_data: Indexable collection of samples.
        output_data: Labels, indexable in parallel with *input_data*.
        percentage: Fraction of the samples assigned to the training part
            (the count is truncated with ``int``).

    Returns:
        ``(train_set, train_label, val_data, val_label)`` as NumPy arrays.
        The shuffle uses the global ``random`` module state.
    """
    order = list(range(len(input_data)))
    random.shuffle(order)
    cut = int(percentage * len(order))

    def gather(source, positions):
        # Pick the shuffled entries element by element, then build the array.
        return np.array([source[order[pos]] for pos in positions])

    train_positions = range(cut)
    val_positions = range(cut, len(order))
    return (
        gather(input_data, train_positions),
        gather(output_data, train_positions),
        gather(input_data, val_positions),
        gather(output_data, val_positions),
    )

def filter_traces(data, column, value, IsUser=None):
    """Select the rows matching *value* and drop the bookkeeping columns.

    Args:
        data: Frame of playtraces.
        column: Column to compare against *value*, or the literal "index" to
            compare against the frame's index labels instead.
        value: Value a row must equal to be kept.
        IsUser: When not None, additionally keep only rows whose "IsUser"
            column equals this flag.

    Returns:
        The matching rows without the "IsUser" and "Label" columns.
    """
    keep_columns = (data.columns != 'IsUser') & (data.columns != 'Label')
    if column == 'index':
        row_mask = np.asarray(data.index == value)
    else:
        row_mask = (data[column] == value).to_numpy()
    if IsUser is not None:
        # Plain NumPy masks avoid index alignment, which matters because the
        # frame's index may contain duplicate labels.
        row_mask = row_mask & (data["IsUser"] == IsUser).to_numpy()
    return data.loc[row_mask, keep_columns]

def calc_mech_importance(fil_data, all_data, mech):
    """Return the signed Wasserstein distance of one mechanic's distribution.

    Args:
        fil_data: Frame with the filtered subset of traces.
        all_data: Frame with the reference set of traces.
        mech: Column (mechanic) to compare.

    Returns:
        The Wasserstein distance between the two columns, negated when the
        subset's mean is below the reference mean; equal means count as
        positive.
    """
    subset = fil_data[mech].to_numpy()
    reference = all_data[mech].to_numpy()
    direction = np.sign(subset.mean() - reference.mean())
    # np.sign gives 0 for equal means; treat that case as positive.
    direction = 1 if direction == 0 else direction
    return direction * wasserstein_distance(subset, reference)

def calc_mech_axis(data, column, values, mechanics):
    """Build a table of mechanic importances, one row per matched value.

    Args:
        data: Frame holding all traces (also used as the reference set).
        column: Column forwarded to ``filter_traces`` ("index" selects by
            index label).
        values: Candidate values of *column*; values matching no trace are
            skipped.
        mechanics: Mechanic columns to score.

    Returns:
        A DataFrame indexed by the values that matched at least one trace,
        with one column per mechanic holding ``calc_mech_importance`` of the
        matching traces against *data*.
    """
    # The filtered traces do not depend on the mechanic, so select them once
    # per value instead of once per (mechanic, value) pair.
    traces_by_value = {}
    for v in values:
        traces = filter_traces(data, column, v)
        if len(traces) > 0:
            traces_by_value[v] = traces
    value_temp = {
        mech: [
            calc_mech_importance(traces, data, mech)
            for traces in traces_by_value.values()
        ]
        for mech in mechanics
    }
    return pd.DataFrame(value_temp, index=list(traces_by_value))

# Render Helper Functions

In [None]:
def draw_mech_graph(data, title):
    """Show a scatter plot of mechanics over four coloured quadrants.

    Args:
        data: Iterable of entries indexed as ``entry[0]`` trace name,
            ``entry[1]`` per-point text, ``entry[2]`` x values and
            ``entry[3]`` y values. Each entry becomes one trace whose marker
            symbol is the entry's position in *data*.
        title: Figure title.
    """
    # Passed whole as the marker colour array of every trace
    # (presumably one colour per point - confirm against plotly's docs).
    palette = ['#FD3216', '#00FE35', '#6A76FC', '#FED4C4', '#FE00CE',
               '#0DF9FF', '#F6F926', '#FF9616', '#479B55', '#EEA6FB',
               '#DC587D', '#D626FF', '#6E899C', '#00B5F7', '#B68E00',
               '#C9FBE5', '#FF0092', '#22FFA7', '#E3EE9E', '#86CE00',
               '#BC7196', '#7E7DCD', '#FC6955', '#E48F72']

    # Far corner of each quadrant rectangle (all start at the origin),
    # followed by its outline and fill colour.
    quadrants = (
        (1.2, 1.2, "#43a047", "#76d275"),
        (-1.2, -1.2, "#e53935", "#ff6f60"),
        (1.2, -1.2, "#2196f3", "#6ec6ff"),
        (-1.2, 1.2, "#fbc02d", "#fff263"),
    )

    fig = go.Figure(layout={"width": 500, "height":500})
    for corner_x, corner_y, outline, fill in quadrants:
        fig.add_shape(
            type="rect",
            x0=0, y0=0, x1=corner_x, y1=corner_y,
            line=dict(color=outline),
            fillcolor=fill,
            layer="below",
            opacity=0.5
        )

    for symbol, entry in enumerate(data):
        marker_style = dict(
            color=palette,
            size=15,
            line=dict(width=1,color='DarkSlateGrey'))
        scatter = go.Scatter(
            x=entry[2],
            y=entry[3],
            mode='markers',
            marker_symbol=symbol,
            marker=marker_style,
            text=entry[1],
            name=entry[0]
        )
        fig.add_trace(scatter)
    fig.update_layout(title_text=title)
    fig.show()

# Building Data

In [None]:
# Mechanic names come from both result folders, users from the study folder.
mechanics = get_mechanics(["results", "results_study"])
users = get_users(["results_study"])

# Questionnaire items that are averaged into each class score.
classes = dict(
    MK=["Q5", "Q6", "Q10"],
    TC=["Q3", "Q4", "Q8"],
    R=["Q2", "Q7", "Q9"],
)
# Numeric score of each questionnaire answer (0 for "Never" up to 4 for "Always").
scores = {
    answer: points
    for points, answer in enumerate(["Never", "Rarely", "Sometimes", "Often", "Always"])
}
# Integer class id of each label, in the same order as `classes`.
labels = {persona: class_id for class_id, persona in enumerate(classes)}
kill_mechanics = ["OgreHit", "GoblinMeleeHit", "GoblinRangedHit", "BlobHit"]

# Read every data file: one frame per agent type plus the study users.
mk_data, tc_data, r_data = (
    read_stats_data(persona, mechanics, "results") for persona in ("MK", "TC", "R")
)
user_data = build_users_data("results_study", mechanics, users)

# "EnemyKill" is the sum of all hit mechanics of a playtrace.
for frame in (mk_data, tc_data, r_data, user_data):
    frame["EnemyKill"] = 0
    for kill in kill_mechanics:
        frame["EnemyKill"] += frame[kill]

ques_data = read_questionaire(users, classes, scores, "results_study")

# Remove the users whose questionnaire scores are all zero, i.e. users
# that did not do any of the questionnaire.
user_filter = ques_data['MK'] + ques_data['TC'] + ques_data['R'] > 0
user_indeces = ques_data.index[user_filter]

user_data = user_data.loc[user_indeces]
ques_data = ques_data.loc[user_indeces]

# Combine All Data

In [None]:
# Agent traces: tag each frame with its label, then stack them with a fresh
# integer index.
agent_frames = []
for persona, frame in (("MK", mk_data), ("TC", tc_data), ("R", r_data)):
    temp_data = frame.copy()
    temp_data["Label"] = persona
    temp_data["IsUser"] = False
    agent_frames.append(temp_data)
total_data = pd.concat(agent_frames, ignore_index=True)

# User traces carry no label yet and keep their user-id index.
temp_data = user_data.copy()
temp_data["Label"] = ""
temp_data["IsUser"] = True
total_data = pd.concat([total_data, temp_data])

# Normalised copy: every numeric column is divided by its own maximum.
n_total_data = total_data.copy()
not_normalised = ("Label", "IsUser", "MapNumber")
for col in n_total_data.columns:
    if col not in not_normalised:
        n_total_data[col] /= 1.0 * n_total_data[col].max()

# Building Data Set

In [None]:
remove_mechs = ["MapNumber"]

# Classifier inputs: agent traces without the map number; targets are the
# class ids 0/1/2 for the MK/TC/R frames, in stacking order.
agent_features = [frame.drop(columns=remove_mechs) for frame in (mk_data, tc_data, r_data)]
input_data = pd.concat(agent_features, ignore_index=True).to_numpy()
class_ids = []
for class_id, frame in enumerate((mk_data, tc_data, r_data)):
    class_ids.extend([class_id] * len(frame))
output_data = pd.Series(class_ids).to_numpy()

# Two views of the user traces: the per-user mean and every single trace.
user_features = user_data.drop(columns=remove_mechs)
avg_test_set = user_features.groupby('id').mean().to_numpy()
maj_test_set = user_features.to_numpy()
ques_set = ques_data.to_numpy()

total_set = np.concatenate((input_data, avg_test_set, maj_test_set))

# Scale every feature by its maximum over all three sets.
feature_max = total_set.max(axis=0)
n_input_data = input_data / feature_max
n_avg_test_set = avg_test_set / feature_max
n_maj_test_set = maj_test_set / feature_max
# Each user's questionnaire scores are rescaled to sum to one.
n_ques_set = ques_set / ques_set.sum(axis=1, keepdims=True)

# Training Classifier

In [None]:
# Hold out 30% of the agent traces for validation.
train_data, train_label, val_data, val_label = divide_data_set(n_input_data, output_data, 0.7)

dt = tree.DecisionTreeClassifier(max_depth=5)
knn = KNeighborsClassifier(n_neighbors=3)
svc = svm.SVC(decision_function_shape='ovo', probability=True)

# (report title, model, text appended after the validation line)
report_plan = (
    ("Decision Tree", dt, "\n"),
    ("K Nearest Neighbor", knn, "\n"),
    ("Support Vector Machine", svc, ""),
)
for title, model, trailer in report_plan:
    model.fit(train_data, train_label)
    print(f"{title} Training Score: {model.score(train_data, train_label)}")
    print(f"{title} Validation Score: {model.score(val_data, val_label)}{trailer}")

# Refit every model on the complete agent data before predicting users.
for model in (dt, knn, svc):
    model.fit(n_input_data, output_data)

# Predicting Users

In [None]:
def _agreement(first_labels, second_labels):
    """Fraction of users on which two label arrays agree."""
    return (first_labels == second_labels).sum()/len(avg_test_set)


def _mean_user_probs(model):
    """Class probabilities averaged over each user's three traces."""
    return model.predict_proba(n_maj_test_set).reshape((-1,3,3)).mean(axis=1)


# Predictions on the per-user averaged features.
dt_avg_probs, knn_avg_probs, svc_avg_probs = (
    model.predict_proba(n_avg_test_set) for model in (dt, knn, svc)
)
dt_avg_labels, knn_avg_labels, svc_avg_labels = (
    model.predict(n_avg_test_set) for model in (dt, knn, svc)
)

print(f"Average DT vs KNN: {_agreement(dt_avg_labels, knn_avg_labels)}")
print(f"Average DT vs SVC: {_agreement(dt_avg_labels, svc_avg_labels)}")
print(f"Average KNN vs SVC: {_agreement(knn_avg_labels, svc_avg_labels)}")

# Predictions per trace, reduced to one label per user by majority vote.
dt_maj_labels, knn_maj_labels, svc_maj_labels = (
    get_majority_voting(model, n_maj_test_set) for model in (dt, knn, svc)
)
dt_maj_probs, knn_maj_probs, svc_maj_probs = (
    _mean_user_probs(model) for model in (dt, knn, svc)
)

print(f"Majority DT vs KNN: {_agreement(dt_maj_labels, knn_maj_labels)}")
print(f"Majority DT vs SVC: {_agreement(dt_maj_labels, svc_maj_labels)}")
print(f"Majority KNN vs SVC: {_agreement(knn_maj_labels, svc_maj_labels)}")

print(f"Probability Distribution of DT: {dt_maj_probs.mean(axis=0)}")
print(f"Probability Distribution of KNN: {knn_maj_probs.mean(axis=0)}")
print(f"Probability Distribution of SVC: {svc_maj_probs.mean(axis=0)}")
print(f"Probability Distribution of User Questionaire: {n_ques_set.mean(axis=0)}")

# Calculating Mechanic Importance

In [None]:
import_mechs = mechanics + ["EndTurn", "EnemyKill"]


def _with_voted_labels(frame, voted_labels):
    """Copy *frame*, blank its labels and label users by majority vote."""
    labelled = frame.copy()
    labelled["Label"] = ""
    persona_names = list(labels.keys())
    user_rows = labelled[labelled["IsUser"] == True].index
    for i, label in enumerate(voted_labels):
        # Row i*3 is the first of the (assumed) three traces of user i. The
        # index label is the user id, so the assignment reaches every row
        # sharing that id.
        labelled.loc[user_rows[i*3], "Label"] = persona_names[label]
    return labelled


def _with_questionnaire_labels(frame):
    """Copy *frame*, blank its labels and label users by questionnaire."""
    labelled = frame.copy()
    labelled["Label"] = ""
    for u in users:
        if u in ques_data.index:
            q_value = ques_data.loc[u]
            # The class with the highest questionnaire score becomes the label.
            labelled.loc[u, "Label"] = q_value.index[q_value.argmax()]
    return labelled


def _normalise_by_total_max(frame):
    """Divide the numeric columns of *frame*, in place, by total_data's maxima."""
    for col in frame.columns:
        if col == "Label" or col == "IsUser" or col == "MapNumber":
            continue
        frame[col] /= 1.0 * total_data[col].max()


# Importance of each mechanic by level outcome, by agent label and by user.
x_import = calc_mech_axis(n_total_data, "ReachStairs", [0, 1], import_mechs)
y_import = calc_mech_axis(n_total_data, "Label", labels.keys(), import_mechs)
user_import = calc_mech_axis(n_total_data, "index", users, import_mechs)

# Importance when the users are labelled by each classifier's majority vote.
dt_data = _with_voted_labels(n_total_data, dt_maj_labels)
dt_import = calc_mech_axis(dt_data, "Label", labels.keys(), import_mechs)

knn_data = _with_voted_labels(n_total_data, knn_maj_labels)
knn_import = calc_mech_axis(knn_data, "Label", labels.keys(), import_mechs)

svc_data = _with_voted_labels(n_total_data, svc_maj_labels)
svc_import = calc_mech_axis(svc_data, "Label", labels.keys(), import_mechs)

# Importance when the users are labelled by their own questionnaire answers.
uq_data = _with_questionnaire_labels(n_total_data)
uq_import = calc_mech_axis(uq_data, "Label", labels.keys(), import_mechs)

# Agent traces only (no user rows), normalised by the combined maxima.
temp_data = []
for persona, frame in (("MK", mk_data), ("TC", tc_data), ("R", r_data)):
    tagged = frame.copy()
    tagged["Label"] = persona
    tagged["IsUser"] = False
    temp_data.append(tagged)
a_no_user_data = pd.concat(temp_data, ignore_index=True)
_normalise_by_total_max(a_no_user_data)
a_no_user_import = calc_mech_axis(a_no_user_data, "Label", labels.keys(), import_mechs)

# User traces only (no agent rows), labelled by questionnaire and
# normalised by the combined maxima.
uq_no_agent_data = _with_questionnaire_labels(user_data)
_normalise_by_total_max(uq_no_agent_data)
uq_no_agent_import = calc_mech_axis(uq_no_agent_data, "Label", labels.keys(), import_mechs)

In [None]:
# Split the combined frame by origin. The two filters were previously
# swapped, so `user_data` held the agent rows and `agent_data` the user rows.
user_data = total_data[total_data["IsUser"]==True]
agent_data = total_data[total_data["IsUser"]==False]






In [None]:
# Display the mechanic-importance table computed from agent traces only.
a_no_user_import

In [None]:
# Display the mechanic-importance table computed from user traces only.
uq_no_agent_import

In [None]:
# The x axis is scaled by the largest magnitude found in its row labelled 1.
x_scale = max(x_import.loc[1].max(), abs(x_import.loc[1].min()))
n_x_import = x_import / x_scale

# All label-based tables share one scale: the largest magnitude in any of them.
total_y_import = pd.concat([y_import, dt_import, knn_import, svc_import, uq_import])
y_scale = max(total_y_import.max().max(), abs(total_y_import.min().min()))
n_y_import = y_import / y_scale
n_dt_import = dt_import / y_scale
n_svc_import = svc_import / y_scale
n_knn_import = knn_import / y_scale
n_uq_import = uq_import / y_scale

# The user-only and agent-only tables share a second, separate scale.
total_ynew_import = pd.concat([uq_no_agent_import.copy(), a_no_user_import.copy()], ignore_index=True)
ynew_scale = max(total_ynew_import.max().max(), abs(total_ynew_import.min().min()))
n_uq_no_agent_import = uq_no_agent_import / ynew_scale
n_a_no_user_import = a_no_user_import / ynew_scale

In [None]:
# Display the normalised user-only importance table.
n_uq_no_agent_import

In [None]:
# Display the normalised agent-only importance table.
n_a_no_user_import

In [None]:
# Display the stacked user-only and agent-only tables used to derive the shared scale.
total_ynew_import

# Draw Mech Importance

In [None]:
# Importance tables to plot; the commented entries are alternative sources.
algo_import = {
    # "AGENT": n_y_import,
    # "SVM": n_svc_import,
    # "KNN": n_knn_import,
    # "DT": n_dt_import,
    # "USER": n_uq_import,
    "U_USER": n_uq_no_agent_import,
    "A_AGENT": n_a_no_user_import
}

# Mechanics left out of the figures.
remove_mechs = ["OgreHit", "GoblinMeleeHit", "GoblinRangedHit", "BlobHit",
                "JavelinThrow", "OgreTreasure", "BlobCombine", "BlobPotion"]

plotted_mechs = [mech for mech in import_mechs if mech not in remove_mechs]

# One figure per persona with one series per importance source. (Swapping
# the two loops gives the alternative view: one figure per source with one
# series per persona.)
all_data = {}
for persona in labels.keys():
    data = []
    for algo in algo_import.keys():
        # x: importance for the "ReachStairs" row labelled 1;
        # y: importance for this persona in this source.
        x_values = np.array([n_x_import[mech][1] for mech in plotted_mechs])
        y_values = np.array([algo_import[algo][mech][persona] for mech in plotted_mechs])
        data.append((algo, list(plotted_mechs), x_values, y_values))
    all_data[persona] = data
    draw_mech_graph(data, persona)
    

In [None]:
# For each persona, print both plotted series and their cosine similarity.
for per in ["MK", "TC", "R"]:
    first_series, second_series = all_data[per][0], all_data[per][1]
    print(f"*** {per} ***")
    for series in (first_series, second_series):
        print(series[0])  # series name
        print(series[3])  # y values
    cs = cosine_similarity(first_series[3].reshape(1, -1), second_series[3].reshape(1, -1))
    print(f"CS: {cs}")

In [None]:
# Cosine similarity between every agent persona row and every user persona row.
compared_personas = ["MK", "TC", "R"]
for per1 in compared_personas:
    agent_vector = n_a_no_user_import.loc[per1].to_numpy().reshape(1,-1)
    for per2 in compared_personas:
        user_vector = n_uq_no_agent_import.loc[per2].to_numpy().reshape(1,-1)
        value = cosine_similarity(agent_vector, user_vector)
        print(f"Agent - {per1} | User - {per2}: {value}")

In [None]:
# Display the normalised agent-only importance vector of the "MK" row.
n_a_no_user_import.loc["MK"].to_numpy()

# Checking Similarity between Mechanic Importance

In [None]:
# Importance tables compared in this cell. They are listed here explicitly
# because `algo_import` (defined for the scatter plots) only holds the
# "U_USER"/"A_AGENT" tables and has no "AGENT", "USER" or "SVM" entry.
similarity_import = {
    "AGENT": n_y_import,
    "SVM": n_svc_import,
    "KNN": n_knn_import,
    "DT": n_dt_import,
    "USER": n_uq_import,
}


def _row_similarity(first_table, first_row, second_table, second_row):
    """Cosine similarity between one row of each importance table."""
    first = first_table.loc[first_row].to_numpy().reshape((1, -1))
    second = second_table.loc[second_row].to_numpy().reshape((1, -1))
    return cosine_similarity(first, second)[0][0]


def _save_heatmap(matrix, x_ticks, y_ticks, x_label, y_label, file_name, tight=False):
    """Draw *matrix* as a fixed-range red/blue heat map and save it."""
    plt.figure()
    plt.imshow(np.array(matrix), cmap='RdBu',  vmin=-0.9, vmax=0.9)
    plt.colorbar()
    plt.xticks(range(len(x_ticks)), x_ticks)
    plt.yticks(range(len(y_ticks)), y_ticks)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    if tight:
        plt.tight_layout()
    plt.savefig(file_name, bbox_inches='tight')


def _persona_matrix(reference_import, other_import, row_personas, col_personas):
    """Similarity of every reference persona (rows) to every other persona (columns)."""
    return [
        [_row_similarity(reference_import, p_x, other_import, p_y) for p_y in col_personas]
        for p_x in row_personas
    ]


label_x = list(labels.keys())[::-1]
label_y = list(labels.keys())

# Agent importance compared with every other source.
for algo in similarity_import.keys():
    if algo == "AGENT":
        continue
    data = _persona_matrix(similarity_import["AGENT"], similarity_import[algo], label_x, label_y)
    _save_heatmap(data, label_y, label_x, algo, "Agent", f"Agent_{algo}.pdf", tight=True)

# Questionnaire-labelled user importance compared with every other source.
for algo in similarity_import.keys():
    if algo == "USER":
        continue
    data = _persona_matrix(similarity_import["USER"], similarity_import[algo], label_x, label_y)
    _save_heatmap(data, label_y, label_x, algo, "User", f"User_{algo}.pdf")

# Per persona: how well each pair of sources agrees on that same persona.
label_y = list(["AGENT-SVM", "AGENT-USER", "USER-SVM"])
data = []
for persona in label_x:
    data.append([])
    for algo in label_y:
        parts = algo.split("-")
        data[-1].append(_row_similarity(similarity_import[parts[0]], persona,
                                        similarity_import[parts[1]], persona))
_save_heatmap(data, label_y, label_x, "Agreement", "Persona", f"AgreementPersona.pdf")

# User Questionnaire Agreement

In [None]:
# Per-user class probabilities from the questionnaire and from each classifier.
algo_probs = {
    "USER": n_ques_set,
    "SVM": svc_maj_probs,
    "KNN": knn_maj_probs,
    "DT": dt_maj_probs
}

distance = []
questionnaire_probs = algo_probs["USER"]
for algo, clf_probs in algo_probs.items():
    if algo == "USER":
        continue
    # Cosine similarity between row i of the classifier's probabilities and
    # row i of the questionnaire distribution.
    dist = [
        cosine_similarity(questionnaire_probs[i].reshape(1,-1), clf_prob.reshape(1,-1))[0][0]
        for i, clf_prob in enumerate(clf_probs)
    ]
    plt.hist(dist)
    plt.title(algo)
    plt.show()

# Distribution of Labels From the Different Machine Learning Models

In [None]:
# Histogram of the labels each classifier assigned to the user rows.
classifier_frames = (
    (svc_data, 'Support Vector Machines Classes'),
    (dt_data, 'Decision Tree Classes'),
    (knn_data, 'K-Nearest Neighbor Classes'),
)
for frame, title in classifier_frames:
    plt.figure()
    frame[frame["IsUser"] == True]["Label"].hist()
    plt.title(title)
plt.show()

# Label every user with the class that scored highest in their questionnaire.
uq_data = total_data.copy()
uq_data["Label"] = ""
for u in users:
    if u in ques_data.index:
        q_value = ques_data.loc[u]
        uq_data.loc[u, "Label"] = q_value.index[q_value.argmax()]

# Select the user rows with uq_data's own flag column rather than a mask
# borrowed from another frame.
plt.figure()
uq_data[uq_data["IsUser"] == True]["Label"].hist()
plt.title('Users')
plt.show()

# Calculating Model Data

In [None]:
def _label_users_by_vote(voted_labels):
    """Copy total_data and label each user with a classifier's majority vote."""
    labelled = total_data.copy()
    labelled["Label"] = ""
    persona_names = list(labels.keys())
    user_rows = labelled[labelled["IsUser"] == True].index
    for i, label in enumerate(voted_labels):
        # Row i*3 is the first of the (assumed) three traces of user i; the
        # index label is the user id, so all rows sharing it are labelled.
        labelled.loc[user_rows[i*3], "Label"] = persona_names[label]
    return labelled


def _label_users_by_questionnaire():
    """Copy total_data and label each user with their top questionnaire class."""
    labelled = total_data.copy()
    labelled["Label"] = ""
    for u in users:
        if u in ques_data.index:
            q_value = ques_data.loc[u]
            labelled.loc[u, "Label"] = q_value.index[q_value.argmax()]
    return labelled


# Un-normalised frames in which only the user rows carry a label.
dt_data = _label_users_by_vote(dt_maj_labels)
knn_data = _label_users_by_vote(knn_maj_labels)
svc_data = _label_users_by_vote(svc_maj_labels)
uq_data = _label_users_by_questionnaire()

# Calculating Statistics

In [None]:
# Source of the labelled rows for each group type.
stats_data = {
    "AGENT": total_data,
    "SVM": svc_data,
    "KNN": knn_data,
    "DT": dt_data,
    "USER": uq_data
}
personas = list(labels.keys())

# Every measured column gets a mean column and a "<name>_STD" column.
measured = ["CollectTreasure", "EnemyKill", "EndTurn"]
stats = {}
for metric in measured:
    stats[metric] = []
    stats[metric + "_STD"] = []
stats["Type"] = []
stats["Size"] = []

# One output row per (persona, type) pair, indexed by the persona.
indeces = []
for p in personas:
    for t, frame in stats_data.items():
        data = frame[frame["Label"] == p]
        for metric in measured:
            stats[metric].append(data[metric].mean())
            stats[metric + "_STD"].append(data[metric].std())
        stats["Type"].append(t)
        stats["Size"].append(data.shape[0])
        indeces.append(p)
stats_data = pd.DataFrame(stats, index=indeces)

# Visualize Statistics

In [None]:
# Plot only the mean columns; the "_STD", "Type" and "Size" columns are helpers.
graphs = [s for s in stats.keys() if not (s == "Type" or "STD" in s or "Size" in s)]

personas = list(labels.keys())
types = ["AGENT", "SVM", "USER"]
# One bar group per persona (the tick labels are the personas), so the group
# positions follow the number of personas, not the number of types.
x = np.arange(len(personas))
width = 0.7/len(types)

for s in graphs:
    plt.figure()
    for i,t in enumerate(types):
        means = []
        stds = []
        for p in personas:
            data = stats_data.loc[p]
            data = data[data["Type"] == t]
            means.append(data[s].to_numpy()[0])
            # Error bar: standard deviation divided by sqrt(sample size).
            stds.append(data[s + "_STD"].to_numpy()[0] / np.sqrt(data["Size"].to_numpy()[0]))
        # Shift each type's bars so the group is centred on its tick.
        plt.bar(x + (i-int(len(types)/2))*width, means, width, yerr=stds, label=t)
    # Add some text for labels, title and custom x-axis tick labels, etc.
    plt.ylabel('Frequency')
    plt.xticks(x, labels=personas)
    plt.legend()
    plt.title(s)

    plt.tight_layout()
    plt.show()

# Calculate User Persona Change Between Levels

In [None]:
def _plot_mistakes(heights, tick_labels, title, x_label):
    """Bar chart of classification mistakes."""
    plt.figure()
    plt.bar(range(len(heights)), heights)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel("Classification Mistakes")
    plt.xticks(range(len(heights)), labels=tick_labels)
    plt.show()


# Per-trace predictions and the per-user majority vote of every classifier.
dt_diff_labels = dt.predict(n_maj_test_set)
knn_diff_labels = knn.predict(n_maj_test_set)
svc_diff_labels = svc.predict(n_maj_test_set)

dt_maj_labels = get_majority_voting(dt, n_maj_test_set)
knn_maj_labels = get_majority_voting(knn, n_maj_test_set)
svc_maj_labels = get_majority_voting(svc, n_maj_test_set)

predict_labels = {
    "DT": [dt_diff_labels, dt_maj_labels],
    "KNN": [knn_diff_labels, knn_maj_labels],
    "SVM": [svc_diff_labels, svc_maj_labels]
}

# Mistakes by play order: a trace is a mistake when its own prediction
# differs from the majority vote of the user it belongs to.
for algo in predict_labels.keys():
    value = [0, 0, 0]
    algo_data = predict_labels[algo]
    for i, label in enumerate(algo_data[0]):
        if label != algo_data[1][int(i/3)]:
            value[i % 3] += 1
    _plot_mistakes(value, range(len(value)), algo, "Order")

print()

# Map number of every user trace, by position. Looking the map up by index
# label would return all traces of that user (the index is the user id),
# and taking element 0 would always yield the user's first map.
user_row_mask = (total_data["IsUser"] == True).to_numpy()
user_map_numbers = total_data["MapNumber"].to_numpy()[user_row_mask]

# Mistakes by map, as a fraction of the user traces played on that map.
for algo in predict_labels.keys():
    algo_data = predict_labels[algo]
    level_value = {}
    for i, label in enumerate(algo_data[0]):
        map_number = user_map_numbers[i]
        if map_number not in level_value:
            level_value[map_number] = 0
        if label != algo_data[1][int(i/3)]:
            level_value[map_number] += 1
    values = []
    x_values = []
    for l in level_value.keys():
        size = int((user_map_numbers == l).sum())
        level_value[l] /= size
        values.append(level_value[l])
        x_values.append(l)
    _plot_mistakes(values, x_values, algo, "Maps")

In [None]:
stats_data = {
    "AGENT": total_data,
    "SVM": svc_data,
    "USER": uq_data
}

# Sum of "ReachStairs" divided by the number of rows, per source and persona.
for agent, frame in stats_data.items():
    for persona in labels:
        matching = frame.query(f"Label == '{persona}'")
        value = matching["ReachStairs"].sum()
        total = len(matching)
        print(f"{agent} - {persona}: {value / total}")

In [None]:
stats_data = {
    "DT": dt_data,
    "KNN": knn_data,
    "SVM": svc_data,
    "USER": uq_data
}

# Fraction of users whose classifier label equals their questionnaire label.
for key in stats_data:
    if key == "USER":
        continue
    agreement = 0
    total = 0
    for user in users:
        if user in svc_data.index:
            # .loc[[user], ...] always yields a Series (even for a single
            # row) and .iloc[0] reads its first entry by position, so whole
            # labels are compared instead of relying on integer-key fallback.
            key_label = stats_data[key].loc[[user], "Label"].iloc[0]
            user_label = stats_data["USER"].loc[[user], "Label"].iloc[0]
            if key_label == user_label:
                agreement += 1
            total += 1
    # No matching user means there is nothing to average; report NaN.
    ratio = agreement / total if total else float("nan")
    print(f"{key} agreemenet: {ratio}")

In [None]:
# Display the SVC class probabilities averaged over each user's traces.
svc_maj_probs

In [None]:
# Display the per-user questionnaire scores rescaled to sum to one.
n_ques_set