For our paper, we decided to go forward with groups 2 and 4. We thus ignore the users who had reflective prompts, as it is hard to include them in the paper (to explain them and discuss their importance in the study), so we focus only on adaptive feedback.

| Group 2 | Group 4|
| :----: | :---:|
|Adaptive Feedback | Non-Adaptive feedback  |

These two thus become the primary groups, and we will call them group 1 and group 2, respectively.

First, we extract the users of each of these groups and write a new CSV file, i.e. a new dataset, that contains only the users from these two groups.

In [70]:
import pandas as pd
import numpy as np

%store -r df
%store -r INDICES

The group-matching data contains more users than our dataset, so we keep only the users that also appear in the dataset.

In [69]:
# User-to-group assignment table; the code below reads its 'user_id' and 'group' columns.
group_data = pd.read_csv('data/groupmatching.csv')
# Keystroke log; the code below reads its 'user_id', 'event_date', 'ks' and 'recipe' columns.
original_df = pd.read_csv('data/keystrokes-recipes-modified.csv')

# The 'user_id' value of every row of the keystroke log (one entry per row),
# used as the membership reference when filtering the group-matching table.
users_in_df = original_df['user_id'].values

def compute_users_in_group(group_number):
    """Return the user ids assigned to ``group_number`` that also occur in the keystroke data.

    Reads the module-level ``group_data`` frame (columns 'group' and
    'user_id') and the module-level ``users_in_df`` array of user ids.

    Args:
        group_number: value to match against the 'group' column.

    Returns:
        A list of user ids, in the order they appear in ``group_data``.
    """
    belongs_to_group = group_data['group'] == group_number
    candidate_ids = group_data.loc[belongs_to_group, 'user_id']

    kept_ids = []
    for candidate in candidate_ids:
        # The matching table lists more users than the dataset contains.
        if candidate in users_in_df:
            kept_ids.append(candidate)
    return kept_ids

# Users of the original groups 2 and 4 that are also present in the keystroke data.
group2_users = compute_users_in_group(2)
group4_users = compute_users_in_group(4)

# Accumulator filled by retrieve_data(); one dict per keystroke row.
dataset = []


def retrieve_data(users_list, groupnum):
    """Append every keystroke row of the given users to the global ``dataset``.

    For each user in ``users_list`` the matching rows of the module-level
    ``original_df`` are selected and appended, in the frame's row order, as
    dicts with the keys 'event_date', 'group', 'user_id', 'ks' and 'recipe'.

    Args:
        users_list: iterable of user ids to look up in ``original_df['user_id']``.
        groupnum: value stored under 'group' for every appended row.

    Returns:
        None. The result is the side effect on ``dataset``.

    Raises:
        KeyError: if ``original_df`` lacks one of the copied columns.
    """
    copied_columns = ['event_date', 'user_id', 'ks', 'recipe']
    for user in users_list:
        # A boolean mask selects rows by value and keeps the frame's row order,
        # so this does not depend on original_df having a default RangeIndex
        # (feeding index labels to .iloc only works in that case).
        user_rows = original_df.loc[original_df['user_id'] == user, copied_columns]
        # itertuples reads column by column, so values keep their column dtype
        # instead of being coerced into one mixed-type row Series.
        for event_date, user_id, ks, recipe in user_rows.itertuples(index=False, name=None):
            dataset.append({'event_date': event_date, 'group': groupnum, 'user_id': user_id, 'ks': ks, 'recipe': recipe})

# Original group 2 is stored as group 1, original group 4 as group 2.
retrieve_data(group2_users, 1)
retrieve_data(group4_users, 2)

# Order the collected rows per user, then by event date, before writing them out.
dataset_df = pd.DataFrame(dataset).sort_values(by=['user_id', 'event_date'])
dataset_df.to_csv('data4paper/dataset.csv', index=False)

We have now replicated our dataset, creating one that contains only the users from groups 2 and 4.

In [None]:
# Bubble plot of insertions and deletions per revision step for the users of
# groups 2 and 4, for a single recipe; the figure is saved as a PNG at the end.
group2_indices, group4_indices = INDICES[1], INDICES[3]

# NOTE(review): five rows are allocated, but only groups 2 and 4 are drawn
# (into rows 1 and 3), so rows 0, 2 and 4 stay empty -- confirm this layout
# is still wanted now that only two groups are reported.
fig, ax = plt.subplots(5,1, figsize=(50,60), sharey=True)
plt.rcParams["font.size"]  = 10

recipe_num = 0
max_revisions, means, variances, avr_rev_lengths, _ = data_computation_on_groups(recipe_num, INDICES)
group_characteristics = ["With Adaptive Feedback\nWith Reflective Prompts", "With Adaptive Feedback\nWithout Reflective Prompts",
"Without Adaptive Feedback\nWith Reflective Prompts", "Without Adaptive Feedback\nWithout Reflective Prompts", "Control Group\nNo Adaptive Feedback\nNo Reflective Prompts"]

# The text-box styles are identical for every group, so build them once.
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
new_props = dict(boxstyle='round', facecolor='lightcoral', alpha=0.5)

for group in [2,4]:
    # Groups are numbered from 1; the per-group sequences and the axes are 0-based.
    panel = ax[group-1]

    # Summary statistics shown in the box on the right of the panel.
    text = \
    """
    Maximum number of revisions: {max}
    {mean}
    {var}
    {revision_length_mean}
    {average_time_spent}
    """.format(max=max_revisions[group-1],
    mean=r'$\mu_{revisions}=%.3f$' % (means[group-1]),
    var=r'$\sigma^2_{revisions}=%.3f$' % (variances[group-1]),
    revision_length_mean=r'$\mu_{revision lengths}=%.3f$' % (avr_rev_lengths[group-1]),
    average_time_spent=r'$\mu_{time spent revising}=%.3f$' % (get_average_time_spent_per_group(INDICES[group-1], recipe_num=recipe_num))
    )

    # Collect one frame per user of this group for the chosen recipe.
    data = []
    for i, _ in enumerate(sorted_users):
        if i not in INDICES[group-1]:
            continue
        try:
            # reset_index() returns a new frame, so the object handed back by
            # get_data_per_session is left untouched.
            dframe = get_data_per_session(i)[recipe_num].reset_index()
        except Exception:
            # Best effort: a user whose data cannot be retrieved contributes an
            # empty frame. Unlike a bare `except:`, KeyboardInterrupt and
            # SystemExit still propagate.
            data.append(pd.DataFrame())
        else:
            data.append(dframe)

    # Users with the most rows come first.
    data = sorted(data, key=len, reverse=True)
    list_of_df = list(data)
    dframes = pd.concat(list_of_df)

    # y: rank of the user (repeated once per row of that user's frame);
    # x: row number within that user's frame.
    y = [[rank] * len(frame) for rank, frame in enumerate(list_of_df)]
    x = [np.arange(len(y_i)) for y_i in y]
    x = [index for sub in x for index in sub]
    y = [i for sub in y for i in sub]
    z = np.array([dframes['insertions']])

    # Users run along the horizontal axis and row numbers along the vertical
    # one; marker area is 5x the insertion (first call) or deletion (second
    # call) value.
    panel.scatter(x=y, y=x, s=z*5, alpha=0.5, linewidths=2)
    panel.scatter(x=y, y=x, s=dframes['deletions'] * 5, alpha=0.5, linewidths=2, c='blue')

    # Statistics box, positioned in axes coordinates.
    panel.text(0.7, 0.95, text, transform=panel.transAxes, fontsize=50,
        verticalalignment='top', bbox=props)

    # Group description box, positioned in axes coordinates.
    panel.text(0.1, 0.95, group_characteristics[group-1], transform=panel.transAxes, fontsize=40,
        verticalalignment='top', bbox=new_props)

# NOTE(review): plt.xlabel / plt.ylabel label the current axes only, not every
# panel -- confirm that this is the panel meant to carry the labels.
plt.xlabel(xlabel='Insertions and deletions for users in each group, recipe number {num}'.format(num=recipe_num+1), fontsize=20)
plt.ylabel(ylabel='Revision steps', fontsize=20)
fig.tight_layout()
plt.savefig('results/bubble plots/recipe {num}.png'.format(num=recipe_num+1))
