In [None]:
##########################
import pandas as pd
import statistics
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as scipy
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Presentation settings shared by every figure in this notebook.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 21
dpi = 300
colors = ['#4F759B', '#8CBCB9', '#B6A6CA', '#A9A9A9']

# Grouping levels iterated by the plotting helpers.
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
conditions = ['baseline', 'on-demand', 'on-idle']

# Load the session table and drop the row whose second session is unusable.
session_data = pd.read_json('iui20_mturk-sessions.json')
session_data = session_data.loc[
    session_data['session2_brainstormingSessionId'].ne(
        'http://purl.org/innovonto/brainstormingSession/241')
]

## TODO: Add labels for axes

def draw_bar_plot(attribute, conditions, y_max = None, session_number = 2):
    """Draw one bar chart per ideator type comparing the given conditions.

    For every entry of the module-level ``ideator_types`` a subplot is added
    that shows, per condition, the mean of
    ``session<session_number>_numberOf<attribute>`` with the standard error of
    the mean as error bar. Rows are always selected by ``session2_condition``,
    also when ``session_number`` is 1.

    Parameters
    ----------
    attribute : str
        Column suffix, e.g. ``'Requests'`` or ``'Submits'``. ``'Requests'`` is
        labelled "Inspirations"; anything else is labelled "Idea submits".
    conditions : list of str
        Condition names (without the URI prefix) shown on the x-axis.
    y_max : number, optional
        Upper y-limit. When omitted matplotlib's automatic limits are kept.
    session_number : int
        Session whose counts are plotted.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the subplots (already shown via ``plt.show()``).
    """
    fig = plt.figure(figsize=(20, 5))
    y_label = 'Inspirations' if attribute == 'Requests' else 'Idea submits'
    for panel_index, ideator_type in enumerate(ideator_types):
        means = []
        standard_errors = []
        for condition in conditions:
            df = session_data
            if ideator_type != 'OVERALL':
                df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + ideator_type]
            df = df[df['session2_condition'] == 'http://purl.org/innovonto/condition/' + condition]
            counts = df['session' + str(session_number) + '_numberOf' + attribute]
            if condition == 'on-idle' and attribute == 'Requests' and session_number == 2:
                # NOTE(review): presumably one on-idle request delivers three
                # inspirations -- confirm the factor against the study setup.
                counts = counts * 3
            means.append(statistics.mean(counts))
            # The error bars are standard errors of the mean, not confidence intervals.
            standard_errors.append(scipy.sem(counts))

        fig.add_subplot(1, 3, panel_index + 1)
        plt.bar(conditions, means, yerr = standard_errors, capsize = 10, color = colors)
        plt.grid(axis='x')
        if panel_index == 0:
            # Only the left-most panel carries the y-axis label.
            plt.ylabel(y_label)
        plt.xlabel('Conditions')
        plt.title(ideator_type.capitalize())
        if y_max is not None:
            plt.ylim(0, y_max)
        # Value labels sit at 12% of the axis height; without an explicit y_max
        # the automatic upper limit is used (previously this raised TypeError).
        label_height = (y_max if y_max is not None else plt.ylim()[1]) * 0.12
        for position, mean in enumerate(means):
            plt.text(position,
                     label_height,
                     round(mean, 1),
                     horizontalalignment="center",
                     fontsize=22,
                     color='gray' if mean < 4 else 'white')
    plt.show()
    return fig

# Session-2 inspiration requests; baseline is left out of the condition list here.
plot = draw_bar_plot(attribute = 'Requests', conditions = ['on-demand', 'on-idle'], y_max = 45)
plot.savefig("iui20-requests_per-type+condition_sessions2.jpg", bbox_inches='tight', dpi = dpi)
# Session-1 requests grouped by the session-2 condition (displayed only, not saved).
plot = draw_bar_plot(attribute = 'Requests', conditions = conditions, y_max = 20, session_number = 1)
# Session-2 fluency (number of idea submits).
plot = draw_bar_plot(attribute = 'Submits', conditions = conditions, y_max = 12)
plot.savefig("iui20-fluency_per-type+condition_sessions2.jpg", bbox_inches='tight', dpi = dpi)

# Session-1 fluency, still grouped by the condition assigned for session 2.
print('== FIRST SESSION ==')
print('First session does not have a condition. It shows the same pattern as with condition. => Probably has nothing to do with conditions')
plot = draw_bar_plot(attribute = 'Submits', conditions = conditions, y_max = 9, session_number = 1)
plot.savefig("iui20-fluency_per-type+condition_sessions1.jpg", bbox_inches='tight', dpi = dpi)



In [None]:
# Render inline figures at retina resolution (IPython magic).
%config InlineBackend.figure_format = 'retina'

# Smaller global font size for the figures created from here on.
plt.rcParams.update({'font.size': 15})
def draw_bar_plot_without_conditions(attribute, y_max = None, session_number = 2, title = None):
    """Draw mean counts per ideator type (conditions pooled) on the current axes.

    Bars show the mean of ``session<session_number>_numberOf<attribute>`` for
    SEEKER, AVOIDER and OVERALL, with the standard error of the mean as error
    bar. OVERALL contains every participant except those typed UNMOTIVATED.
    Missing values of the plotted column are ignored.

    Parameters
    ----------
    attribute : str
        Column suffix, e.g. ``'Submits'`` or ``'Requests'``.
    y_max : number, optional
        Upper y-limit; matplotlib's automatic limits are kept when omitted.
    session_number : int
        Session whose counts are plotted.
    title : str, optional
        Axes title; defaults to ``'Session <session_number>'``.

    The function draws with ``plt`` into whatever axes are current and returns
    nothing; the caller creates the figure/subplot and calls ``plt.show()``.
    """
    ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
    means = []
    standard_errors = []

    for ideator_type in ideator_types:
        df = session_data
        if ideator_type != 'OVERALL':
            df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + ideator_type]
        else:
            df = df[df['session1_ideatorType'] != 'http://purl.org/innovonto/ideatorType/UNMOTIVATED']
        current_attribute = 'session' + str(session_number) + '_numberOf' + attribute
        counts = df[current_attribute][df[current_attribute].notna()]
        means.append(statistics.mean(counts))
        # Error bars are standard errors of the mean.
        standard_errors.append(scipy.sem(counts))

    plt.bar([i.capitalize() for i in ideator_types], means, yerr = standard_errors, capsize = 10, color = colors)
    plt.grid(axis='x')
    plt.xlabel('Ideator types')
    # Label the y-axis after the plotted attribute (same wording as
    # draw_bar_plot) instead of always claiming "Idea submits".
    plt.ylabel('Inspirations' if attribute == 'Requests' else 'Idea submits')
    if title is None:
        plt.title('Session ' + str(session_number))
    else:
        plt.title(title)
    if y_max is not None:
        plt.ylim(0, y_max)
    for position, mean in enumerate(means):
        # Value labels are drawn at a fixed height of y = 1 inside the bars.
        plt.text(position,
                 1,
                 round(mean, 1),
                 fontsize=14,
                 horizontalalignment="center",
                 color='gray' if mean < 4 else 'white')



# Fluency per ideator type: session 1 (left) next to session 2 (right).
fig = plt.figure(figsize=(17, 5))
fig.suptitle('Fluency', fontsize = 20, y = 1.03)
fig.add_subplot(1,2,1)
draw_bar_plot_without_conditions('Submits', session_number = 1, y_max = 11)
fig.add_subplot(1,2,2)
draw_bar_plot_without_conditions('Submits', session_number = 2, y_max = 11)
plt.show()
fig.savefig("iui20-fluency_per-type_both-sessions.jpg", bbox_inches='tight')

# Reminder: unlike draw_bar_plot, the per-type helper does not multiply
# on-idle request counts by 3.
print('TODO: on-idle requests * 3')

# Inspiration requests per ideator type, both sessions side by side.
fig = plt.figure(figsize=(17, 5))
fig.suptitle('Inspirations', fontsize = 20, y = 1.03)
fig.add_subplot(1,2,1)
draw_bar_plot_without_conditions('Requests', session_number = 1, y_max = 15)
fig.add_subplot(1,2,2)
draw_bar_plot_without_conditions('Requests', session_number = 2, y_max = 15)
plt.show()
fig.savefig("iui20-requests_per-type_both-sessions.jpg", bbox_inches='tight')


# Stand-alone session-1 requests figure; the y-label is set explicitly here.
fig = plt.figure(figsize=(5, 5))
draw_bar_plot_without_conditions('Requests', session_number = 1, y_max = 15, title = '')
plt.ylabel('Inspirations')
plt.show()
fig.savefig("iui20-requests_per-type_session1.jpg", bbox_inches='tight', dpi = dpi)

In [None]:
def get_ratings_per_type(aggregator = 'mean'):
    """Aggregate idea ratings per brainstorming session and attach participant info.

    Ratings (``iui20-idea-ratings.json``) are joined with the ideas
    (``iui20-ideas-fabric-display.json``) on ``ideaId``, grouped by
    ``brainstormingSessionId`` and aggregated. The result is merged with the
    ideator type and condition of every participant that has a second session.

    Parameters
    ----------
    aggregator : str
        ``'max'`` or ``'min'`` select that aggregation; any other value
        (including the default ``'mean'``) yields the per-session mean.

    Returns
    -------
    pandas.DataFrame
        One row per second session with ``brainstormingSessionId``,
        ``session1_ideatorType``, ``session2_condition`` and the aggregated
        rating columns.

    Side effect: sets the pandas option ``display.max_colwidth`` to 10000.
    """
    idea_ratings = pd.read_json('iui20-idea-ratings.json')
    ideas = pd.read_json('iui20-ideas-fabric-display.json')
    pd.set_option("display.max_colwidth", 10000)
    ratings_by_session = idea_ratings.merge(ideas, on = 'ideaId').groupby('brainstormingSessionId')
    if aggregator == 'max':
        ratings_per_session = ratings_by_session.max()
    elif aggregator == 'min':
        ratings_per_session = ratings_by_session.min()
    else:
        # The merged frame also carries non-numeric columns (e.g. the idea
        # text); averaging those raises on recent pandas versions, so restrict
        # the mean to numeric columns explicitly.
        ratings_per_session = ratings_by_session.mean(numeric_only = True)
    has_second_session = session_data['session2_brainstormingSessionId'].notna()
    session_and_type = session_data.loc[
        has_second_session,
        ['session2_brainstormingSessionId', 'session1_ideatorType', 'session2_condition']]
    session_and_type = session_and_type.rename(columns = {'session2_brainstormingSessionId': 'brainstormingSessionId'})
    return session_and_type.merge(ratings_per_session, on = 'brainstormingSessionId')

# Per-session rating aggregates used by the plots and tests below:
# mean, best (max) and worst (min) rating per session.
ratings_per_type = get_ratings_per_type()
ratings_per_type_max = get_ratings_per_type(aggregator = 'max')
ratings_per_type_min = get_ratings_per_type(aggregator = 'min')

In [None]:
def get_rated_ideas():
    """Load the idea ratings and the ideas and join them on 'ideaId'.

    Returns the merged DataFrame (one row per matching rating/idea pair).
    """
    ratings_frame = pd.read_json('iui20-idea-ratings.json')
    ideas_frame = pd.read_json('iui20-ideas-fabric-display.json')
    return pd.merge(ratings_frame, ideas_frame, on = 'ideaId')
    
df = get_rated_ideas()
# Overwrite each idea text with its length in whitespace-separated words.
df['text'] = [len(idea_text.split()) for idea_text in df['text']]
#df = df[df['text'] < 50]
# Regress the value rating on the word count.
linear_model = ols(formula = 'value ~ text', data = df).fit()
print('ANOVA idea value')
# The table is computed but only shown when the display lines below are re-enabled.
anova = sm.stats.anova_lm(linear_model, typ = 3)
#display(linear_model.summary())
#display(anova)

def plot_regression(column_name, data = None):
    """Scatter a rating column against idea word count with a fitted line.

    Parameters
    ----------
    column_name : str
        Column to regress on the word count stored in ``'text'``.
    data : pandas.DataFrame, optional
        Frame holding ``'text'`` (word counts) and ``column_name``. Defaults to
        the module-level ``df`` at call time; pass the frame explicitly when
        ``df`` has been rebound by another cell.

    Prints the ``linregress`` result and shows the plot; returns nothing.
    """
    if data is None:
        data = df
    word_counts = data['text']
    # Pass x and y explicitly: the single-argument form guesses the
    # orientation of a two-column array and is ambiguous for two rows.
    result = scipy.linregress(word_counts, data[column_name])
    print(result)
    plt.plot(word_counts, data[column_name], 'o', label='original data', c = colors[1])
    plt.plot(word_counts, result.intercept + result.slope*word_counts, 'darkblue', label='fitted line')
    plt.ylabel(column_name.capitalize())
    plt.xlabel('Word Count')
    # Fixed y-range so that the plots of different rating columns are comparable.
    plt.ylim(-2.3, 2.1)
    plt.legend()
    plt.show()

# Word count vs. value rating and vs. novelty rating.
plot_regression('value')
plot_regression('novelty')

# Multivariate model in two dimensions
#from scipy.stats import multivariate_normal
#multivariate_normal.pdf(df[['text', 'value']])


In [None]:
# Render inline figures at retina resolution (IPython magic).
%config InlineBackend.figure_format = 'retina'

# Re-declare the grouping levels used by the rating plots.
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
conditions = ['baseline','on-demand', 'on-idle']

def draw_bar_plot_ratings(attribute, conditions, y_max = None, y_min = None, ratings_per_type = ratings_per_type):
    """Draw mean ratings per condition, one subplot per ideator type.

    Parameters
    ----------
    attribute : str
        Rating column to plot, e.g. ``'novelty'`` or ``'value'``.
    conditions : list of str
        Condition names (without URI prefix); the special name ``'all'``
        disables the condition filter for that bar.
    y_max : number, optional
        Upper y-limit. Limits are only applied when this is given.
    y_min : number, optional
        Lower y-limit; defaults to ``-y_max`` when ``y_max`` is given.
    ratings_per_type : pandas.DataFrame
        Per-session ratings; defaults to the module-level frame that existed
        when this function was defined.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the subplots (already shown via ``plt.show()``).
    """
    fig = plt.figure(figsize=(20, 5))
    for panel_index, ideator_type in enumerate(ideator_types):
        ratings_per_condition = []
        standard_errors = []
        for condition in conditions:
            df = ratings_per_type
            if ideator_type != 'OVERALL':
                df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + ideator_type]
            if condition != 'all':
                df = df[df['session2_condition'] == 'http://purl.org/innovonto/condition/' + condition]
            ratings = df[attribute]
            ratings_per_condition.append(statistics.mean(ratings))
            # Error bars are standard errors of the mean.
            standard_errors.append(scipy.sem(ratings))

        fig.add_subplot(1, 3, panel_index + 1)
        plt.bar(conditions, ratings_per_condition, yerr = standard_errors, capsize = 10, color = colors)
        plt.grid(axis='x')
        plt.title(ideator_type.capitalize())
        if panel_index == 0:
            # Only the left-most panel carries the y-axis label.
            plt.ylabel(attribute.capitalize())
        plt.xlabel('Conditions')
        if y_max is not None:
            if y_min is None:
                y_min = -1 * y_max
            plt.ylim(y_min, y_max)
        # Anchor the value labels just above the lower axis limit. Without
        # explicit limits fall back to the automatic lower limit (previously
        # `None + 0.025` raised TypeError).
        label_base = y_min if y_min is not None else plt.ylim()[0]
        for position, rating in enumerate(ratings_per_condition):
            plt.text(position,
                     label_base + 0.025,
                     round(rating, 2),
                     fontsize=18,
                     horizontalalignment="center",
                     color='gray' if y_min != 0 else 'white')
    plt.show()
    return fig

In [None]:
# Grouping levels for the per-type rating plot (same values as declared earlier).
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']

def draw_bar_plot_ratings_per_type(y_max = None, y_min = None, ratings_per_type = ratings_per_type):
    """Draw mean novelty and value ratings per ideator type (conditions pooled).

    Creates a figure with two subplots ('novelty', 'value'); each shows one bar
    per entry of the module-level ``ideator_types`` with the standard error of
    the mean as error bar.

    Parameters
    ----------
    y_max : number, optional
        Upper y-limit. Limits are only applied when this is given.
    y_min : number, optional
        Lower y-limit; defaults to ``-y_max`` when ``y_max`` is given.
    ratings_per_type : pandas.DataFrame
        Per-session ratings; defaults to the module-level frame that existed
        when this function was defined.

    Returns
    -------
    matplotlib.figure.Figure
        The figure; it is not shown here, the caller calls ``plt.show()``.
    """
    fig = plt.figure(figsize=(20, 5))
    fig.suptitle('Idea Quality per Type', fontsize = 20, y = 1.07)
    for panel_index, attribute in enumerate(['novelty', 'value']):
        ratings_per_ideator_type = []
        standard_errors = []
        for ideator_type in ideator_types:
            df = ratings_per_type
            if ideator_type != 'OVERALL':
                df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + ideator_type]
            ratings = df[attribute]
            ratings_per_ideator_type.append(statistics.mean(ratings))
            # Error bars are standard errors of the mean.
            standard_errors.append(scipy.sem(ratings))

        fig.add_subplot(1, 2, panel_index + 1)
        plt.bar([i.capitalize() for i in ideator_types], ratings_per_ideator_type, yerr = standard_errors, capsize = 10, color = colors)
        plt.grid(axis='x')
        plt.title(attribute.capitalize())
        plt.xticks(fontsize = 14)
        plt.yticks(fontsize = 14)
        if y_max is not None:
            if y_min is None:
                y_min = -1 * y_max
            plt.ylim(y_min, y_max)
        # Anchor the value labels just above the lower axis limit. Without
        # explicit limits fall back to the automatic lower limit (previously
        # `None + 0.035` raised TypeError).
        label_base = y_min if y_min is not None else plt.ylim()[0]
        for position, rating in enumerate(ratings_per_ideator_type):
            plt.text(position,
                     label_base + 0.035,
                     round(rating, 2),
                     fontsize=18,
                     horizontalalignment="center",
                     color='gray' if y_min != 0 else 'white')
    return fig



In [None]:
#plt.rcParams.update({'font.size': 15})
# Mean ratings per session; y-axis is symmetric around 0 because y_min defaults to -y_max.
plot = draw_bar_plot_ratings('novelty', conditions = conditions, y_max = 0.2)
plot.savefig("iui20-novelty-ratings_per-type+condition.jpg", bbox_inches='tight', dpi = dpi)
plot = draw_bar_plot_ratings('value', conditions = conditions, y_max = 0.2)
plot.savefig("iui20-value-ratings_per-type+condition.jpg", bbox_inches='tight', dpi = dpi)
# The per-type helper does not call plt.show() itself, hence the explicit call below.
plot = draw_bar_plot_ratings_per_type(y_max = 0.1)
plot.savefig("iui20-ratings_per-type.jpg", bbox_inches='tight')
plt.show()

In [None]:
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
# Same figures for the best-rated idea of each session (max aggregate); y-axis starts at 0.
plot = draw_bar_plot_ratings('novelty', conditions = conditions, y_max = 1.1, y_min = 0, ratings_per_type = ratings_per_type_max)
plot.savefig("iui20-novelty-ratings_per-type+condition_max.jpg", bbox_inches='tight', dpi = dpi)
plot = draw_bar_plot_ratings('value', conditions = conditions, y_max = 1.1, y_min = 0, ratings_per_type = ratings_per_type_max)
plot.savefig("iui20-value-ratings_per-type+condition_max.jpg", bbox_inches='tight', dpi = dpi)
plot = draw_bar_plot_ratings_per_type(y_max = 1, y_min = 0, ratings_per_type = ratings_per_type_max)
plot.savefig("iui20-ratings_per-type_max.jpg", bbox_inches='tight')
plt.show()

In [None]:
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
# Same figures for the worst-rated idea of each session (min aggregate); y-axis spans -1.2 to 0.
plot = draw_bar_plot_ratings('novelty', conditions = conditions, y_max = 0, y_min = -1.2, ratings_per_type = ratings_per_type_min)
plot.savefig("iui20-novelty-ratings_per-type+condition_min.jpg", bbox_inches='tight', dpi = dpi)
plot = draw_bar_plot_ratings('value', conditions = conditions, y_max = 0, y_min = -1.2, ratings_per_type = ratings_per_type_min)
plot.savefig("iui20-value-ratings_per-type+condition_min.jpg", bbox_inches='tight', dpi = dpi)
plot = draw_bar_plot_ratings_per_type(y_max = 0, y_min = -1.2, ratings_per_type = ratings_per_type_min)
plot.savefig("iui20-ratings_per-type_min.jpg", bbox_inches='tight')
plt.show()

In [None]:
def _pick(frame, column, ideator_type = None, condition = None, assigned_only = False):
    """Return `column` of `frame` restricted to an ideator type and/or a condition.

    `ideator_type` and `condition` are given without their URI prefix; `None`
    means "do not filter". With `assigned_only` rows without a session-2
    condition are dropped.
    """
    if ideator_type is not None:
        frame = frame[frame['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + ideator_type]
    if condition is not None:
        frame = frame[frame['session2_condition'] == 'http://purl.org/innovonto/condition/' + condition]
    if assigned_only:
        frame = frame[frame['session2_condition'].notna()]
    return frame[column]


# Mean value rating per session for selected type/condition cells.
seeker_on_idle_value = _pick(ratings_per_type, 'value', 'SEEKER', 'on-idle')
seeker_baseline_value = _pick(ratings_per_type, 'value', 'SEEKER', 'baseline')
avoider_on_demand_value = _pick(ratings_per_type, 'value', 'AVOIDER', 'on-demand')

# Session-2 fluency (number of submitted ideas) for selected type/condition cells.
seeker_on_idle_fluency = _pick(session_data, 'session2_numberOfSubmits', 'SEEKER', 'on-idle')
avoider_on_demand_fluency = _pick(session_data, 'session2_numberOfSubmits', 'AVOIDER', 'on-demand')

# Fluency per ideator type, limited to participants that were assigned a condition.
seeker_fluency = _pick(session_data, 'session2_numberOfSubmits', 'SEEKER', assigned_only = True)
avoider_fluency = _pick(session_data, 'session2_numberOfSubmits', 'AVOIDER', assigned_only = True)

# Fluency per condition, all ideator types pooled.
baseline_fluency = _pick(session_data, 'session2_numberOfSubmits', condition = 'baseline')
on_demand_fluency = _pick(session_data, 'session2_numberOfSubmits', condition = 'on-demand')

# Kept for backward compatibility: this cell has always left `df` bound to the
# on-idle sessions.
df = session_data[session_data['session2_condition'] == 'http://purl.org/innovonto/condition/on-idle']
on_idle_fluency = df['session2_numberOfSubmits']

print('==== Seeker Fluency vs Avoider Fluency ====')
display(scipy.f_oneway(seeker_fluency, avoider_fluency))
display(scipy.ttest_ind(seeker_fluency, avoider_fluency))
# Welch's t-test (no equal-variance assumption).
display(scipy.ttest_ind(seeker_fluency, avoider_fluency, equal_var=False))
print()

print('====Fluency by conditions====')
display(scipy.f_oneway(baseline_fluency, on_demand_fluency, on_idle_fluency))
display(scipy.ttest_ind(baseline_fluency, on_demand_fluency))
display(scipy.ttest_ind(baseline_fluency, on_idle_fluency))
display(scipy.ttest_ind(on_idle_fluency, on_demand_fluency))
print()

#plt.boxplot([avoider_on_demand_fluency, seeker_on_idle_fluency])


# equal_var = False ?
print('Value: Seeker on-idle vs Avoider on-demand:', scipy.ttest_ind(seeker_on_idle_value, avoider_on_demand_value, equal_var = False))
#print(scipy.f_oneway(seeker_on_idle_value, avoider_on_demand_value))
print()


print('Value: Seeker on-idle vs Seeker baseline:', scipy.ttest_ind(seeker_on_idle_value, seeker_baseline_value, equal_var = False))
print()

print('Fluency: Avoider on-demand vs Seeker on-idle:', scipy.ttest_ind(avoider_on_demand_fluency, seeker_on_idle_fluency, equal_var = False))

In [None]:
"""
metrics = ['novelty', 'value']

def all_t_tests():
    ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
    conditions = ['baseline', 'on-demand', 'on-idle', 'OVERALL']
    ideator_type_condition = [(i, c) for i in ideator_types for c in conditions]
    def get_values(i, c, m):
        df = ratings_per_type
        if i != 'OVERALL':
            df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + i]
        if c != 'OVERALL':
            df = df[df['session2_condition'] == 'http://purl.org/innovonto/condition/' + c]
        else:
            df = df[df['session2_condition'].notna()]
        return df[m]

    p_values = []
    coeffs = []
    for m in metrics:    
        for i, c in ideator_type_condition:
            values1 = get_values(i, c, m)
            for i2, c2 in ideator_type_condition:
                values2 = get_values(i2, c2, m)
                t_test = scipy.ttest_ind(values1, values2, equal_var = False)
                p_values.append(t_test.pvalue)
                coeffs.append(t_test.statistic)
                print('%s: (%s, %s) vs (%s, %s):\n %s\n\n' % (m, i, c, i2, c2, t_test))

    #remove duplicates
    p_values = list(set(p_values))
    p_values.sort()
    print(p_values[:3])
    
    coeffs = list(set(coeffs))
    coeffs.sort()
    print(coeffs[:3])
all_t_tests()
"""

In [None]:
"""
ideator_types = ['SEEKER', 'AVOIDER', 'OVERALL']
ideator_type_condition = [(i, c) for i in ideator_types for c in conditions]

def get_values(i, c, m): 
    df = session_data
    if i != 'OVERALL':
        df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + i]
    if c != 'OVERALL':
        df = df[df['session2_condition'] == 'http://purl.org/innovonto/condition/' + c]
    else:
        df = df[df['session2_condition'].notna()]
    return df[m]

p_values = []
coeffs = []
m = 'session2_numberOfSubmits'
for i, c in ideator_type_condition:
    values1 = get_values(i, c, m)
    for i2, c2 in ideator_type_condition:
        values2 = get_values(i2, c2, m)
        t_test = scipy.ttest_ind(values1, values2, equal_var = False)
        p_values.append(t_test.pvalue)
        coeffs.append(t_test.statistic)
        print('%s: (%s, %s) vs (%s, %s):\n %s\n\n' % (m, i, c, i2, c2, t_test))

#remove duplicates
p_values = list(set(p_values))
p_values.sort()
print(p_values[:3])

coeffs = list(set(coeffs))
coeffs.sort()
print(coeffs[:3])
print(coeffs[3:])
"""

In [None]:
# One-way ANOVA: session-2 fluency by ideator type, restricted to participants
# with a condition and without the UNDETERMINED type.
# NOTE(review): other cells filter on 'UNMOTIVATED' instead of 'UNDETERMINED' -- confirm which is intended here.
df = session_data[['session2_condition', 'session1_ideatorType', 'session2_numberOfSubmits']]
df = df[df['session2_condition'].notna()]
df = df[df['session1_ideatorType'] != 'http://purl.org/innovonto/ideatorType/UNDETERMINED']
linear_model=ols('session2_numberOfSubmits ~ C(session1_ideatorType)', data = df).fit()
print('ANOVA idea submits')
display(linear_model.summary())
display(sm.stats.anova_lm(linear_model, typ = 3))

# One-way ANOVA: mean novelty per session by ideator type.
df = ratings_per_type[['session2_condition', 'session1_ideatorType', 'novelty']]
df = df[df['session2_condition'].notna()]
linear_model_novelty=ols('novelty ~ C(session1_ideatorType)', data = df).fit()
print('ANOVA idea novelty')
display(linear_model_novelty.summary())
display(sm.stats.anova_lm(linear_model_novelty, typ = 3))

# One-way ANOVA: mean value per session by ideator type.
df = ratings_per_type[['session2_condition', 'session1_ideatorType', 'value']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session1_ideatorType)', data = df).fit()
print('ANOVA idea value')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(linear_model.summary())
display(anova)

# Same model as the previous stanza; only the ANOVA table is shown this time.
df = ratings_per_type[['session1_ideatorType', 'value', 'session2_condition']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session1_ideatorType)', data = df).fit()
print('ANOVA idea value (type only)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

# One-way ANOVA: mean value per session by condition.
df = ratings_per_type[['value', 'session2_condition']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session2_condition)', data = df).fit()
print('ANOVA idea value (condition only)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

# One-way ANOVA: session-2 fluency by ideator type (no ideator type is excluded here).
df = session_data[['session1_ideatorType', 'session2_numberOfSubmits', 'session2_condition']]
df = df[df['session2_condition'].notna()]
linear_model=ols('session2_numberOfSubmits ~ C(session1_ideatorType)', data = df).fit()
print('ANOVA idea submits (type only)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

# One-way ANOVA: session-2 fluency by condition.
df = session_data[['session1_ideatorType', 'session2_numberOfSubmits', 'session2_condition']]
df = df[df['session2_condition'].notna()]
linear_model=ols('session2_numberOfSubmits ~ C(session2_condition)', data = df).fit()
print('ANOVA idea submits (condition only)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

In [None]:
# Two-way ANOVA (type x condition) on the best novelty rating of each session,
# restricted to sessions whose best novelty rating is positive.
df = ratings_per_type_max[['session1_ideatorType', 'novelty', 'session2_condition']]
#df = df[df['session2_condition'] != 'http://purl.org/innovonto/condition/on-idle']
#df = df[df['session1_ideatorType'] != 'http://purl.org/innovonto/ideatorType/SEEKER']
df = df[df['session2_condition'].notna()]
# Take an explicit copy: a column of this filtered frame is overwritten below,
# which on a plain slice triggers pandas' SettingWithCopyWarning.
df = df[df['novelty'] > 0].copy()
linear_model=ols('novelty ~ C(session1_ideatorType) * C(session2_condition)', data = df).fit()
print('ANOVA idea novelty_max ')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)
display(linear_model.summary())
# Encode the ideator type numerically for the regression plot:
# SEEKER -> 0, every other type -> 1.
df['session1_ideatorType'] = df['session1_ideatorType'].apply(lambda x: 0 if x == 'http://purl.org/innovonto/ideatorType/SEEKER' else 1)
sns.regplot(x='session1_ideatorType', y='novelty', data=df)

# on-idle sessions of non-seekers; not used further in this cell.
on_idle = session_data[session_data['session2_condition'] == 'http://purl.org/innovonto/condition/on-idle']
on_idle = on_idle[on_idle['session1_ideatorType'] != 'http://purl.org/innovonto/ideatorType/SEEKER']
# Histogram of session-2 fluency for sessions with fewer than 8 submits.
session_data[session_data['session2_numberOfSubmits'] < 8]['session2_numberOfSubmits'].plot(kind ='hist')

In [None]:
def get_anova(metric):
    """Fit `metric ~ condition * ideator type` on the per-session maxima and
    display the pairwise t-tests between ideator types.

    The type-3 ANOVA table is computed as well but not displayed.
    """
    has_condition = ratings_per_type_max['session2_condition'].notna()
    model_frame = ratings_per_type_max[[metric, 'session2_condition', 'session1_ideatorType']][has_condition]
    formula = metric + ' ~ C(session2_condition) * C(session1_ideatorType)'
    fitted = ols(formula, data = model_frame).fit()
    anova_table = sm.stats.anova_lm(fitted, typ = 3)
    #display(fitted.summary())
    #display(fitted.t_test_pairwise('C(session2_condition)').result_frame)
    pairwise_types = fitted.t_test_pairwise('C(session1_ideatorType)')
    display(pairwise_types.result_frame)
    #display(fitted.wald_test_terms())

    #display(anova_table)
    
# Pairwise ideator-type comparison for the best novelty rating per session.
get_anova('novelty')


In [None]:

# Distribution of the best rating per session, by ideator type and by condition.
df = ratings_per_type_max
# Human-readable group labels: strip the URI prefixes.
ideatorTypes = (df['session1_ideatorType'].apply(lambda x:str(x).replace("http://purl.org/innovonto/ideatorType/",""))
                                .apply(lambda x:str(x).capitalize()))
# Stored under its own name so that the module-level `conditions` list used by
# the bar-plot cells is not overwritten by a Series.
condition_labels = df['session2_condition'].apply(lambda x:str(x).replace("http://purl.org/innovonto/condition/",""))
# x/y are passed by keyword: recent seaborn releases no longer accept them positionally.
sns.swarmplot(x = ideatorTypes, y = df['novelty'], color = '.3')
sns.boxplot(x = ideatorTypes, y = df['novelty'])
plt.show()
sns.swarmplot(x = condition_labels, y = df['novelty'])
plt.show()
sns.swarmplot(x = ideatorTypes, y = df['value'])
plt.show()
sns.swarmplot(x = condition_labels, y = df['value'])
plt.show()


def make_boxplot(type):
    """Box-plot the best novelty rating per session by condition for one ideator type.

    Parameters
    ----------
    type : str
        Ideator type without URI prefix, e.g. ``'SEEKER'``.

    Draws into the current axes with a fixed y-range of -0.5 to 1.8; the
    caller is responsible for ``plt.show()``.
    """
    df = ratings_per_type_max
    df = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/' + type]
    condition_labels = df['session2_condition'].apply(lambda x:str(x).replace("http://purl.org/innovonto/condition/",""))
    # x/y are passed by keyword: recent seaborn releases no longer accept them positionally.
    sns.boxplot(x = condition_labels, y = df['novelty'], order=['baseline', 'on-demand', 'on-idle'])
    plt.ylim(-.5, 1.8)
    
# One figure per ideator type; plt.show() closes each figure before the next is drawn.
make_boxplot('SEEKER')
plt.show()
make_boxplot('AVOIDER')
plt.show()

In [None]:
# Three type-3 ANOVAs on the mean value rating per session. All three print the
# same heading; they differ only in the model formula.

# 1) Additive model: ideator type + condition.
df = ratings_per_type[['session2_condition', 'session1_ideatorType', 'value']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session1_ideatorType) + C(session2_condition)', data = df).fit()
print('ANOVA idea value')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

# 2) Full factorial model with interaction, default contrast coding.
df = ratings_per_type[['session2_condition', 'session1_ideatorType', 'value']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session1_ideatorType) * C(session2_condition)', data = df).fit()
print('ANOVA idea value')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

# 3) Same factorial model with Sum contrast coding for both factors.
# NOTE(review): presumably the Sum-coded table is the one to report for type-3 main effects -- confirm.
df = ratings_per_type[['session2_condition', 'session1_ideatorType', 'value']]
df = df[df['session2_condition'].notna()]
linear_model=ols('value ~ C(session1_ideatorType, Sum) * C(session2_condition, Sum)', data = df).fit()
print('ANOVA idea value')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)

In [None]:
# Two-way ANOVA (ideator type x condition) on session-2 fluency.
df = session_data[['session2_condition', 'session1_ideatorType', 'session2_numberOfSubmits']]
df = df[df['session2_condition'].notna()]
linear_model=ols('session2_numberOfSubmits ~ C(session1_ideatorType) * C(session2_condition)', data = df).fit()
# The heading used to read 'ANOVA idea value' although submit counts are modelled.
print('ANOVA idea submits session 2 (type x condition)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)
display(linear_model.summary())

# One-way ANOVA on session-1 fluency by ideator type, restricted to
# participants that were assigned a session-2 condition.
df = session_data[['session2_condition', 'session1_ideatorType', 'session1_numberOfSubmits']]
df = df[df['session2_condition'].notna()]
linear_model=ols('session1_numberOfSubmits ~ C(session1_ideatorType)', data = df).fit()
# The heading used to read 'ANOVA idea value' although submit counts are modelled.
print('ANOVA idea submits session 1 (type only)')
anova = sm.stats.anova_lm(linear_model, typ = 3)
display(anova)
display(linear_model.summary())

In [None]:
# Split the sessions by ideator type for the inspiration survey charts.
df = session_data
seeker = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/SEEKER']
avoider = df[df['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/AVOIDER']
#display(avoider)

# Larger global font size for the survey charts.
plt.rcParams.update({'font.size': 20})

def survey(survey_data, title, answer_count):
    """Draw a stacked horizontal bar chart of the inspiration survey answers.

    Parameters
    ----------
    survey_data : pandas.DataFrame
        Sessions holding the four survey columns listed in ``fields``.
    title : str
        Chart title.
    answer_count : number
        Denominator used to turn answer counts into percentages.

    Returns
    -------
    (matplotlib.figure.Figure, matplotlib.axes.Axes)

    Answer levels are sorted, reversed and then labelled positionally with
    ``category_names``.
    NOTE(review): this assumes the highest answer code means 'Strongly
    disagree' -- confirm against the questionnaire encoding.
    """
    category_names = ['Strongly disagree', 'Disagree',
                  'Neutral', 'Agree', 'Strongly agree']
    questions = ['Inspirations were distracting',
                 'Inspirations were diverse',
                 'I had better ideas with inspirations',
                'I was satisfied with given inspirations'
                ]
    fields = ['inspirations-were-distracting',
              'inspirations-were-diverse',
              'better-ideas-with-inspirations',
              'satisfied-with-inspirations',
              ]
    counts_per_field = [survey_data.groupby(field).size() for field in fields]
    # Align every question on the same answer levels so that a level nobody
    # chose counts as 0 instead of producing rows of different length.
    answer_levels = sorted(set().union(*(counts.index for counts in counts_per_field)))
    values = [counts.reindex(answer_levels, fill_value = 0).iloc[::-1] for counts in counts_per_field]
    data = ( np.array(values) / answer_count ) * 100
    data_cum = data.cumsum(axis=1)
    display(data)
    display(data_cum)
    category_colors = ['#D38586', '#E5BCBC', '#F4F2D0', '#CEE8D6', '#A7E2BA']
    #category_colors = plt.get_cmap('RdYlGn')(
    #    np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(data, axis=1).max())
    plt.title(title, y = 1.1, fontsize = 18)

    plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    )
    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # Each segment starts where the previous categories end.
        starts = data_cum[:, i] - widths
        ax.barh(questions, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2

        #r, g, b, _ = color
        # Dark text on the two light-coloured segments, white text elsewhere.
        text_color = 'white' if i != 2 and i != 3 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, str(int(round(c))), ha='center', va='center',
                    color=text_color, fontsize=16)
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='10')

    return fig, ax


# NOTE(review): 200 and 59 are hard-coded percentage denominators -- presumably
# the number of seekers / avoiders who answered the survey; confirm against the data.
plot, ax = survey(seeker, 'Seeker', 200)
plot.savefig("iui20-inspirations-survey_seeker.jpg", bbox_inches='tight', dpi = dpi)
plot, ax = survey(avoider, 'Avoider', 59)
plot.savefig("iui20-inspirations-survey_avoider.jpg", bbox_inches='tight', dpi = dpi)

In [None]:
# Stacked bar chart: participants per age group, split by gender.
plt.rcParams['font.size'] = 15
df = session_data.copy()
#display(df)
# Shorten the age-group labels by dropping the " years old" suffix.
df['age-group'] = [str(age_group).replace(" years old", "") for age_group in df['age-group']]
participants_by_age_and_gender = (
    df.replace({'f': 'female', 'm': 'male', 'o': 'other'})
      .groupby(['age-group', 'gender'])
      .size()
      .unstack()
)
plot = participants_by_age_and_gender.plot(kind = 'bar', stacked = True, color = colors, figsize=(12,7))
plt.grid(axis='x')
plt.xticks(rotation='horizontal')
plt.xlabel('Age groups (years)')
plt.ylabel('Participants')

plot.figure.savefig("iui20-age+gender.jpg", bbox_inches='tight', dpi = dpi)

In [None]:
# Bar chart: number of participants per ideator type.
df = session_data.copy()
# Turn the type URIs into capitalised short labels.
df['session1_ideatorType'] = [
    str(str(type_uri).replace("http://purl.org/innovonto/ideatorType/", "")).capitalize()
    for type_uri in df['session1_ideatorType']
]
plt.rcParams['font.size'] = 15
participants_per_type = df.groupby(['session1_ideatorType']).size()
plot = participants_per_type.plot(kind = 'bar', color = colors)
plt.xticks(rotation=15)
plt.grid(axis='x')
plt.xlabel('Ideator types')
plt.ylabel('Participants')
# Write each count inside its bar at a fixed height of y = 10.
for position, participant_count in enumerate(participants_per_type):
    plt.text(position, 10, participant_count,
             fontsize=15,
             horizontalalignment="center",
             color='white')
plot.figure.savefig("iui20-ideator-types-count-session1.jpg", bbox_inches='tight', dpi = dpi)


In [None]:
# Show free-text feedback entries that are strings longer than two characters.
# Relies on `df` as left behind by an earlier cell.
display(df['fulltext-feedback'][df['fulltext-feedback'].apply(lambda x: isinstance(x, str) and len(x) > 2)])

In [None]:
# List all columns of the session table.
display(session_data.columns)

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

#display(session_data)

# Keep only participants that were assigned a session-2 condition.
data = session_data
data = data[data['session2_condition'].notna()]

#display(sm.datasets.get_rdataset("dietox", "geepack").data)

def get_value_str(row, column_name):
    """Return ``row[column_name]`` as a string with the condition and
    ideator-type URI prefixes removed."""
    label = str(row[column_name])
    for uri_prefix in ('http://purl.org/innovonto/condition/',
                       'http://purl.org/innovonto/ideatorType/'):
        label = label.replace(uri_prefix, '')
    return label

def get_value(row, column_name):
    """Return the integer code of the ideator type or condition in ``row[column_name]``.

    SEEKER/baseline -> 0, AVOIDER/on-demand -> 1, on-idle -> 2.
    Any other label raises ``KeyError``.
    """
    label = get_value_str(row, column_name)
    if label in ('SEEKER', 'baseline'):
        return 0
    if label in ('AVOIDER', 'on-demand'):
        return 1
    if label == 'on-idle':
        return 2
    raise KeyError(label)
# Build a long-format table with two rows per participant (one per session).
rows = []
for i, row in data.iterrows():
    # Session-1 row: the condition slot holds the placeholder string '1'.
    # NOTE(review): the divisor 10 is presumably the session-1 length in minutes -- confirm.
    rows.append([
                 i, 
                 '1',
                 get_value(row, 'session1_ideatorType'),
                 '1',
                 row['session1_numberOfSubmits'] / 10
                ])
    # Session-2 row: the condition is encoded as an integer.
    # NOTE(review): the divisor 15 is presumably the session-2 length in minutes -- confirm.
    rows.append([i, 
                 '2',
                 get_value(row, 'session1_ideatorType'),
                 get_value(row, 'session2_condition'),
                 row['session2_numberOfSubmits'] / 15
                ])
    
df = pd.DataFrame(rows,
                  columns = ['worker_id', 
                             'session', 
                             'ideator_type', 
                             'condition', 
                             'submits'])

#display(df)
               

# Mixed linear model: submits by ideator type, grouped by participant (worker_id).
md = smf.mixedlm("submits ~ C(ideator_type)", df, groups=df['worker_id'])

mdf = md.fit()

print(mdf.summary())


# linear_model=ols('submits ~ C(ideator_type)', data = df).fit()
# print('ANOVA idea value')
# anova = sm.stats.anova_lm(linear_model, typ = 3)
# display(anova)
# display(linear_model.summary())


In [None]:
# Restrict the working frame to sessions run under the on-demand condition.
data = session_data.loc[
    session_data['session2_condition'] == 'http://purl.org/innovonto/condition/on-demand'
]
# Show every column when displaying frames.
pd.options.display.max_columns = None

def get_ideator_type_match(sessions=None):
    """Print how session-2 behaviour breaks down for each session-1 ideator type.

    For SEEKER and AVOIDER (as classified in session 1) the function prints:

    * UNMOTIVATED  - sessions with fewer than 3 session-2 submits
    * AVOIDER      - of the remaining sessions, those with fewer than 2 requests
    * SEEKER       - of the remaining sessions, those with more than 6 requests
    * UNDETERMINED - the remaining sessions that are neither of the above

    Parameters
    ----------
    sessions : pandas.DataFrame, optional
        Frame with the columns ``session1_ideatorType``,
        ``session2_numberOfSubmits`` and ``session2_numberOfRequests``.
        Defaults to the module-level ``data`` frame.

    Notes
    -----
    Rows are counted with boolean masks. ``DataFrame.count()[0]`` would count
    the non-null values of whichever column comes first, which can disagree
    with the row count used for UNDETERMINED, and its positional ``[0]``
    lookup on a label-indexed Series is deprecated in recent pandas.
    """
    if sessions is None:
        sessions = data  # notebook-level frame prepared by the calling cell

    type_prefix = 'http://purl.org/innovonto/ideatorType/'
    for ideator_type in ['SEEKER', 'AVOIDER']:
        type_sessions = sessions[sessions['session1_ideatorType'] == type_prefix + ideator_type]
        print('==== %s ====' % ideator_type)

        submits = type_sessions['session2_numberOfSubmits']
        print('UNMOTIVATED: %s' % int((submits < 3).sum()))

        # Only sessions with at least 3 submits are classified further.
        type_sessions = type_sessions[submits >= 3]
        requests = type_sessions['session2_numberOfRequests']

        avoider_count = int((requests < 2).sum())
        print('AVOIDER: %s' % avoider_count)
        seeker_count = int((requests > 6).sum())
        print('SEEKER: %s' % seeker_count)
        print('UNDETERMINED: %s' % (len(type_sessions.index) - avoider_count - seeker_count))
        print()

# Print the breakdown for the frame currently bound to the notebook-level `data`.
get_ideator_type_match()

In [None]:
# TLX survey answers per participant, with the 'tlx-' prefix stripped from the
# column names so they can be used directly in model formulas.
tlx_columns = ["tlx-effort", "tlx-frustration", "tlx-mental", "tlx-performance", "tlx-temporal"]
survey_data = session_data[["session1_ideatorType", "session2_condition"] + tlx_columns]
survey_data = survey_data.rename(columns={name: name[len('tlx-'):] for name in tlx_columns})

# Drop participants classified as UNMOTIVATED or UNDETERMINED in session 1.
excluded_ideator_types = ['http://purl.org/innovonto/ideatorType/UNMOTIVATED',
                          'http://purl.org/innovonto/ideatorType/UNDETERMINED']
survey_data = survey_data[~survey_data['session1_ideatorType'].isin(excluded_ideator_types)]
#survey_data = survey_data[survey_data['session2_condition'].notna()]
# survey_data.groupby('session1_ideatorType').mean().plot(kind = 'bar', legend=False)

# linear_model=ols('effort ~ C(session1_ideatorType)', data = survey_data).fit()
# display(linear_model.summary())

# linear_model=ols('frustration ~ C(session1_ideatorType)', data = survey_data).fit()
# display(linear_model.summary())

# linear_model=ols('mental ~ C(session1_ideatorType)', data = survey_data).fit()
# display(linear_model.summary())

# linear_model=ols('performance ~ C(session1_ideatorType)', data = survey_data).fit()
# display(linear_model.summary())

# linear_model=ols('temporal ~ C(session1_ideatorType)', data = survey_data).fit()
# display(linear_model.summary())

df = survey_data
# Tick labels: strip the URI prefix and capitalise (e.g. 'Seeker', 'Avoider').
ideator_type = df['session1_ideatorType'].apply(lambda x:str(x).replace("http://purl.org/innovonto/ideatorType/","")).apply(lambda x:str(x).capitalize())

# One violin plot per TLX scale. x and y are passed by keyword because
# seaborn >= 0.12 no longer accepts them positionally.
for tlx_scale in ('frustration', 'temporal'):
    # A fresh figure per scale, so the plots never end up on the same axes.
    fig, ax = plt.subplots()
    plot = sns.violinplot(x=ideator_type, y=df[tlx_scale], palette=colors, ax=ax)
    plt.xlabel('Ideator Type')
    plt.ylabel(tlx_scale.capitalize())
    # Save before showing, so the file is written whatever show() does to the figure.
    plot.figure.savefig("iui20-tlx-" + tlx_scale + ".jpg", bbox_inches='tight', dpi=dpi)
    plt.show()

In [None]:
answers = ['before', 'in-the-middle', 'out-of-ideas', 'bored', 'did-not']

# Request-timing survey answers, without UNMOTIVATED / UNDETERMINED participants.
timing_columns = ["session1_ideatorType", "session2_condition", "request-timing", "session2_numberOfRequests"]
survey_data = session_data[timing_columns]
#survey_data = survey_data[survey_data['session2_condition'].notna()]
ideator_uri = survey_data['session1_ideatorType']
survey_data = survey_data[(ideator_uri != 'http://purl.org/innovonto/ideatorType/UNMOTIVATED')
                          & (ideator_uri != 'http://purl.org/innovonto/ideatorType/UNDETERMINED')]
survey_data = survey_data.rename(columns={'request-timing': 'request_timing'})
survey_data['all'] = 1

# 'request_timing' holds '|'-separated answers; rows without a string answer
# do not split into a list.
request_timing_choices = survey_data['request_timing'].str.split('|')

# One 0/1 indicator column per possible answer (dashes become underscores).
for answer in answers:
    indicator_column = answer.replace('-', '_')
    survey_data[indicator_column] = request_timing_choices.apply(
        lambda picks: 1 if isinstance(picks, list) and answer in picks else 0)

# Flag rows that gave no answer at all.
survey_data['no_answer'] = request_timing_choices.apply(
    lambda picks: 0 if isinstance(picks, list) else 1)

# Regress each answer indicator on the session-1 ideator type.
for answer in [name.replace('-', '_') for name in answers]:
    linear_model = ols(answer + ' ~ C(session1_ideatorType)', data=survey_data).fit()
    display(linear_model.summary())

# sns.catplot(x="sex", y="survived", hue="class", kind="bar", data=titanic);
# Per-ideator-type totals of the numeric columns. numeric_only=True keeps the
# string columns out of the sum; pandas >= 2.0 no longer drops them silently.
display(survey_data.groupby('session1_ideatorType').sum(numeric_only=True))

#survey_data['request-timing'].str.split('|').apply(lambda x: Series(1,index=x)).fillna(0).astype(bool)
#pd.concat([survey_data,survey_data['request-timing'].str.split('|').apply(lambda x: Series(1,index=x)).fillna(0).astype(bool)],axis=1)

# Session-1 avoiders in the on-demand condition who made more than one request.
is_avoider = survey_data['session1_ideatorType'] == 'http://purl.org/innovonto/ideatorType/AVOIDER'
is_on_demand = survey_data['session2_condition'] == 'http://purl.org/innovonto/condition/on-demand'
requested_more_than_once = survey_data['session2_numberOfRequests'] > 1
survey_data = survey_data[is_avoider & is_on_demand & requested_more_than_once]
display(survey_data)

In [None]:
# Sessions run under the on-idle condition, split by session-1 ideator type.
df = session_data[session_data['session2_condition'] == 'http://purl.org/innovonto/condition/on-idle']
on_idle_ideator_uri = df['session1_ideatorType']
avoider = df[on_idle_ideator_uri == 'http://purl.org/innovonto/ideatorType/AVOIDER']
seeker = df[on_idle_ideator_uri == 'http://purl.org/innovonto/ideatorType/SEEKER']

# OLS of the session-2 request count on ideator type, fitted on every on-idle row.
linear_model = ols('session2_numberOfRequests ~ C(session1_ideatorType)', data=df).fit()
display(linear_model.summary())

# Avoider vs. seeker request counts: one-way ANOVA and independent-samples t-test.
avoider_requests = avoider['session2_numberOfRequests']
seeker_requests = seeker['session2_numberOfRequests']
display(scipy.f_oneway(avoider_requests, seeker_requests))
display(scipy.ttest_ind(avoider_requests, seeker_requests))