In [None]:
from collections import Counter
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import matplotlib.lines as mlines
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns

# Render every figure in this notebook at 300 DPI.
mpl.rc('figure', dpi=300)


In [None]:
# This dataset requires access permissions.
# See https://osf.io/urv7w/
DATASET_URL = 'https://osf.io/us5xm/download'

# Load the survey spreadsheet into the frame used by every later cell.
df = pd.read_excel(DATASET_URL, engine='openpyxl')


In [None]:
# Short aliases for two long survey column names; later cells refer to the
# aliases only.
df['Underadvantaged'] = df['Member of Underrepresented Group']
df['Workshop Experience'] = df[
    'How would you rate your experience with the workshop? (POST)'
]

# Questions that have both a '(PRE, CODED)' and a '(POST, CODED)' column.
coded_questions = (
    'How comfortable are you with C++?',
    'How comfortable are you with JavaScript?',
    'How comfortable are you with HTML/CSS?',
    'How comfortable are you with evolutionary biology?',
    'Which describes your overall level of proficiency with programming?',
    'Which describes your formal background with evolutionary biology?',
)

# Add one '(DELTA)' column per question: coded post response minus coded pre
# response, row by row.
for variable in coded_questions:
  post_coded = df[f'{variable} (POST, CODED)']
  pre_coded = df[f'{variable} (PRE, CODED)']
  df[f'{variable} (DELTA)'] = post_coded - pre_coded


In [None]:
# Draw one three-panel figure per coded survey question:
#   ax1 - pre/post transition chart (line width and marker size scale with
#         the number of respondents),
#   ax2 - histogram of the question's '(DELTA)' column,
#   ax3 - single bar summarising the '(DELTA)' column (seaborn barplot's
#         default aggregate).
# Reads the '(PRE, CODED)', '(POST, CODED)' and '(DELTA)' columns of `df`.

# Remap the single-letter colour codes used below ('b', 'k') to seaborn's
# "muted" palette. This is global state, so it is set once rather than on
# every loop iteration.
sns.set_color_codes(palette='muted')

# Respondent counts illustrated in the two legend keys. Whole numbers keep
# each key's label equal to the size actually drawn (an evenly spaced
# 1 / 5.5 / 10 would be labelled "5" but drawn at 5.5).
legend_counts = (1, 5, 10)

for variable in [
    'How comfortable are you with C++?',
    'How comfortable are you with JavaScript?',
    'How comfortable are you with HTML/CSS?',
    'How comfortable are you with evolutionary biology?',
    'Which describes your overall level of proficiency with programming?',
    'Which describes your formal background with evolutionary biology?',
]:
  pre_col = f'{variable} (PRE, CODED)'
  post_col = f'{variable} (POST, CODED)'
  delta_col = f'{variable} (DELTA)'

  fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
  fig.set_size_inches((18,3))
  fig.suptitle(variable)

  # One line per distinct (pre, post) answer pair, drawn from x=0 to x=1;
  # its width is the number of respondents who gave that pair.
  transitions = Counter(zip(df[pre_col], df[post_col]))
  for (pre, post), count in transitions.items():
    ax1.plot([0, 1], [pre, post], color='k', linewidth=count)

  # One dot per distinct answer at x=0 (pre) and x=1 (post); its size grows
  # with the number of respondents who gave that answer.
  for x_pos, column in enumerate((pre_col, post_col)):
    for response, count in Counter(df[column]).items():
      ax1.plot(
        x_pos,
        response,
        marker='o',
        markersize=count*3,
        color='b',
        markerfacecolor='b',
      )

  ax1.yaxis.set_major_locator(MaxNLocator(integer=True))
  ax1.set_xticks([0,1])
  ax1.set_xticklabels(['pre', 'post'])
  # The x-range extends past 'post' on the right, where both legends sit.
  ax1.set_xlim([-0.25, 1.75])
  ax1.set_ylim([0.5, ax1.get_ylim()[1]+0.5])
  ax1.set_ylabel('Response')

  # Marker-size key. Axes.legend() keeps only the most recent legend, so this
  # one is re-attached as a plain artist to survive the second legend() call.
  size_key = ax1.legend(
    handles=[
      mlines.Line2D([], [], color='b', marker='o', linestyle='None',
                    markersize=n*3, label=str(n))
      for n in legend_counts
    ],
    loc="upper right",
    title="Respondents",
    labelspacing=1.5,
  )
  ax1.add_artist(size_key)

  # Line-width key. It remains the Axes' own legend, so it needs no
  # add_artist() call (adding it as well would draw it twice).
  ax1.legend(
    handles=[
      mlines.Line2D([], [], color='k', linewidth=n, label=str(n))
      for n in legend_counts
    ],
    loc="lower right",
    title=None,
  )

  # Distribution of the per-respondent change, one bar per integer value.
  sns.histplot(
    data=df,
    x=delta_col,
    discrete=True,
    ax=ax2,
  )
  ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
  ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
  ax2.set_xlabel('Response Delta')

  # Aggregate change, with a reference line at zero (no change).
  sns.barplot(
    data=df,
    y=delta_col,
    ax=ax3,
  )
  ax3.set_ylabel('Response Delta')
  ax3.axhline(y=0, c='k')

plt.show()


In [None]:
# Draw one three-panel figure per coded survey question, split by the
# 'Underadvantaged' column of `df`:
#   ax1 - pre and post coded responses, one bar per group,
#   ax2 - dodged histogram of the '(DELTA)' column per group,
#   ax3 - '(DELTA)' bar per group.
for variable in [
    'How comfortable are you with C++?',
    'How comfortable are you with JavaScript?',
    'How comfortable are you with HTML/CSS?',
    'How comfortable are you with evolutionary biology?',
    'Which describes your overall level of proficiency with programming?',
    'Which describes your formal background with evolutionary biology?',
]:
  pre_col = f'{variable} (PRE, CODED)'
  post_col = f'{variable} (POST, CODED)'
  delta_col = f'{variable} (DELTA)'

  fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
  fig.set_size_inches((18,3))
  fig.suptitle(variable)

  # Long format: the pre and post columns are stacked into 'variable' /
  # 'value' (melt's default names), keeping the group column alongside.
  long_responses = df.melt(
    id_vars=['Underadvantaged'],
    value_vars=[pre_col, post_col],
  )
  sns.barplot(
    data=long_responses,
    x='variable',
    y='value',
    hue='Underadvantaged',
    ax=ax1,
  )
  ax1.yaxis.set_major_locator(MaxNLocator(integer=True))
  ax1.set_xticks([0,1])
  ax1.set_ylim([0.25, df[post_col].max() + 0.75])
  ax1.set_xticklabels(['pre', 'post'])
  ax1.set_ylabel('Response')
  # Keep seaborn's legend handles but replace their labels.
  # NOTE(review): assumes the hue levels are ordered no-then-yes - confirm.
  group_handles, _ = ax1.get_legend_handles_labels()
  ax1.legend(group_handles, ['no', 'yes'], title='Underadvantaged')
  ax1.set_xlabel('')

  # seaborn's own legend is switched off here and a hand-labelled one is
  # attached after the axis formatting.
  sns.histplot(
    data=df,
    x=delta_col,
    hue='Underadvantaged',
    multiple='dodge',
    discrete=True,
    shrink=0.8,
    ax=ax2,
    legend=False,
  )
  ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
  ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
  ax2.set_xlabel('Response Delta')
  # NOTE(review): the label order is reversed relative to ax1 - presumably to
  # match the order in which histplot adds its artists; confirm against the
  # rendered figure.
  ax2.legend(title='Underadvantaged', labels=['yes', 'no'])

  # Per-group change, with a reference line at zero (no change).
  sns.barplot(
    data=df,
    x='Underadvantaged',
    y=delta_col,
    ax=ax3,
  )
  ax3.set_ylabel('Response Delta')
  ax3.set_xticks([0,1])
  ax3.set_xticklabels(['no', 'yes'])
  ax3.axhline(y=0, c='k')

plt.show()


In [None]:
# Participant satisfaction: a pie chart of all 'Workshop Experience' answers
# next to a stacked bar chart of the same answers per 'Underadvantaged' group.
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches((12,3))
fig.suptitle('Participant Satisfaction')

# Left panel: number of respondents per satisfaction rating.
df.groupby('Workshop Experience').size().plot.pie(ax=ax1)
ax1.set_ylabel('All Participants')

# Right panel: one row per group, one column per rating, cells are respondent
# counts. The name `df2` is kept in case other cells refer to it.
df2 = (
    df.groupby(['Underadvantaged', 'Workshop Experience'])
    .size()
    .unstack('Workshop Experience')
)
df2.plot(kind='bar', stacked=True, ax=ax2)
ax2.set_xticks([0,1])
# NOTE(review): assumes the two groups sort as no-then-yes - confirm.
ax2.set_xticklabels(['no', 'yes'])
# Set the label rotation on ax2 explicitly instead of through pyplot's
# implicit "current axes", which also avoids echoing a return value as the
# cell's output.
ax2.tick_params(axis='x', labelrotation=0)

plt.show()
