In [None]:
import pandas as pd
import numpy as np

import scipy.stats as scs
import statsmodels.api as sm
import statsmodels.stats as st
from statsmodels.stats.proportion import proportion_confint

from matplotlib import gridspec, rcParams
from matplotlib.ticker import FuncFormatter
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

sns.set()

from IPython.display import display, Markdown

%matplotlib inline

In [None]:
# Create the output directory used for figures and CSV exports.
# exist_ok=True makes the call a no-op when the directory is already there,
# so the cell can be re-run safely and there is no check-then-create race.
import os
os.makedirs('plots', exist_ok=True)

In [None]:
def split_multiselect(col):
    """Split a comma-separated multiple-select column into indicator columns.

    Parameters
    ----------
    col : pandas.Series
        Each entry is a string of selected options joined by ','. Entries are
        passed through ``str``, so a missing value (NaN) becomes the token
        ``'nan'``, which is removed from the result.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``col``, one column per distinct option, holding 1 where
        the option was selected and 0 otherwise.
    """
    # One indicator Series per response. dict.fromkeys drops options repeated
    # within a single response (keeping first-seen order); duplicate labels
    # would otherwise make the concat below fail.
    indicators = [pd.Series(1, index=list(dict.fromkeys(str(x).split(','))))
                  for x in col]

    df = pd.concat(indicators, axis=1, sort=False).transpose().set_index(col.index)

    # errors='ignore': the 'nan' column only exists when some response is
    # missing; without it the drop raises KeyError on fully answered columns.
    return df.drop(columns='nan', errors='ignore').fillna(0)

def print_unique_categories(x):
    """Return the set of distinct options in comma-separated multi-select strings.

    Every element of ``x`` is converted with ``str``, the pieces are joined
    with ',' and the result is split on ',' again. Despite the name, nothing
    is printed; the set is returned.
    """
    joined = ','.join(map(str, x))
    return set(joined.split(','))

def dm(t):
    """Render the string ``t`` as Markdown in the cell output."""
    rendered = Markdown(t)
    display(rendered)

In [None]:
# Enrollment tables. Each gets a constant 'one' column (used for counting)
# and a 'level' label so the two can be stacked and grouped later.
enr = pd.read_csv('data/enrollments_pub.csv', header=0)\
    .assign(one=1., level='INTRO')

enr_adv = pd.read_csv('data/enrollments_adv_pub.csv', header=0)\
    .assign(one=1., level='ADV')

# Pre-course survey responses; rows that are empty in every column are dropped.
pre = pd.read_csv('data/pre_survey_pub.csv', header=0)\
    .dropna(axis=0, how='all')\
    .assign(one=1.)

# Post-course survey responses, handled the same way.
pst = pd.read_csv('data/pst_survey_pub.csv', header=0)\
    .dropna(axis=0, how='all')\
    .assign(one=1.)

# Question metadata for both surveys, indexed by the first CSV column.
pre_q = pd.read_csv('data/pre_survey_question_info.csv', header=0, index_col=0)
pst_q = pd.read_csv('data/pst_survey_question_info.csv', header=0, index_col=0)

In [None]:
# Per-registrant activity totals across the course.
_quiz_cols = ['activity_q{:d}'.format(i) for i in range(1, 7)]
_pga_cols = ['activity_pga{:d}'.format(i) for i in range(1, 4)]
_vid_cols = ['activity_vid_week_{:d}'.format(i) for i in range(1, 7)]

enr['activity_quiz_count'] = enr[_quiz_cols].sum(axis=1)
enr['activity_pga_count'] = enr[_pga_cols].sum(axis=1)
# A week counts towards the video total only when its value exceeds 1.
enr['activity_vid_week_count'] = enr[_vid_cols].gt(1).sum(axis=1)

# Definitions

- __Registrant__: A person who registered for the MOOC
- __Learner__: A registrant who completed at least two quizzes, completed a PGA, or watched videos from at least three of the six course modules.
- __Completer__: A Learner who completed a combination of quizzes (weighted 60% together for four highest scores) and PGAs (individually weighted 10%, 10%, and 20%) to get an overall score of 50%.
- __Auditor__: A Learner who did not meet the criteria for Completion.

In [None]:
# Auditor flag: learner AND NOT completed, for both course levels.
for frame in (enr, enr_adv):
    frame['activity_auditor'] = frame['activity_learner'] & (~frame['activity_completed'])

# Three-way label for the introductory course. Everyone starts as
# 'Non-learner'; completers are labelled first and auditors second, so the
# auditor label wins if a row carries both flags.
is_completer = enr['activity_completed'] == 1
is_auditor = enr['activity_auditor'] == 1

enr['learner_category'] = 'Non-learner'
enr.loc[is_completer, 'learner_category'] = 'Learner: completer'
enr.loc[is_auditor, 'learner_category'] = 'Learner: auditor'

### Week-by-week engagement

Determine whether a registrant was engaged during each of the 6 weeks, and calculate the total number of weeks engaged.

In [None]:
# Pivot to long: one row per (user, week).
# A user is engaged in week n when activity_q<n> > 0 or activity_vid_week_<n> > 0.
x = pd.wide_to_long(enr, stubnames=['activity_q', 'activity_vid_week_'], i='unique_user_id', j='week')
x['engaged'] = (x['activity_q'] > 0) | (x['activity_vid_week_'] > 0)

# Select 'engaged' BEFORE summing so that only this column is aggregated;
# summing the whole frame would also try to add up every non-numeric column.
weeks_engaged = x.groupby('unique_user_id')['engaged'].sum().rename('activity_weeks_engaged')

# Drop any column left over from a previous run of this cell, so re-running
# does not produce suffixed '_x'/'_y' duplicates.
enr = enr.drop(columns='activity_weeks_engaged', errors='ignore')\
    .merge(weeks_engaged, left_on='unique_user_id', right_index=True)

In [None]:
# Job types in QID363 that count as intending to pursue an academic job.
job_categories = {'Community college',
                  'Research intensive university faculty position',
                  'Undergraduate-focused college or university'}

# True when at least one comma-separated selection is one of those job types.
pre['academic_job'] = pre['QID363'].apply(
    lambda answer: not job_categories.isdisjoint(str(answer).split(',')))

# Intent to be active: the QID366 answer mentions quizzes or peer assessments.
_active_phrases = ('complete most of the quizzes',
                   'complete the peer assessments')
pre['intent_active'] = pre['QID366'].apply(
    lambda answer: any(phrase in str(answer) for phrase in _active_phrases))

# Matched survey data

The first two iterations of the course were offered on Coursera, and the pre/post surveys were linked to course activity.

In [None]:
# Offerings whose survey responses can be linked to completion stats.
_coursera_offerings = ['INTRO Fall 2014', 'INTRO Fall 2015']

pre_c = pre[pre['course_name'].isin(_coursera_offerings)]
pst_c = pst[pst['course_name'].isin(_coursera_offerings)]
enr_c = enr[enr['course_name'].isin(_coursera_offerings)]

# Row counts: all introductory offerings versus the Coursera subset.
print('           Enrollments  Pre-surveys  Post-surveys')
print('All INTRO:   {:9d}    {:9d}     {:9d}'.format(*map(len, (enr, pre, pst))))
print('COURSERA:    {:9d}    {:9d}     {:9d}'.format(*map(len, (enr_c, pre_c, pst_c))))

# Survey response rates relative to all introductory enrollments.
n_enrolled = len(enr)
print('\nINTRO: {:5.1f}% of students complete pre-survey'.format(100.*len(pre) / n_enrolled))
print('INTRO: {:5.1f}% of students complete post-survey'.format(100.*len(pst) / n_enrolled))

In [None]:
# Pair each Coursera post-survey with the same user's pre-survey (inner
# join), then attach activity columns from the enrollment table (left join).
# Column names shared by both surveys get pandas' default suffixes:
# '_x' for the post-survey (left frame) and '_y' for the pre-survey.
_paired_surveys = pd.merge(pst_c, pre_c, how='inner', on='unique_user_id')
pre_pst = pd.merge(_paired_surveys, enr_c, how='left', on='unique_user_id')
pre_pst['one'] = 1.
print('{:d} matching pre-post survey pairs'.format(len(pre_pst)))

In [None]:
# Coursera pre-survey responses with activity columns attached.
# NOTE(review): this is a left join, so the printed count includes pre-survey
# rows without an enrollment match — confirm that is intended.
pre_enr = pd.merge(pre_c, enr_c, how='left', on='unique_user_id')
pre_enr['one'] = 1.
print('{:d} matching pre-survey/enrollment pairs'.format(len(pre_enr)))

In [None]:
# Coursera post-survey responses with activity columns attached.
# NOTE(review): this is a left join, so the printed count includes post-survey
# rows without an enrollment match — confirm that is intended.
pst_enr = pd.merge(pst_c, enr_c, how='left', on='unique_user_id')
pst_enr['one'] = 1.
print('{:d} matching post-survey/enrollment pairs'.format(len(pst_enr)))

# Basic enrollment and completion stats

In [None]:
# How each enrollment column is summarised per course level.
_level_aggs = {'course_name': 'nunique',
               'one': 'sum',
               'activity_learner': 'sum',
               'activity_completed': 'sum',
               'activity_auditor': 'sum',
               'activity_weeks_engaged': lambda weeks: (weeks == 6).sum()}

# Human-readable row labels for the transposed table.
_row_labels = {'course_name': 'Offerings',
               'one': 'Total enrollments',
               'activity_learner': 'Total learners',
               'activity_completed': 'Completers',
               'activity_auditor': 'Auditors',
               'activity_weeks_engaged': 'Engaged all 6 weeks'}

# NOTE(review): join='inner' keeps only columns present in both frames.
# 'activity_weeks_engaged' is added to enr in this notebook, so enr_adv
# presumably already carries it — confirm.
_both_levels = pd.concat([enr, enr_adv], join='inner')

# Rows = statistics, columns = course level (INTRO first).
t = _both_levels.groupby('level')\
    .aggregate(_level_aggs)\
    .rename(columns=_row_labels)\
    .transpose()[['INTRO', 'ADV']]

### Totals

In [None]:
# Show the per-level summary table, formatting every cell with no decimals.
t.style.format('{:.0f}')

### As percentage of Total enrollments

In [None]:
# Every row from 'Total enrollments' down, divided column-wise by the
# enrollment total of the same course level.
t.iloc[1:].div(t.loc['Total enrollments'], axis='columns').style.format('{:.1%}')

### As percentage of Learners

In [None]:
# Every row from 'Total learners' down, divided column-wise by the learner
# total of the same course level.
t.iloc[2:].div(t.loc['Total learners'], axis='columns').style.format('{:.0%}')

## Course Engagement

In [None]:
# Matplotlib font sizes for the figures, applied in order as (rc group, settings).
_font_settings = [
    ('font',   {'size': 8}),        # default text sizes
    ('axes',   {'titlesize': 8}),   # axes title
    ('axes',   {'labelsize': 10}),  # x and y labels
    ('xtick',  {'labelsize': 8}),   # x tick labels
    ('ytick',  {'labelsize': 8}),   # y tick labels
    ('legend', {'fontsize': 8}),    # legend
    ('figure', {'titlesize': 12}),  # figure title
]
for _group, _settings in _font_settings:
    plt.rc(_group, **_settings)

In [None]:
# Figure 1: (left) heatmap of video-weeks vs. quizzes attempted,
#           (right) stacked bars of weekly video watching by learner category.
fig, ax = plt.subplots(ncols=2, figsize=(6.5, 3))

cm_blue = sns.cubehelix_palette(start=2.8, rot=0.1, as_cmap=True)

## Video/quiz engagement grid
# Rows are sorted descending so the highest video-week count is at the top.
sns.heatmap(pd.crosstab(enr['activity_vid_week_count'],
                        enr['activity_quiz_count'])
            .sort_index(ascending=False),
            annot=True, fmt='d', vmax=600, ax=ax[0], cmap=cm_blue, cbar=False)

# Draw boundary around learner category
# (y coordinates are given as 7 - value, matching the descending row order)
ax[0].plot(np.array([0.03, 0.03, 6.95, 6.95, 2., 2., 0]),
           7 - np.array([3., 6.97, 6.97, 0.05, 0.05, 3., 3.]), '#00aaaa', linewidth=3)

ax[0].set_ylabel('Number of weeks watching videos');
ax[0].set_xlabel('Number of quizzes attempted');


## Fraction of users actively watching videos each week
vid_wk_cols = ['activity_vid_week_{:d}'.format(i) for i in range(1, 7)]
# Keep only the trailing week digit as the column name (used as x tick label).
vid_wks = enr[vid_wk_cols]\
    .rename(columns=lambda x: x[-1])

cb = sns.color_palette('Blues_r', 3)
cr = sns.color_palette('Reds', 2)

# Percent of ALL registrants with a value > 1 in each week, split by learner
# category. Stored under its own name: this cell previously reused `t`, which
# overwrote the enrollment summary table rendered by earlier cells.
vid_pct = (100.*(vid_wks > 1).groupby(enr['learner_category']).sum().transpose() / len(enr))

# Stack: completers at the bottom, then auditors, then non-learners.
b1 = ax[1].bar(vid_pct.index, vid_pct['Learner: completer'], color=cb[0])
b2 = ax[1].bar(vid_pct.index, vid_pct['Learner: auditor'],
               bottom=vid_pct['Learner: completer'], color=cb[1])
b3 = ax[1].bar(vid_pct.index, vid_pct['Non-learner'],
               bottom=vid_pct['Learner: completer']+vid_pct['Learner: auditor'], color=cr[0])

ax[1].set_facecolor("white")
ax[1].spines["top"].set_visible(False)
ax[1].spines["right"].set_visible(False)
ax[1].spines["left"].set_visible(False)
ax[1].grid(False)
ax[1].grid(which="major", axis="y", color="#cccccc")
ax[1].set_xlabel('Module / Course week');
ax[1].set_ylabel('Percent of users who watched >1 video');
ax[1].yaxis.set_major_formatter(FuncFormatter(lambda f, _: '{:.0f}%'.format(f)))

# Legend entries are listed top-to-bottom in the same order as the stack.
ax[1].legend([b3[0], b2[0], b1[0]],
             ['Non-learner', 'Learner: auditor', 'Learner: completer'  ],
             title='', loc="upper right", facecolor="white", framealpha=1, frameon=True, edgecolor='w',
            borderaxespad=0)

plt.tight_layout(w_pad=3)
plt.savefig("plots/Fig1-v3.png")
plt.savefig("plots/Fig1-v3.svg")

In [None]:
# Registrants with at least one PGA but fewer than two quizzes and fewer than
# three video weeks: they lie outside the boundary drawn on the heatmap.
# NOTE(review): the message says "red boundary", but the heatmap boundary is
# drawn in '#00aaaa' — confirm which colour the final figure uses.
_pga_only = (enr['activity_pga_count'].gt(0) &
             enr['activity_quiz_count'].lt(2) &
             enr['activity_vid_week_count'].lt(3))

print('...a small number ({:d}) of learners who completed a PGA but few '
      'quizzes may not fall within the red boundary.'
      .format(_pga_only.sum()))

In [None]:
# Video/quiz activity matrix behind the heatmap.
# Rows    = number of video weeks counted (a week counts when its value is > 1)
# Columns = value of activity_quiz_count
_vid_quiz_matrix = pd.crosstab(enr['activity_vid_week_count'],
                               enr['activity_quiz_count'])
_vid_quiz_matrix.sort_index(ascending=True).to_csv('plots/vid_quiz_matrix.csv')

# Data behind the stacked bar chart: percent of all registrants with a value
# > 1 in each module.
# Rows    = module number
# Columns = learner category
_watched = vid_wks > 1
_module_pct = 100. * _watched.groupby(enr['learner_category']).sum().transpose() / len(enr)
_module_pct.to_csv('plots/vid_module_by_learner.csv')

## Reaching our audience
### Overlap between pre-survey, post-survey, completers, and learners

In [None]:
# Coursera subsets: learners, completers, and non-completers who were
# engaged in all 6 weeks.
act_c = enr_c[enr_c['activity_learner'] == 1]
com_c = enr_c[enr_c['activity_completed'] == 1]
all6_no_com = enr_c[(enr_c['activity_completed'] == 0) &
                    (enr_c['activity_weeks_engaged'] == 6)]


def _share_in(members, pool):
    'Fraction of rows in `members` whose unique_user_id also appears in `pool`'
    return members['unique_user_id'].isin(pool['unique_user_id']).sum() / len(members)


n_pre = len(pre)
dm('Over the seven instances of the introductory course, '
   '{:d} students ({:.0%} of enrolled) took the pre-course survey.'
   .format(n_pre, n_pre / len(enr)))

dm('{:.1%} of learners responded to pre-course survey'
   .format(_share_in(act_c, pre_c)))

dm('{:.1%} of pre-survey respondents were Learners'
   .format(_share_in(pre_c, act_c)))

dm('{:.1%} of pre-survey respondents completed the course'
   .format(_share_in(pre_c, com_c)))

dm('{:.1%} of course completers responded to post-survey'
   .format(_share_in(com_c, pst_c)))

dm('{:.1%} of post-survey respondents completed the course'
   .format(_share_in(pst_c, com_c)))

dm('{:.1%} of post-survey respondents were learners'
   .format(_share_in(pst_c, act_c)))

dm('An additional {:.1%} of post-survey respondents were engaged all 6 weeks'
   .format(_share_in(pst_c, all6_no_com)))

### Role

In [None]:
# Role (QID334_p) counts and shares for both surveys. Shares use the number
# of non-missing answers as the denominator.
_pre_role_counts = pre['QID334_p'].value_counts()
_pst_role_counts = pst['QID334_p'].value_counts()

roles = pd.concat([_pre_role_counts,
                   _pre_role_counts / pre['QID334_p'].notna().sum(),
                   _pst_role_counts,
                   _pst_role_counts / pst['QID334_p'].notna().sum()],
                  axis=1, keys=['Pre total', 'Pre %', 'Post total', 'Post %'], sort=True)

dm('__Based on responses (N/A not included in denominators)__')

_early_career = ['Doctoral student', 'Post-doctoral researcher']

dm('PhD students and Post-docs make up {:.0%} of pre-survey respondents'
   .format(roles.loc[_early_career, 'Pre %'].sum()))

dm('PhD students and Post-docs make up {:.0%} of post-survey respondents'
   .format(roles.loc[_early_career, 'Post %'].sum()))

dm('Faculty make up {:.0%} of pre-survey respondents'
   .format(roles.at['Faculty', 'Pre %']))

dm('Faculty make up {:.0%} of post-survey respondents'
   .format(roles.at['Faculty', 'Post %']))

### Academic field

In [None]:
# Academic field (QID356): counts and share of non-missing answers.
_field_counts = pre['QID356'].value_counts()
fields = pd.concat([_field_counts,
                    _field_counts / pre['QID356'].notna().sum()],
                   axis=1, keys=['Total', '%'])

display(fields)

# The STEM/SBE share is whatever remains after the two excluded categories.
_excluded_share = fields.loc[['Health Fields', 'None of the above'], '%'].sum()
dm('STEM/SBE fields account for {:.0%} of pre-survey respondents.'
   .format(1. - _excluded_share))

### Intent to teach

In [None]:
sub_grp = ['Doctoral student', 'Post-doctoral researcher']

# 'stem' = answered QID356 with anything other than the two excluded categories.
_answered_field = pre['QID356'].notna()
pre['stem'] = ~pre['QID356'].isin(['Health Fields', 'None of the above']) & _answered_field

# Successively narrower groups of pre-survey respondents.
_is_ppd = pre['QID334_p'].isin(sub_grp)
_is_stem_ppd = _is_ppd & pre['stem']

n_all = len(pre)
n_ppd = _is_ppd.sum()
n_stem = _is_stem_ppd.sum()
n_ff = (_is_stem_ppd & pre['academic_job']).sum()

print('{:d} pre-survey respondents'.format(n_all))
print('{:d} PhDs and Post-docs'.format(n_ppd))
print('{:d} STEM PhDs and Post-docs'.format(n_stem))
print('{:d} STEM PhDs and Post-docs who intend to teach'.format(n_ff))
print('{:d} pre-survey respondents intend to teach'.format(pre['academic_job'].sum()))

print('{:.0%} of STEM PhDs and Post-docs intend to teach'
      .format(n_ff / n_stem))

## Outcomes
### Completion rates

In [None]:
# Completers among the linked Coursera pre-survey rows, over all such rows.
_n_completed = pre_enr['activity_completed'].sum()
dm('Overall, {:.0%} of pre-course survey respondents completed the course'
   .format(_n_completed / len(pre_enr)))

In [None]:
def completion_comparison(var, outcome, ax):
    """Compare completion rates between the two groups of a Boolean variable.

    Cross-tabulates ``var`` against ``outcome``, runs a one-sided Fisher exact
    test (``alternative='greater'``) on the resulting table, prints a short
    summary and draws the completion rate of each group as a bar chart.

    Parameters
    ----------
    var : pandas.Series
        Named Boolean grouping variable; both True and False must occur.
    outcome : pandas.Series
        Completion indicator aligned with ``var``; the value 1 marks completion.
    ax : matplotlib.axes.Axes
        Axes to draw the bar chart on.

    Returns
    -------
    None
    """
    ct = pd.crosstab(var, outcome)

    # Rows counted by the crosstab, i.e. the N the test is based on.
    n_obs = ct.sum().sum()
    n_true = ct.loc[True, :].sum()

    rates = ct.loc[:, 1.0] / ct.sum(axis=1)
    _odds_ratio, p_val = scs.fisher_exact(ct, alternative='greater')

    # Print summary statistics
    print('-------------------------------------')
    print('Variable: {:s}'.format(var.name))
    print('N: {:d}'.format(n_obs))
    print('Number TRUE {:d}'.format(n_true))
    # Same base as the count above: both come from the crosstab, so rows it
    # leaves out cannot deflate the percentage.
    print('%: {:.0%}'.format(n_true / n_obs))
    print('Completion rate TRUE: {:.0%}'.format(rates[True]))
    print('Completion rate FALSE: {:.0%}'.format(rates[False]))
    print('p-value: {:.3e}'.format(p_val))

    # Plot completion rate comparison
    rates.plot(kind='bar', ax=ax, rot=0)

    # Label each bar with its rate, slightly above the bar top.
    for p in ax.patches:
        ax.annotate('{:.0%}'.format(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.01))

    ax.set_ylabel('Completion rate')
    ax.set_title('p = {:.3f}'.format(p_val))

    ax.set_ylim([0., 0.5])
    

fig, ax = plt.subplots(ncols=2, figsize=(6.5, 3))

# Both comparisons use the same outcome: completion among linked pre-survey rows.
_completed = pre_enr['activity_completed']
_is_postdoc = (pre_enr['QID334_p'] == 'Post-doctoral researcher').rename('is a post-doc')
_intends_to_teach = pre_enr['academic_job'].rename('intends to teach')

completion_comparison(_is_postdoc, _completed, ax=ax[0])
completion_comparison(_intends_to_teach, _completed, ax=ax[1])

plt.tight_layout()
# plot not used in paper - just numbers

### Survey results and learning outcomes

In [None]:
# Each share is reported twice: over all post-survey rows, and over the rows
# that answered the specific question.
_n_pst = len(pst)

_satisfied = (pst['QID699'] > 0).sum()
dm("'Satisfied' or 'Extremely satisfied': {0:.0%} ({1:.0%} of question respondents)"
   .format(_satisfied / _n_pst,
           _satisfied / pst['QID699'].notna().sum()))

_improved = (pst['QID354'] == 'yes').sum()
dm('Improved ability to teach: {0:.0%} ({1:.0%} of question respondents)'
   .format(_improved / _n_pst,
           _improved / pst['QID354'].notna().sum()))

_recommend = (pst['QID383'] == 'yes').sum()
dm('Would recommend to others: {0:.0%} ({1:.0%} of question respondents)'
   .format(_recommend / _n_pst,
           _recommend / pst['QID383'].notna().sum()))

### Familiarity

In [None]:
# Axis-label and tick-label font sizes for the familiarity figure.
for _group, _size in (('axes', 10),    # x and y labels
                      ('xtick', 8),    # x tick labels
                      ('ytick', 8)):   # y tick labels
    plt.rc(_group, labelsize=_size)

In [None]:
# Familiarity figure: (left) mean pre/post familiarity per practice,
# (right) mean paired change with a 99% confidence interval.
print('Average of paired differences for the {:d} respondents '
      'who took both the pre- and post-course surveys'
      .format(len(pre_pst)))

# Two panels side by side; the left one is 2.5x as wide as the right one.
fig = plt.figure(figsize=(6.5,2.5))
gs = gridspec.GridSpec(1, 2, width_ratios=[2.5, 1])
axl = plt.subplot(gs[0])
axr = plt.subplot(gs[1])

# Five familiarity items and their short labels from the question metadata.
q_fam = ['QID601_{:d}'.format(i) for i in [1, 2, 3, 4, 6]]
q_text = pre_q.loc[q_fam, 'short']

# All Coursera pre-survey responses to these items (not only the paired ones).
pre_fam = pre_c.loc[:, q_text.index]

# All Coursera post-survey responses to the same items.
pst_fam = pst_c.loc[:, q_text.index]

# Error bars extend half a standard deviation to each side of the mean;
# pre and post markers are offset vertically so they do not overlap.
axl.errorbar(x=pre_fam.mean(), y=np.arange(5) + 0.1, xerr=pre_fam.std()/2,
             fmt='o', label='Pre')
axl.errorbar(x=pst_fam.mean(), y=np.arange(5) - 0.1, xerr=pst_fam.std()/2,
             fmt='s', label='Post')

# Strip the frame; keep only vertical grid lines.
axl.grid(False)
axl.spines["top"].set_visible(False)
axl.spines["bottom"].set_visible(False)
axl.spines["right"].set_visible(False)
axl.spines["left"].set_visible(False)
axl.grid(which="major", axis="x", color="#cccccc")

axl.set_ylim(-0.5, 4.5)
axl.set_yticks(range(5))
axl.set_yticklabels(['{}'.format(x) for x in q_text.values])

# The x axis is labelled with the four response categories at 0..3.
axl.set_xlim([-0.1, 3.1])
axl.set_xticks(range(4))
axl.set_xticklabels(['not at all', 'slightly', 'moderately', 'very'])
axl.set_xlabel('Familiarity with teaching practice', labelpad=10)

axl.legend(loc='lower left', frameon=False)

axl.set_facecolor("white")


# Paired differences
# In pre_pst the '_x' columns come from the post-survey (left frame of the
# merge) and '_y' from the pre-survey, so this is post minus pre. The rename
# trims the suffix so the two frames align on the same column names.
diff = (pre_pst.filter(axis='columns', regex='QID601_(1|2|3|4|6)_x').rename(columns=lambda x: x[:8]) -
        pre_pst.filter(axis='columns', regex='QID601_(1|2|3|4|6)_y').rename(columns=lambda x: x[:8]))

# Half-width of the 99% interval: standard error times the upper bound of the
# standard t interval with n-1 degrees of freedom.
c_int = diff.sem() * scs.t.interval(0.99, diff.count()-1, loc=0, scale=1.)[1]

eb = axr.errorbar(x=diff.mean(), y=range(5), xerr=c_int, fmt='D', color="purple")

axr.set_ylim([-0.4, 4.5])
axr.set_yticks(range(5))
axr.set_yticklabels(['{}'.format(x) for x in q_text.values])

# Ticks every 0.5, but only the whole-number changes are labelled.
axr.set_xlim([-0.1, 2.1])
axr.set_xticks(np.arange(0, 2.1, .5))
axr.set_xticklabels(['0', '', '+1', '', '+2']);
axr.set_xlabel('Average paired change\nin scale categories', labelpad=10)

# Same frame-less styling as the left panel.
axr.set_facecolor("white")
axr.grid(False)
axr.spines["top"].set_visible(False)
axr.spines["bottom"].set_visible(False)
axr.spines["right"].set_visible(False)
axr.spines["left"].set_visible(False)
axr.grid(which="major", axis="x", color="#cccccc")

plt.tight_layout(w_pad=2)
plt.savefig("plots/familiarity.png")
plt.savefig("plots/familiarity.svg")

In [None]:
# Export the numbers behind the familiarity figure: mean/std of the pre- and
# post-survey responses, plus the mean paired difference and the half-width
# of its 99% confidence interval. Rows are relabelled with the short
# question text.
# The 'Post' block is computed from pst_fam; it previously repeated pre_fam,
# so the exported Post columns were a copy of the Pre columns.
pd.concat([pre_fam.aggregate(['mean', 'std']).transpose(),
           pst_fam.aggregate(['mean', 'std']).transpose(),
           pd.concat([diff.mean(),
                      diff.sem() * scs.t.interval(0.99, diff.count()-1, loc=0, scale=1.)[1]],
                      keys=['mean', 'conf_int 99%'], axis=1)],
          keys=['Pre', 'Post', 'Diff'], axis=1)\
    .rename(index=q_text)\
    .to_csv('plots/familiarity_outcomes.csv')

## LEARNING COMMUNITY / MCLC ENGAGEMENT

In [None]:
# Planned learning-community participation (QID367), as shares of the
# respondents who answered the question.
_mclc_plan = pre['QID367']
_n_plan_answers = _mclc_plan.notna().sum()

dm('{0:.0%} of pre-course survey respondents planned to participate in in-person '
   'learning communities, while {1:.0%} weren’t sure, and {2:.0%} thought not'
   .format((_mclc_plan == "Yes").sum() / _n_plan_answers,
           (_mclc_plan == "I'm not sure").sum() / _n_plan_answers,
           (_mclc_plan == "No").sum() / _n_plan_answers))

In [None]:
# Self-reported MCLC participation (QID389_p) among all post-survey responses.
_n_reported_yes = (pst['QID389_p'] == 'yes').sum()
dm('{:.0%} of post-survey respondents reported participating in an MCLC.'
   .format(_n_reported_yes / pst['QID389_p'].notna().sum()))

dm('{:.0%} of post-survey respondents were learners in the course.'
   .format(pst_enr['activity_learner'].sum() / len(pst_enr)))

# Completion rates within the linked Coursera post-survey rows, split by the
# participation answer.
_said_yes = pst_enr['QID389_p'] == 'yes'
_said_no = pst_enr['QID389_p'] == 'no'

dm('Completion rate for self-reported MCLC participants: {:.0%}'
   .format(pst_enr.loc[_said_yes, 'activity_completed'].sum() / _said_yes.sum()))

dm('Completion rate for self-reported MCLC non-participants: {:.0%}'
   .format(pst_enr.loc[_said_no, 'activity_completed'].sum() / _said_no.sum()))

In [None]:
# Respondents with both surveys who said 'Yes' to planning MCLC participation.
_intended = pre_pst['QID367'] == 'Yes'

# ... and reported participating
x = _intended & (pre_pst['QID389_p'] == 'yes')
_n = x.sum()
dm('Completion rate of those who INTEND to participate in MCLC and DO: {:.0%}, n={:d}'
   .format(pre_pst.loc[x, 'activity_completed'].sum() / _n, _n))

# ... and reported not participating
x = _intended & (pre_pst['QID389_p'] == 'no')
_n = x.sum()
dm('Completion rate of those who INTEND to participate in MCLC and DONT: {:.0%}, n={:d}'
   .format(pre_pst.loc[x, 'activity_completed'].sum() / _n, _n))

# Same intent, but starting from every linked pre-survey row: anyone whose
# post-survey answer is not 'yes' counts, including those with no post-survey.
d = pre_enr.merge(pst, how='left', on='unique_user_id')
x = (d['QID367'] == 'Yes') & (d['QID389_p'] != 'yes')
_n = x.sum()
dm('Completion rate of those who INTEND to participate in MCLC and DONT (or dont respond): {:.0%}, n={:d}'
   .format(d.loc[x, 'activity_completed'].sum() / _n, _n))

In [None]:
def multiselect_value_counts(s):
    """Count how many responses selected each option of a multi-select question.

    Parameters
    ----------
    s : pandas.Series
        Each entry is a string of selected options joined by ','; missing
        entries (NaN) are ignored.

    Returns
    -------
    pandas.Series
        Integer counts indexed by option, in sorted option order. Empty when
        no entry is answered.
    """
    answered = s.loc[s.notna()]
    if answered.empty:
        # Without this guard the single bogus option '' would be reported.
        return pd.Series(dtype='int64')

    # Tokenise every response once. Membership is tested on whole options, so
    # an option that is a substring of another one (e.g. 'a' vs 'ab') is not
    # over-counted, and each response contributes at most once per option.
    selections = [set(str(y).split(',')) for y in answered]
    opts = sorted(set().union(*selections))

    return pd.Series({opt: sum(opt in chosen for chosen in selections)
                      for opt in opts},
                     dtype='int64')

# Option counts for QID388, and the same counts as a share of all pre-survey
# rows, most frequent option first.
_qid388_counts = multiselect_value_counts(pre['QID388'])
pd.DataFrame({'Total': _qid388_counts,
              'Percent': _qid388_counts / len(pre)})\
    .sort_values(by='Total', ascending=False)

In [None]:
# Share of post-survey respondents (who answered QID389_p) reporting MCLC participation.
_mclc_answers = pst['QID389_p']
print('{:.0%} of post-survey respondents participated in a MCLC'
      .format((_mclc_answers == 'yes').sum() / _mclc_answers.notna().sum()))

# Linked Coursera post-survey rows of self-reported MCLC participants.
mclc = pst_enr.loc[pst_enr['QID389_p'] == 'yes']
_n_mclc = len(mclc)
_n_mclc_completed = mclc['activity_completed'].sum()

print('{:.0%} of MCLC participants engaged (learners) in course'
      .format(mclc['activity_learner'].sum() / _n_mclc))

print('{:.0%} of MCLC participants completed course'
      .format(_n_mclc_completed / _n_mclc))

print('Known MCLC participants made up {:.0%} of course completers'
      .format(_n_mclc_completed / enr_c['activity_completed'].sum()))