# COMM 4940 Slack Participation Tally Script
[J. Nathan Matias](https://natematias.com), January 2020

This script is the initial prototype of software that will record counts of student participation in the online chat for class. Since the class participation grade is partly based on participation in chat discussion (at least one top-level comment and one reply before each class), the purpose of this script is to inform the professor about levels of participation when making grading decisions.

[COMM 4940](https://natematias.com/courses/comm4940): The Design and Governance of Field Experiments

In [1]:
import os, datetime
import pandas as pd

In [2]:
# DATADIR is expected to be defined in the environment before launching IPython.
# When it is unset, the empty-string default makes later os.path.join() calls
# resolve relative to the current folder.
data_dir = os.environ.get('DATADIR', '')
data_dir_message = "Data dir is: {data_dir}".format(data_dir=data_dir)
print(data_dir_message)

Data dir is: /home/paprika/Downloads/COMM4940 Slack export Jan 21 2020 - Apr 7 2020


### Load Users

In [3]:
# The user table is read from users.json inside the data directory.
users_json_path = os.path.join(data_dir, 'users.json')
users = pd.read_json(users_json_path)
# fix where real_name is sometimes not set properly
def fix_real_name(row):
    """Return the best available real name for one row of the users table.

    The top-level ``real_name`` field is preferred; when it is null the
    ``real_name`` stored inside the nested ``profile`` mapping is used instead.

    Parameters
    ----------
    row : mapping-like
        One user record (a pandas Series when called through
        ``users.apply(..., axis=1)``, but a plain dict works too) exposing
        ``real_name`` and ``profile`` entries.

    Returns
    -------
    The resolved name, or ``None`` when neither location holds a non-null value.
    """
    top_level_name = row.get('real_name')
    if pd.notnull(top_level_name):
        return top_level_name

    # A row without profile data carries NaN/None here rather than a dict, so
    # indexing it directly would raise; only look inside real mappings.
    profile = row.get('profile')
    if isinstance(profile, dict):
        profile_name = profile.get('real_name')
        if pd.notnull(profile_name):
            return profile_name

    return None
# Resolve a name row by row, then overwrite the real_name column with the result.
resolved_real_names = users.apply(fix_real_name, axis='columns')
users['real_name'] = resolved_real_names

### Load #readings
- want to know how many toplevel messages a user sent
- and how many replies they made

In [4]:
readings_dir = 'readings'
readings_path = os.path.join(data_dir, readings_dir)

# os.listdir() returns every directory entry in arbitrary order. Keep only the
# JSON files (a stray non-JSON file would make pd.read_json raise) and sort
# them so the load order is the same on every run.
reading_files = sorted(
    entry for entry in os.listdir(readings_path)
    if entry.lower().endswith('.json')
)

# One DataFrame per file in the readings folder.
comment_dfs = [
    pd.read_json(os.path.join(readings_path, reading_file))
    for reading_file in reading_files
]

# ignore_index gives every comment a unique row label instead of repeating
# each file's 0..n-1 labels; sort=False keeps the columns in first-seen order.
comments = pd.concat(comment_dfs, sort=False, ignore_index=True)

In [5]:
# Character count of each message. .str.len() yields NaN for a message whose
# text is missing, where apply(len) would raise a TypeError.
comments['text_len'] = comments['text'].str.len()

# Convert the epoch-seconds 'ts' value to a naive datetime expressed in UTC.
# datetime.utcfromtimestamp() is deprecated since Python 3.12; converting with
# an explicit UTC tzinfo and then dropping it produces the same naive value.
comments['dt'] = comments['ts'].apply(
    lambda ts: datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc).replace(tzinfo=None)
)

# ISO calendar triple (year, week, weekday) of each message time.
comments['iso_dt'] = comments['dt'].apply(lambda dt: dt.isocalendar())

In [6]:
# Messages shorter than this many characters are not counted.
min_comment_len = 50
# Vectorised comparison; a NaN length compares as False and is filtered out.
comments['text_is_min_length'] = comments['text_len'] >= min_comment_len
print('Number of all comments: {nc}'.format(nc=len(comments)))
# .copy() makes the filtered frame independent of the unfiltered one, so the
# columns added to `comments` in later cells do not raise SettingWithCopyWarning.
comments = comments[comments['text_is_min_length']].copy()
print('Number of comments with minimum {min_comment_len} chars: {nc}'.format(nc=len(comments), min_comment_len=min_comment_len))

Number of all comments: 212
Number of comments with minimum 50 chars: 186


In [22]:
def make_periods(start_date=None, end_date=None, period_durations=None):
    """Build the enumerated list of consecutive time periods.

    Starting at ``start_date``, the durations in ``period_durations`` are laid
    end to end and the whole pattern is repeated while the running date is
    still ``<= end_date`` at the start of a repetition. A repetition is always
    completed, so the last period can end after ``end_date``.

    Parameters
    ----------
    start_date : datetime.datetime, optional
        Naive datetime in UTC where the first period begins.
        Defaults to 2020-01-06 16:00.
    end_date : datetime.datetime, optional
        Naive datetime in UTC; no new repetition starts after it.
        Defaults to 2021-01-01 00:00.
    period_durations : sequence of datetime.timedelta, optional
        Lengths of the periods within one repetition. Defaults to 60 hours
        followed by 108 hours, which sum to one week (168 hours).

    Returns
    -------
    list of (int, (datetime.datetime, datetime.datetime))
        ``(period_id, (start, end))`` entries, numbered from 0.

    Raises
    ------
    ValueError
        If ``period_durations`` is empty or has a non-positive duration,
        since the running date would then never advance.

    It is up to the caller to make the start/end range wide enough to cover
    every timestamp that will be looked up.
    """
    if start_date is None:
        # A Monday, 16:00 UTC.
        # NOTE(review): the original comment called this "Monday at Noon
        # Eastern", but 16:00 UTC is noon only under daylight time (UTC-4);
        # under standard time (UTC-5) it is 11:00 -- confirm which is intended.
        start_date = datetime.datetime(2020, 1, 6, 16)
    if end_date is None:
        end_date = datetime.datetime(2021, 1, 1)
    if period_durations is None:
        period_durations = [
            datetime.timedelta(hours=60),
            datetime.timedelta(hours=108),
        ]  # should sum to a week, 168 hours

    period_durations = list(period_durations)
    if not period_durations or any(d <= datetime.timedelta(0) for d in period_durations):
        raise ValueError("period_durations must contain only positive durations")

    curr_date = start_date
    periods = []
    while curr_date <= end_date:
        for period_duration in period_durations:
            prev_date = curr_date
            curr_date += period_duration
            periods.append((prev_date, curr_date))
    return list(enumerate(periods))

In [72]:
def find_period(target_dt, enum_periods):
    """Find the period that contains ``target_dt``.

    ``enum_periods`` is an iterable of ``(pid, (start, end))`` entries. A
    period matches when ``target_dt`` is strictly after its start and no later
    than its end, so a timestamp sitting exactly on a boundary belongs to the
    period that ends there.

    Returns ``(pid, start, end)`` for the first matching period, or ``None``
    when no period matches.
    """
    matching_periods = (
        (pid, start, end)
        for pid, (start, end) in enum_periods
        if target_dt > start and target_dt <= end
    )
    return next(matching_periods, None)
        
def nice_period_fmt(pid_start_end):
    """Render a ``(pid, start, end)`` triple as a sortable text label.

    The label has the form ``"P<pid zero-padded to 3 digits> : <start>___<end>"``
    with both datetimes in ISO 8601 format.

    Parameters
    ----------
    pid_start_end : sequence
        ``(pid, start, end)`` as returned by ``find_period``.

    Raises
    ------
    TypeError
        If ``pid_start_end`` is ``None``, which is what ``find_period`` returns
        when no period contains the timestamp. The exception type matches what
        indexing ``None`` would raise, but the message names the actual cause.
    """
    if pid_start_end is None:
        raise TypeError(
            "no period covers this timestamp; widen the start/end range used by make_periods()"
        )
    pid, start, end = pid_start_end[0], pid_start_end[1], pid_start_end[2]
    return "P{p} : {s}___{e}".format(p=str(pid).zfill(3), s=start.isoformat(), e=end.isoformat())

# Build the period table once, then label every comment with the period its
# timestamp falls into.
enum_periods = make_periods()
period_labels = comments['dt'].apply(
    lambda comment_dt: nice_period_fmt(find_period(comment_dt, enum_periods))
)
comments['time_period'] = period_labels

In [73]:
def week_number(iso_dt):
    """Format an ISO calendar triple as ``'<year>wk<week>'``.

    ``iso_dt`` is indexed as ``(year, week, ...)``; the week is zero-padded to
    two digits, e.g. ``(2020, 4, 2)`` becomes ``'2020wk04'``.
    """
    iso_year = iso_dt[0]
    padded_week = str(iso_dt[1]).zfill(2)
    return '{year}wk{week_number}'.format(year=iso_year, week_number=padded_week)

# comments['week_number'] = comments['iso_dt'].apply(week_number)
# comments['week_range'] = comments['iso_dt'].apply(week_range)


In [74]:
def top_level_or_reply(parent_id):
    """Classify a message by whether it has a parent.

    Returns ``'top_level'`` when ``parent_id`` is null (None/NaN) and
    ``'reply'`` otherwise.
    """
    if pd.isnull(parent_id):
        return 'top_level'
    return 'reply'
# A message with a null parent_user_id is treated as top-level; any other is a reply.
parent_user_ids = comments['parent_user_id']
comments['comment_depth'] = parent_user_ids.apply(top_level_or_reply)

In [75]:
# this works, but doesn't insert the 0s in the case where a user doesn't have a message of a given type in a week
# user_comments = comments.groupby(['user','week_number','comment_depth']).size()\
#                                 .reset_index().rename(columns={0:'num_messages'})

In [76]:
# Count messages per user for every (time_period, comment_depth) combination
# that occurs in the data; fill_value=0 supplies the zeros for combinations a
# given user has no messages in.
pivot_input = comments[['user', 'time_period', 'comment_depth', 'text']]
counts_wide = pivot_input.pivot_table(
    index=['user'],
    columns=['time_period', 'comment_depth'],
    fill_value=0,
    aggfunc='count',
)

# Reshape the wide table into a long one: one row per combination, with the
# tally held in a column named "count".
counts_long = counts_wide.unstack().to_frame().reset_index()
comments_pivot = counts_long.rename(columns={0: "count"})

In [77]:
# Attach each user's real_name by matching the tally's `user` column against
# the `id` column of the users table; the left join keeps every tally row.
user_names = users[['id', 'real_name']]
comments_pivot = pd.merge(comments_pivot, user_names, how='left', left_on='user', right_on='id')

In [78]:
# Tidy the result for export: sort by the report columns, then keep only those
# columns. Any other column (such as the `level_0` column an earlier draft
# deleted by hand) stays in comments_pivot but is left out of comments_output.
COLUMN_OUTPUT_ORDER = ['time_period', 'real_name', 'comment_depth', 'count']
comments_pivot = comments_pivot.sort_values(by=COLUMN_OUTPUT_ORDER)
comments_output = comments_pivot.loc[:, COLUMN_OUTPUT_ORDER]

In [79]:
# Save the tally as a CSV named after today's date, inside a
# participation_counts folder under the data directory.
out_filename = '#readings_participation_counts_as_of_{date}.csv'.format(date=datetime.date.today().isoformat())
participation_counts_dir = os.path.join(data_dir, 'participation_counts')
# exist_ok=True creates the folder only when it is missing, without the
# separate check-then-create step that can race with another process.
os.makedirs(participation_counts_dir, exist_ok=True)
out_path = os.path.join(participation_counts_dir, out_filename)
comments_output.to_csv(out_path, index=False)

# Extra Code

In [58]:
# Alternate way of counting replies that doesn't rely on parent_user_id: it
# reads the `replies` lists stored on the messages instead.
# Note: `comments` has already been reduced to messages meeting the minimum
# length, and the reply entries themselves are not length-checked here.
replies_ser = comments['replies']

# Flatten the per-message reply lists into one list; values that are not lists
# are skipped.
replies_list = [
    reply
    for thread_replies in replies_ser
    if isinstance(thread_replies, list)
    for reply in thread_replies
]

replies = pd.DataFrame.from_dict(replies_list)

# Number of reply entries per user.
reply_sizes = replies.groupby('user').size()
user_replies = reply_sizes.reset_index().rename(columns={0: "alt_reply_count"})

# Copyright

Copyright 2020 J. Nathan Matias

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.