In [1]:
import json
from datetime import datetime
from collections import defaultdict

## Prepare Environment

In [None]:
# Helper function to parse timestamps
def parse_timestamp(timestamp_str):
    """Parse a timestamp string such as '2024-01-26T15:25:09Z' into a datetime.

    The trailing 'Z' is matched as a literal character, so the returned
    datetime is naive (it carries no tzinfo).

    Raises:
        ValueError: if the string does not match the expected layout.
    """
    expected_layout = "%Y-%m-%dT%H:%M:%SZ"
    parsed = datetime.strptime(timestamp_str, expected_layout)
    return parsed

## Load Data

In [None]:
# Read the exported discussion entries. The context manager closes the file
# handle as soon as parsing finishes (or fails) instead of leaving it open,
# and the explicit encoding avoids depending on the platform default.
with open('data.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

In [5]:
# Peek at the first record to see which fields each entry carries.
data[0]

{'id': 46137055,
 'user_id': 6022275,
 'parent_id': None,
 'created_at': '2024-01-26T15:25:09Z',
 'updated_at': '2024-01-26T15:25:09Z',
 'rating_count': None,
 'rating_sum': None,
 'user_name': 'John Haubrick',
 'message': '<p>Maybe we could order some sort of keepsake that we can have engraved with his time as dept head as well our \'thank yous\' and \'appreciations.\'?</p><script src="https://instructure-uploads.s3.amazonaws.com/account_10500000000085746/attachments/88254828/Mobile.js"></script>',
 'user': {'id': 6022275,
  'anonymous_id': '3l2tf',
  'display_name': 'John M Haubrick',
  'avatar_image_url': 'https://psu.instructure.com/images/thumbnails/127535803/rj1IkpuLedzk1757fmEK4iVbcvNXkVEw1DqotYbn',
  'html_url': 'https://psu.instructure.com/courses/2326609/users/6022275',
  'pronouns': 'He/Him'},
 'read_state': 'read',
 'forced_read_state': False}

## Compute Metrics

In [10]:
# Metric 1: Count unique discussion members
# One pass over the posts tallies posts per author and collects every creation
# time; the set of members is then just the keys of that tally.
post_count_per_user = defaultdict(int)
timestamps = []
for post in data:
    post_count_per_user[post['user_id']] += 1
    timestamps.append(parse_timestamp(post['created_at']))
unique_users = set(post_count_per_user)

# Metric 2: Calculate total number of posts
total_posts = len(data)

# Metric 3: Calculate time differences between posts
# Gaps (in minutes) are measured between chronologically adjacent posts.
timestamps.sort()
time_differences = [
    (later - earlier).total_seconds() / 60.0
    for earlier, later in zip(timestamps, timestamps[1:])
]

# Metric 4: Calculate average response time
# With fewer than two posts there are no gaps, so the average falls back to 0.
if time_differences:
    average_response_time = sum(time_differences) / len(time_differences)
else:
    average_response_time = 0

# Metric 5: Longest gap between posts
longest_gap = max(time_differences, default=0)

# Metric 6: Post distribution per user
post_distribution = dict(post_count_per_user)

## Report Metrics

In [7]:
# Printing the results
# Build the whole report first, then emit it one line per entry.
report_lines = [
    f"Number of unique discussion members: {len(unique_users)}",
    f"Total number of posts: {total_posts}",
    f"Average response time between posts (in minutes): {average_response_time:.2f}",
    f"Longest gap between posts (in minutes): {longest_gap:.2f}",
    "Post distribution per user (user_id: number of posts):",
]
report_lines.extend(
    f"User {member_id}: {n_posts} posts"
    for member_id, n_posts in post_distribution.items()
)
print(*report_lines, sep="\n")

Number of unique discussion members: 4
Total number of posts: 5
Average response time between posts (in minutes): 342.47
Longest gap between posts (in minutes): 1148.47
Post distribution per user (user_id: number of posts):
User 6022275: 1 posts
User 6817271: 1 posts
User 7244899: 2 posts
User 6788319: 1 posts


## Compute Integrated Metric

In [12]:
# Normalization function (min-max scaling)
def normalize(value, min_value, max_value):
    """Min-max scale `value` relative to the range [min_value, max_value].

    Returns (value - min_value) / (max_value - min_value), or 0 when the two
    bounds are equal (the range is degenerate and the ratio is undefined).
    The result is not clipped: inputs outside the bounds map outside [0, 1].
    """
    if max_value == min_value:
        return 0
    offset = value - min_value
    span = max_value - min_value
    return offset / span

# Assuming you want to give weights to the metrics (you can adjust these weights)
# The four weights below sum to 1.0, so a weighted sum of values in [0, 1]
# also stays in [0, 1].
weights = {
    "unique_members": 0.3,  # Weight for number of unique members
    "total_posts": 0.3,     # Weight for total number of posts
    "average_response_time": 0.2,  # Weight for average response time (lower is better)
    "longest_gap": 0.2      # Weight for longest gap between posts (lower is better)
}

# Min and max values for normalization (you should adjust these based on your dataset)
min_unique_members = 1
max_unique_members = 50  # Example maximum
min_total_posts = 1
max_total_posts = 100  # Example maximum
min_response_time = 1   # Minimum response time in minutes
max_response_time = 1440  # 24 hours, as an example of maximum
min_gap = 1             # Minimum gap between posts
max_gap = 1440          # 24 hours, as an example of maximum


def _clip_to_unit(scaled):
    """Clip a min-max scaled value to the closed interval [0, 1]."""
    return min(1.0, max(0.0, scaled))


# Normalize metrics
# This cell reads unique_users, total_posts, average_response_time and
# longest_gap from the notebook namespace; make sure they hold the values you
# intend to score before running it.
#
# The bounds above are only examples, so observed values may fall outside
# them. Clipping keeps every component (and therefore the score) in [0, 1].
normalized_unique_members = _clip_to_unit(
    normalize(len(unique_users), min_unique_members, max_unique_members)
)
normalized_total_posts = _clip_to_unit(
    normalize(total_posts, min_total_posts, max_total_posts)
)

# "Lower is better" metrics are scaled first and then flipped, so the minimum
# maps to exactly 1 and the maximum to exactly 0. Scaling (max - value) against
# the [min, max] range would be off by min / (max - min) at both ends.
# NOTE(review): when there are fewer than two posts the upstream average and
# longest gap are 0, which scores as the best possible value here - confirm
# that is the intended treatment for single-post discussions.
normalized_response_time = 1 - _clip_to_unit(
    normalize(average_response_time, min_response_time, max_response_time)
)
normalized_longest_gap = 1 - _clip_to_unit(
    normalize(longest_gap, min_gap, max_gap)
)

# Calculate the final integrated score using the weighted sum
integrated_score = (
    weights["unique_members"] * normalized_unique_members +
    weights["total_posts"] * normalized_total_posts +
    weights["average_response_time"] * normalized_response_time +
    weights["longest_gap"] * normalized_longest_gap
)

# Print the integrated score
print(f"Integrated Engagement Score: {integrated_score:.2f}")


Integrated Engagement Score: 0.22


## Same approach, by Discussion Post

The code below only works if a `discussion_id` field is added to each post when downloading the Canvas API data; that field is currently missing from the data sample.

In [9]:
# Assuming data is a list of dictionaries with each post including a 'discussion_id'
# (defaultdict is already imported in the notebook's first cell.)


def _summarize_posts(posts):
    """Compute the engagement metrics for one list of posts.

    Args:
        posts: list of post dictionaries, each with 'user_id' and
            'created_at' keys.

    Returns:
        dict with keys 'num_unique_members', 'total_posts',
        'average_response_time' and 'longest_gap' (both in minutes, 0 when
        there are fewer than two posts), and 'post_distribution'
        (user_id -> number of posts).

    All intermediate values are local to this function, so calling it does
    not rebind the notebook-level variables of the same names.
    """
    post_count_per_user = defaultdict(int)
    timestamps = []
    for entry in posts:
        post_count_per_user[entry['user_id']] += 1
        timestamps.append(parse_timestamp(entry['created_at']))

    # Gaps are measured between chronologically adjacent posts.
    timestamps.sort()
    time_differences = [
        (later - earlier).total_seconds() / 60.0
        for earlier, later in zip(timestamps, timestamps[1:])
    ]

    return {
        "num_unique_members": len(post_count_per_user),
        "total_posts": len(posts),
        "average_response_time": (
            sum(time_differences) / len(time_differences) if time_differences else 0
        ),
        "longest_gap": max(time_differences, default=0),
        "post_distribution": dict(post_count_per_user),
    }


# Group posts by discussion_id
# A post without a 'discussion_id' key raises KeyError here.
discussions = defaultdict(list)
for entry in data:
    discussions[entry['discussion_id']].append(entry)

# To store metrics for each discussion
discussion_metrics = {
    discussion_id: _summarize_posts(posts)
    for discussion_id, posts in discussions.items()
}

# Printing metrics for each discussion
for discussion_id, metrics in discussion_metrics.items():
    print(f"Discussion {discussion_id}:")
    print(f"  Number of unique discussion members: {metrics['num_unique_members']}")
    print(f"  Total number of posts: {metrics['total_posts']}")
    print(f"  Average response time between posts (in minutes): {metrics['average_response_time']:.2f}")
    print(f"  Longest gap between posts (in minutes): {metrics['longest_gap']:.2f}")
    print("  Post distribution per user (user_id: number of posts):")
    for user_id, count in metrics['post_distribution'].items():
        print(f"    User {user_id}: {count} posts")


Discussion 46137055:
  Number of unique discussion members: 1
  Total number of posts: 1
  Average response time between posts (in minutes): 0.00
  Longest gap between posts (in minutes): 0.00
  Post distribution per user (user_id: number of posts):
    User 6022275: 1 posts
Discussion 46136280:
  Number of unique discussion members: 1
  Total number of posts: 1
  Average response time between posts (in minutes): 0.00
  Longest gap between posts (in minutes): 0.00
  Post distribution per user (user_id: number of posts):
    User 6817271: 1 posts
Discussion 46123132:
  Number of unique discussion members: 1
  Total number of posts: 1
  Average response time between posts (in minutes): 0.00
  Longest gap between posts (in minutes): 0.00
  Post distribution per user (user_id: number of posts):
    User 7244899: 1 posts
Discussion 46122996:
  Number of unique discussion members: 1
  Total number of posts: 1
  Average response time between posts (in minutes): 0.00
  Longest gap between post