Skip to content

Commit

Permalink
feat: allow switching anonymous user ID hashing algorithm from shake …
Browse files Browse the repository at this point in the history
…to md5

The hashing algorithm was changed in cd60646. However, some Open edX
operators have maintained backward compatibility of anonymous user IDs across past
rotations of their Django secret key. For them, altering the hashing algorithm
was a breaking change that made their analytics inconsistent.

(cherry picked from commit 746e4fe)
  • Loading branch information
kaustavb12 authored and 0x29a committed Oct 5, 2022
1 parent 47ac9a7 commit ff6d92f
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 2 deletions.
10 changes: 10 additions & 0 deletions cms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,16 @@
# .. toggle_tickets: 'https://github.com/open-craft/edx-platform/pull/429'
'DISABLE_UNENROLLMENT': False,

# .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID']
# .. toggle_implementation: DjangoSetting
# .. toggle_default: False
# .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user IDs
# instead of the newer SHAKE128 hashing algorithm.
# .. toggle_use_cases: open_edx
# .. toggle_creation_date: 2022-08-08
# .. toggle_target_removal_date: None
# .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832'
'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False,
}

# .. toggle_name: ENABLE_COPPA_COMPLIANCE
Expand Down
14 changes: 12 additions & 2 deletions common/djangoapps/student/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,22 @@ def anonymous_id_for_user(user, course_id, save='DEPRECATED'):
# function: Rotate at will, since the hashes are stored and
# will not change.
# include the secret key as a salt, and to make the ids unique across different LMS installs.
hasher = hashlib.shake_128()
legacy_hash_enabled = settings.FEATURES.get('ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID', False)
if legacy_hash_enabled:
# Use legacy MD5 algorithm if flag enabled
hasher = hashlib.md5()
else:
hasher = hashlib.shake_128()

hasher.update(settings.SECRET_KEY.encode('utf8'))
hasher.update(str(user.id).encode('utf8'))
if course_id:
hasher.update(str(course_id).encode('utf-8'))
anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args

if legacy_hash_enabled:
anonymous_user_id = hasher.hexdigest()
else:
anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args

try:
AnonymousUserId.objects.create(
Expand Down
11 changes: 11 additions & 0 deletions common/djangoapps/student/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,17 @@ def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user
assert anonymous_id != new_anonymous_id
assert self.user == user_by_anonymous_id(new_anonymous_id)

def test_enable_legacy_hash_flag(self):
    """
    Enabling ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID must yield a different anonymous id.

    The id is generated once with the feature flag left unpatched and once with the
    flag forced on; the two results are expected to differ.
    """
    course_key = self.course.id
    CourseEnrollment.enroll(self.user, course_key)
    default_id = anonymous_id_for_user(self.user, course_key)

    legacy_flag = {'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': True}
    with patch.dict(settings.FEATURES, legacy_flag):
        # Reload the user from the database so that an anonymous id cached on the
        # previous instance is not reused.
        self.user = User.objects.get(pk=self.user.id)
        # Drop the stored id for this user/course pair before requesting a new one.
        stored_ids = AnonymousUserId.objects.filter(user=self.user).filter(course_id=course_key)
        stored_ids.delete()
        legacy_id = anonymous_id_for_user(self.user, course_key)

    assert default_id != legacy_id


@skip_unless_lms
@patch('openedx.core.djangoapps.programs.utils.get_programs')
Expand Down
11 changes: 11 additions & 0 deletions lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,17 @@
# in the LMS and CMS.
# .. toggle_tickets: 'https://github.com/open-craft/edx-platform/pull/429'
'DISABLE_UNENROLLMENT': False,

# .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID']
# .. toggle_implementation: DjangoSetting
# .. toggle_default: False
# .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user IDs
# instead of the newer SHAKE128 hashing algorithm.
# .. toggle_use_cases: open_edx
# .. toggle_creation_date: 2022-08-08
# .. toggle_target_removal_date: None
# .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832'
'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False,
}

# Specifies extra XBlock fields that should be available when requested via the Course Blocks API
Expand Down

0 comments on commit ff6d92f

Please sign in to comment.