Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

Commit

Permalink
fix: Add unique dump id and consistent dump time
Browse files Browse the repository at this point in the history
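A course may be dumped many times, so every node and relationship row written during a single dump now carries the same unique dump id and the same dump timestamp. These columns are needed to tie the rows of one dump together and to find the dump that corresponds most closely to an event or set of events.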
  • Loading branch information
bmtcril committed May 3, 2023
1 parent 645a31a commit 90f5a3d
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions event_sink_clickhouse/sinks/course_published.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import csv
import io
import uuid

import requests
from django.utils import timezone
Expand Down Expand Up @@ -86,7 +87,7 @@ def get_course_last_published(course_key):
return str(approx_last_published)

@staticmethod
def serialize_item(item, index, detached_xblock_types):
def serialize_item(item, index, detached_xblock_types, dump_id, dump_timestamp):
"""
Args:
item: an XBlock
Expand All @@ -109,9 +110,10 @@ def serialize_item(item, index, detached_xblock_types):
'display_name': item.display_name_with_default.replace("'", "\'"),
'block_type': block_type,
'detached': 1 if block_type in detached_xblock_types else 0,
'edited_on': str(getattr(item, 'edited_on', '')),
'time_last_dumped': str(timezone.now()),
'order': index,
'edited_on': str(getattr(item, 'edited_on', '')),
'dump_id': dump_id,
'time_last_dumped': dump_timestamp,
}

return rtn_fields
Expand All @@ -130,6 +132,9 @@ def serialize_course(self, course_id):
modulestore = CoursePublishedSink._get_modulestore()
detached_xblock_types = CoursePublishedSink._get_detached_xblock_types()

dump_id = str(uuid.uuid4())
dump_timestamp = str(timezone.now())

# create a location to node mapping we'll need later for
# writing relationships
location_to_node = {}
Expand All @@ -139,7 +144,7 @@ def serialize_course(self, course_id):
i = 0
for item in items:
i += 1
fields = self.serialize_item(item, i, detached_xblock_types)
fields = self.serialize_item(item, i, detached_xblock_types, dump_id, dump_timestamp)
location_to_node[self.strip_branch_and_version(item.location)] = fields

# create relationships
Expand All @@ -154,6 +159,8 @@ def serialize_course(self, course_id):
'course_key': str(course_id),
'parent_location': str(parent_node["location"]),
'child_location': str(child_node["location"]),
'dump_id': dump_id,
'time_last_dumped': dump_timestamp,
'order': index
}
relationships.append(relationship)
Expand Down

0 comments on commit 90f5a3d

Please sign in to comment.