diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3e35927e..af1f6a58 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -7,8 +7,6 @@ on: branches: - main pull_request: - branches: - - main env: DBT_PROFILES_DIR: ./.github/ diff --git a/macros/items_per_subsection.sql b/macros/items_per_subsection.sql index 7fe8d28a..f59741c8 100644 --- a/macros/items_per_subsection.sql +++ b/macros/items_per_subsection.sql @@ -24,7 +24,9 @@ subsection_blocks.display_name_with_location as subsection_with_name, ips.course_order as course_order, ips.graded as graded, - ips.item_count as item_count + ips.item_count as item_count, + subsection_blocks.block_id as subsection_block_id, + section_blocks.block_id as section_block_id from items_per_subsection ips left join {{ ref("dim_course_blocks") }} section_blocks diff --git a/models/problems/fact_problem_engagement.sql b/models/problems/fact_problem_engagement.sql index e992da78..733f4847 100644 --- a/models/problems/fact_problem_engagement.sql +++ b/models/problems/fact_problem_engagement.sql @@ -1,104 +1,49 @@ with - subsection_counts as ( + subsection_engagement as ( select org, course_key, - course_run, - section_with_name, - subsection_with_name, - actor_id, - item_count, - count(distinct problem_id) as problems_attempted, - case - when problems_attempted = 0 - then 'No problems attempted yet' - when problems_attempted = item_count - then 'All problems attempted' - else 'At least one problem attempted' - end as engagement_level, - username, - name, - email - from {{ ref("fact_problem_engagement_per_subsection") }} - group by - org, - course_key, - course_run, - section_with_name, - subsection_with_name, + 'subsection' as content_level, actor_id, - item_count, - username, - name, - email + subsection_block_id as block_id, + engagement_level as section_subsection_problem_engagement + from {{ ref("subsection_problem_engagement") }} ), - section_counts as ( + section_engagement as ( select org, course_key, - course_run, - section_with_name, - actor_id, - sum(item_count) as item_count, - sum(problems_attempted) as problems_attempted, - case - when problems_attempted = 0 - then 'No problems attempted yet' - when problems_attempted = item_count - then 'All problems attempted' - else 'At least one problem attempted' - end as engagement_level, - username, - name, - email - from subsection_counts - group by - org, - course_key, - course_run, - section_with_name, + 'section' as content_level, actor_id, - username, - name, - email + section_block_id as block_id, + engagement_level as section_subsection_problem_engagement + from {{ ref("section_problem_engagement") }} ), problem_engagement as ( - select - org, - course_key, - course_run, - subsection_with_name as section_subsection_name, - 'subsection' as content_level, - actor_id as actor_id, - engagement_level as section_subsection_problem_engagement, - username, - name, - email - from subsection_counts + select * + from subsection_engagement union all - select - org, - course_key, - course_run, - section_with_name as section_subsection_name, - 'section' as content_level, - actor_id as actor_id, - engagement_level as section_subsection_problem_engagement, - username, - name, - email - from section_counts + select * + from section_engagement ) - select pe.org as org, pe.course_key as course_key, - pe.course_run as course_run, - pe.section_subsection_name as section_subsection_name, + course_blocks.course_run as course_run, + course_blocks.display_name_with_location as section_subsection_name, pe.content_level as content_level, pe.actor_id as actor_id, pe.section_subsection_problem_engagement as section_subsection_problem_engagement, - pe.username as username, - pe.name as name, - pe.email as email + users.username as username, + users.name as name, + users.email as email from problem_engagement pe +join + {{ ref("dim_course_blocks") }} course_blocks + on ( + pe.org = course_blocks.org + and pe.course_key = course_blocks.course_key + and pe.block_id = course_blocks.block_id + ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(pe.actor_id) = users.external_user_id diff --git a/models/problems/fact_problem_engagement_per_subsection.sql b/models/problems/fact_problem_engagement_per_subsection.sql index 84859d19..3a18e86b 100644 --- a/models/problems/fact_problem_engagement_per_subsection.sql +++ b/models/problems/fact_problem_engagement_per_subsection.sql @@ -11,12 +11,14 @@ with course_order as course_order, graded, actor_id, - problem_id + problem_id, + username, + name, + email from {{ ref("fact_problem_responses") }} ) select - attempts.attempted_on as attempted_on, attempts.org as org, attempts.course_key as course_key, attempts.course_run as course_run, @@ -25,11 +27,9 @@ select problems.item_count as item_count, attempts.actor_id as actor_id, attempts.problem_id as problem_id, - attempts.course_order as course_order, - attempts.graded as graded, - users.username as username, - users.name as name, - users.email as email + attempts.username as username, + attempts.name as name, + attempts.email as email from attempted_subsection_problems attempts join {{ ref("int_problems_per_subsection") }} problems @@ -39,5 +39,3 @@ join and attempts.section_number = problems.section_number and attempts.subsection_number = problems.subsection_number ) -left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/problems/fact_problem_responses.sql b/models/problems/fact_problem_responses.sql index 5b65d660..419a2474 100644 --- a/models/problems/fact_problem_responses.sql +++ b/models/problems/fact_problem_responses.sql @@ -5,7 +5,7 @@ with org, course_key, object_id, - {{ get_problem_id("object_id") }} as problem_id, + problem_id, actor_id, responses, success, diff --git a/models/problems/int_problem_hints.sql b/models/problems/int_problem_hints.sql index e9d8c11a..e0e8a221 100644 --- a/models/problems/int_problem_hints.sql +++ b/models/problems/int_problem_hints.sql @@ -4,7 +4,7 @@ with emission_time, org, course_key, - {{ get_problem_id("object_id") }} as problem_id, + problem_id, actor_id, case when object_id like '%/hint%' diff --git a/models/problems/int_problem_results.sql b/models/problems/int_problem_results.sql index bf7d89d8..339f727b 100644 --- a/models/problems/int_problem_results.sql +++ b/models/problems/int_problem_results.sql @@ -5,17 +5,9 @@ -- this will be used to pick the xAPI event corresponding to that submission with successful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - min(emission_time) as first_success_at - from {{ ref("fact_problem_responses") }} - where - -- clickhouse throws an error when shortening this to `where success` - success = true - group by org, course_key, problem_id, actor_id + select org, course_key, problem_id, actor_id, first_success_at + from {{ ref("responses") }} + where isNotNull(first_success_at) ), -- for all learners who did not submit a successful response, -- find the timestamp of the most recent unsuccessful response @@ -25,8 +17,8 @@ with course_key, problem_id, actor_id, - max(emission_time) as last_response_at - from {{ ref("fact_problem_responses") }} + max(last_attempt_at) as last_attempt_at + from {{ ref("responses") }} where actor_id not in (select distinct actor_id from successful_responses) group by org, course_key, problem_id, actor_id ), @@ -35,26 +27,46 @@ with select org, course_key, problem_id, actor_id, first_success_at as emission_time from successful_responses union all - select org, course_key, problem_id, actor_id, last_response_at as emission_time + select org, course_key, problem_id, actor_id, last_attempt_at as emission_time from unsuccessful_responses + ), + full_responses as ( + select + events.emission_time as emission_time, + events.org as org, + events.course_key as course_key, + events.problem_id as problem_id, + events.object_id as object_id, + events.actor_id as actor_id, + events.responses as responses, + events.success as success, + events.attempts as attempts, + events.interaction_type as interaction_type + from {{ ref("problem_events") }} events + join responses using (org, course_key, problem_id, actor_id, emission_time) ) select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - course_order, - problem_link, - actor_id, - responses, - success, - attempts, - graded, - interaction_type -from {{ ref("fact_problem_responses") }} problem_responses -join responses using (org, course_key, problem_id, actor_id, emission_time) + full_responses.emission_time as emission_time, + full_responses.org as org, + full_responses.course_key as course_key, + blocks.course_name as course_name, + blocks.course_run as course_run, + full_responses.problem_id as problem_id, + blocks.block_name as problem_name, + blocks.display_name_with_location as problem_name_with_location, + blocks.course_order as course_order, + {{ a_tag("full_responses.object_id", "blocks.block_name") }} as problem_link, + full_responses.actor_id as actor_id, + full_responses.responses as responses, + full_responses.success as success, + full_responses.attempts as attempts, + full_responses.interaction_type as interaction_type, + blocks.graded +from full_responses +join + {{ ref("dim_course_blocks") }} blocks + on ( + full_responses.course_key = blocks.course_key + and full_responses.problem_id = blocks.block_id + ) diff --git a/models/problems/problem_events.sql b/models/problems/problem_events.sql index 7a86fa52..e75ccea2 100644 --- a/models/problems/problem_events.sql +++ b/models/problems/problem_events.sql @@ -35,7 +35,8 @@ select ) as Int16 ), 0 - ) as attempts + ) as attempts, + {{ get_problem_id("object_id") }} as problem_id from {{ ref("xapi_events_all_parsed") }} where verb_id in ( diff --git a/models/problems/responses.sql b/models/problems/responses.sql new file mode 100644 index 00000000..9f14862f --- /dev/null +++ b/models/problems/responses.sql @@ -0,0 +1,43 @@ +-- select one record per (learner, problem, course, org) tuple +-- contains either the first successful attempt +-- or the most recent unsuccessful attempt +-- find the timestamp of the earliest successful response +-- this will be used to pick the xAPI event corresponding to that submission +{{ + config( + materialized="materialized_view", + schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"), + engine=get_engine("ReplacingMergeTree()"), + primary_key="(org, course_key, problem_id)", + order_by="(org, course_key, problem_id, actor_id)", + partition_by="toYYYYMM(emission_time)", + ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""), + ) +}} + +with + responses as ( + select emission_time, org, course_key, object_id, problem_id, actor_id, success + from {{ ref("problem_events") }} + where verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated' + ), + response_status as ( + select + org, + course_key, + problem_id, + actor_id, + MIN(case when success then emission_time else NULL end) as first_success_at, + MAX(emission_time) as last_attempt_at + from responses + group by org, course_key, problem_id, actor_id + ) +select + org, + course_key, + problem_id, + actor_id, + first_success_at, + last_attempt_at, + coalesce(first_success_at, last_attempt_at) as emission_time +from response_status diff --git a/models/problems/schema.yml b/models/problems/schema.yml index 49a93c00..d525f69a 100644 --- a/models/problems/schema.yml +++ b/models/problems/schema.yml @@ -249,6 +249,9 @@ models: - name: attempts data_type: int16 description: "Number indicating which attempt this was" + - name: problem_id + data_type: string + description: "The problem's unique identifier" - name: int_problems_per_subsection description: "A dimension table with the number of problems per subsection" @@ -280,6 +283,12 @@ models: - name: course_order data_type: Int32 description: "The sort order of this block in the course across all course blocks" + - name: subsection_block_id + data_type: string + description: "The unique identifier for the subsection block" + - name: section_block_id + data_type: string + description: "The unique identifier for the section block" - name: fact_problem_engagement_per_subsection description: "A dataset with one record representing a problem attempted by a learner and the section and subsection that problem belongs to" @@ -435,3 +444,78 @@ models: - name: course_order data_type: Int32 description: "The sort order of this block in the course across all course blocks" + + - name: responses + description: "A record per course per problem per learner with their last attempt and first success" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: problem_id + data_type: string + description: "The problem's unique identifier" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: first_success_at + data_type: datetime + description: "The timestamp of the first successful attempt" + - name: last_attempt_at + data_type: datetime + description: "The timestamp of the last attempt" + - name: emission_time + data_type: datetime + description: "The timestamp of the last attempt or the first successful attempt" + + - name: section_problem_engagement + description: "A record per course per section per learner with their engagement level" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: section_block_id + data_type: string + description: "The unique identifier for the section block" + - name: engagement_level + data_type: string + description: "The engagement level of the learner with the section" + tests: + - accepted_values: + values: + - 'No problems attempted yet' + - 'All problems attempted' + - 'At least one problem attempted' + + - name: subsection_problem_engagement + description: "A record per course per subsection per learner with their engagement level" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: subsection_block_id + data_type: string + description: "The unique identifier for the subsection block" + - name: engagement_level + data_type: string + description: "The engagement level of the learner with the subsection" + tests: + - accepted_values: + values: + - 'No problems attempted yet' + - 'All problems attempted' + - 'At least one problem attempted' diff --git a/models/problems/section_problem_engagement.sql b/models/problems/section_problem_engagement.sql new file mode 100644 index 00000000..fbb968fb --- /dev/null +++ b/models/problems/section_problem_engagement.sql @@ -0,0 +1,153 @@ +{{ + config( + materialized="materialized_view", + schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"), + engine=get_engine("ReplacingMergeTree()"), + primary_key="(org, course_key)", + order_by="(org, course_key, section_block_id, actor_id)", + ) +}} + +with + responses as ( + select + emission_time, + org, + course_key, + object_id, + problem_id, + actor_id, + responses, + success, + attempts, + interaction_type + from {{ ref("problem_events") }} + where verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated' + ), + fact_problem_responses as ( + select + responses.emission_time as emission_time, + responses.org as org, + responses.course_key as course_key, + blocks.course_name as course_name, + responses.problem_id as problem_id, + blocks.block_name as problem_name, + blocks.display_name_with_location as problem_name_with_location, + {{ a_tag("responses.object_id", "blocks.block_name") }} as problem_link, + blocks.graded as graded, + course_order as course_order, + responses.actor_id as actor_id, + responses.responses as responses, + responses.success as success, + responses.attempts as attempts, + responses.interaction_type as interaction_type + from responses + join + {{ ref("dim_course_blocks") }} blocks + on ( + responses.course_key = blocks.course_key + and responses.problem_id = blocks.block_id + ) + group by + -- multi-part questions include an extra record for the response to the + -- first + -- part of the question. this group by clause eliminates the duplicate + -- record + emission_time, + org, + course_key, + course_name, + problem_id, + problem_name, + problem_name_with_location, + problem_link, + actor_id, + responses, + success, + attempts, + course_order, + graded, + interaction_type + ), + attempted_subsection_problems as ( + select distinct + date(emission_time) as attempted_on, + org, + course_key, + {{ section_from_display("problem_name_with_location") }} as section_number, + {{ subsection_from_display("problem_name_with_location") }} + as subsection_number, + course_order as course_order, + graded, + actor_id, + problem_id + from fact_problem_responses + ), + fact_problem_engagement_per_subsection as ( + select + attempts.org as org, + attempts.course_key as course_key, + problems.section_with_name as section_with_name, + problems.subsection_with_name as subsection_with_name, + problems.item_count as item_count, + attempts.actor_id as actor_id, + attempts.problem_id as problem_id, + problems.section_block_id as section_block_id + from attempted_subsection_problems attempts + join + {{ ref("int_problems_per_subsection") }} problems + on ( + attempts.org = problems.org + and attempts.course_key = problems.course_key + and attempts.section_number = problems.section_number + and attempts.subsection_number = problems.subsection_number + ) + ), + subsection_counts as ( + select + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + count(distinct problem_id) as problems_attempted, + case + when problems_attempted = 0 + then 'No problems attempted yet' + when problems_attempted = item_count + then 'All problems attempted' + else 'At least one problem attempted' + end as engagement_level, + section_block_id + from fact_problem_engagement_per_subsection + group by + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + section_block_id + ), + section_counts as ( + select + org, + course_key, + actor_id, + sum(item_count) as item_count, + sum(problems_attempted) as problems_attempted, + case + when problems_attempted = 0 + then 'No problems attempted yet' + when problems_attempted = item_count + then 'All problems attempted' + else 'At least one problem attempted' + end as engagement_level, + section_block_id + from subsection_counts + group by org, course_key, section_block_id, actor_id + ) + +select org, course_key, actor_id, section_block_id, engagement_level +from section_counts diff --git a/models/problems/subsection_problem_engagement.sql b/models/problems/subsection_problem_engagement.sql new file mode 100644 index 00000000..14c9f3cb --- /dev/null +++ b/models/problems/subsection_problem_engagement.sql @@ -0,0 +1,135 @@ +{{ + config( + materialized="materialized_view", + schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"), + engine=get_engine("ReplacingMergeTree()"), + primary_key="(org, course_key)", + order_by="(org, course_key, subsection_block_id, actor_id)", + ) +}} + +with + responses as ( + select + emission_time, + org, + course_key, + object_id, + problem_id, + actor_id, + responses, + success, + attempts, + interaction_type + from {{ ref("problem_events") }} + where verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated' + ), + fact_problem_responses as ( + select + responses.emission_time as emission_time, + responses.org as org, + responses.course_key as course_key, + blocks.course_name as course_name, + responses.problem_id as problem_id, + blocks.block_name as problem_name, + blocks.display_name_with_location as problem_name_with_location, + {{ a_tag("responses.object_id", "blocks.block_name") }} as problem_link, + blocks.graded as graded, + course_order as course_order, + responses.actor_id as actor_id, + responses.responses as responses, + responses.success as success, + responses.attempts as attempts, + responses.interaction_type as interaction_type + from responses + join + {{ ref("dim_course_blocks") }} blocks + on ( + responses.course_key = blocks.course_key + and responses.problem_id = blocks.block_id + ) + group by + -- multi-part questions include an extra record for the response to the + -- first + -- part of the question. this group by clause eliminates the duplicate + -- record + emission_time, + org, + course_key, + course_name, + problem_id, + problem_name, + problem_name_with_location, + problem_link, + actor_id, + responses, + success, + attempts, + course_order, + graded, + interaction_type + ), + attempted_subsection_problems as ( + select distinct + date(emission_time) as attempted_on, + org, + course_key, + {{ section_from_display("problem_name_with_location") }} as section_number, + {{ subsection_from_display("problem_name_with_location") }} + as subsection_number, + course_order as course_order, + graded, + actor_id, + problem_id + from fact_problem_responses + ), + fact_problem_engagement_per_subsection as ( + select + attempts.org as org, + attempts.course_key as course_key, + problems.section_with_name as section_with_name, + problems.subsection_with_name as subsection_with_name, + problems.item_count as item_count, + attempts.actor_id as actor_id, + attempts.problem_id as problem_id, + problems.subsection_block_id as subsection_block_id + from attempted_subsection_problems attempts + join + {{ ref("int_problems_per_subsection") }} problems + on ( + attempts.org = problems.org + and attempts.course_key = problems.course_key + and attempts.section_number = problems.section_number + and attempts.subsection_number = problems.subsection_number + ) + ), + subsection_counts as ( + select + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + count(distinct problem_id) as problems_attempted, + case + when problems_attempted = 0 + then 'No problems attempted yet' + when problems_attempted = item_count + then 'All problems attempted' + else 'At least one problem attempted' + end as engagement_level, + subsection_block_id + from fact_problem_engagement_per_subsection + group by + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + subsection_block_id + ) + +select org, course_key, actor_id, subsection_block_id, engagement_level +from subsection_counts diff --git a/models/video/schema.yml b/models/video/schema.yml index e1726789..e9fa7740 100644 --- a/models/video/schema.yml +++ b/models/video/schema.yml @@ -194,6 +194,12 @@ models: - name: course_order data_type: Int32 description: "The sort order of this block in the course across all course blocks" + - name: subsection_block_id + data_type: string + description: "The unique identifier for the subsection block" + - name: section_block_id + data_type: string + description: "The unique identifier for the section block" - name: fact_video_engagement_per_subsection description: "A dataset with one record representing a video viewed by a learner and the section and subsection that video belongs to"