Skip to content

Commit

Permalink
Merge pull request #91 from openedx/cag/problem-performance
Browse files Browse the repository at this point in the history
feat: move response status to a MV
feat: move problem engagement to an mv
  • Loading branch information
Ian2012 committed May 23, 2024
2 parents 9660e39 + 2948f0f commit 202f2e1
Show file tree
Hide file tree
Showing 13 changed files with 507 additions and 130 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ on:
branches:
- main
pull_request:
branches:
- main

env:
DBT_PROFILES_DIR: ./.github/
Expand Down
4 changes: 3 additions & 1 deletion macros/items_per_subsection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
subsection_blocks.display_name_with_location as subsection_with_name,
ips.course_order as course_order,
ips.graded as graded,
ips.item_count as item_count
ips.item_count as item_count,
subsection_blocks.block_id as subsection_block_id,
section_blocks.block_id as section_block_id
from items_per_subsection ips
left join
{{ ref("dim_course_blocks") }} section_blocks
Expand Down
111 changes: 28 additions & 83 deletions models/problems/fact_problem_engagement.sql
Original file line number Diff line number Diff line change
@@ -1,104 +1,49 @@
with
subsection_counts as (
subsection_engagement as (
select
org,
course_key,
course_run,
section_with_name,
subsection_with_name,
actor_id,
item_count,
count(distinct problem_id) as problems_attempted,
case
when problems_attempted = 0
then 'No problems attempted yet'
when problems_attempted = item_count
then 'All problems attempted'
else 'At least one problem attempted'
end as engagement_level,
username,
name,
email
from {{ ref("fact_problem_engagement_per_subsection") }}
group by
org,
course_key,
course_run,
section_with_name,
subsection_with_name,
'subsection' as content_level,
actor_id,
item_count,
username,
name,
email
subsection_block_id as block_id,
engagement_level as section_subsection_problem_engagement
from {{ ref("subsection_problem_engagement") }}
),
section_counts as (
section_engagement as (
select
org,
course_key,
course_run,
section_with_name,
actor_id,
sum(item_count) as item_count,
sum(problems_attempted) as problems_attempted,
case
when problems_attempted = 0
then 'No problems attempted yet'
when problems_attempted = item_count
then 'All problems attempted'
else 'At least one problem attempted'
end as engagement_level,
username,
name,
email
from subsection_counts
group by
org,
course_key,
course_run,
section_with_name,
'section' as content_level,
actor_id,
username,
name,
email
section_block_id as block_id,
engagement_level as section_subsection_problem_engagement
from {{ ref("section_problem_engagement") }}
),
problem_engagement as (
select
org,
course_key,
course_run,
subsection_with_name as section_subsection_name,
'subsection' as content_level,
actor_id as actor_id,
engagement_level as section_subsection_problem_engagement,
username,
name,
email
from subsection_counts
select *
from subsection_engagement
union all
select
org,
course_key,
course_run,
section_with_name as section_subsection_name,
'section' as content_level,
actor_id as actor_id,
engagement_level as section_subsection_problem_engagement,
username,
name,
email
from section_counts
select *
from section_engagement
)

select
pe.org as org,
pe.course_key as course_key,
pe.course_run as course_run,
pe.section_subsection_name as section_subsection_name,
course_blocks.course_run as course_run,
course_blocks.display_name_with_location as section_subsection_name,
pe.content_level as content_level,
pe.actor_id as actor_id,
pe.section_subsection_problem_engagement as section_subsection_problem_engagement,
pe.username as username,
pe.name as name,
pe.email as email
users.username as username,
users.name as name,
users.email as email
from problem_engagement pe
join
{{ ref("dim_course_blocks") }} course_blocks
on (
pe.org = course_blocks.org
and pe.course_key = course_blocks.course_key
and pe.block_id = course_blocks.block_id
)
left outer join
{{ ref("dim_user_pii") }} users on toUUID(pe.actor_id) = users.external_user_id
16 changes: 7 additions & 9 deletions models/problems/fact_problem_engagement_per_subsection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ with
course_order as course_order,
graded,
actor_id,
problem_id
problem_id,
username,
name,
email
from {{ ref("fact_problem_responses") }}
)

select
attempts.attempted_on as attempted_on,
attempts.org as org,
attempts.course_key as course_key,
attempts.course_run as course_run,
Expand All @@ -25,11 +27,9 @@ select
problems.item_count as item_count,
attempts.actor_id as actor_id,
attempts.problem_id as problem_id,
attempts.course_order as course_order,
attempts.graded as graded,
users.username as username,
users.name as name,
users.email as email
attempts.username as username,
attempts.name as name,
attempts.email as email
from attempted_subsection_problems attempts
join
{{ ref("int_problems_per_subsection") }} problems
Expand All @@ -39,5 +39,3 @@ join
and attempts.section_number = problems.section_number
and attempts.subsection_number = problems.subsection_number
)
left outer join
{{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id
2 changes: 1 addition & 1 deletion models/problems/fact_problem_responses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ with
org,
course_key,
object_id,
{{ get_problem_id("object_id") }} as problem_id,
problem_id,
actor_id,
responses,
success,
Expand Down
2 changes: 1 addition & 1 deletion models/problems/int_problem_hints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ with
emission_time,
org,
course_key,
{{ get_problem_id("object_id") }} as problem_id,
problem_id,
actor_id,
case
when object_id like '%/hint%'
Expand Down
76 changes: 44 additions & 32 deletions models/problems/int_problem_results.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,9 @@
-- this will be used to pick the xAPI event corresponding to that submission
with
successful_responses as (
select
org,
course_key,
problem_id,
actor_id,
min(emission_time) as first_success_at
from {{ ref("fact_problem_responses") }}
where
-- clickhouse throws an error when shortening this to `where success`
success = true
group by org, course_key, problem_id, actor_id
select org, course_key, problem_id, actor_id, first_success_at
from {{ ref("responses") }}
where isNotNull(first_success_at)
),
-- for all learners who did not submit a successful response,
-- find the timestamp of the most recent unsuccessful response
Expand All @@ -25,8 +17,8 @@ with
course_key,
problem_id,
actor_id,
max(emission_time) as last_response_at
from {{ ref("fact_problem_responses") }}
max(last_attempt_at) as last_attempt_at
from {{ ref("responses") }}
where actor_id not in (select distinct actor_id from successful_responses)
group by org, course_key, problem_id, actor_id
),
Expand All @@ -35,26 +27,46 @@ with
select org, course_key, problem_id, actor_id, first_success_at as emission_time
from successful_responses
union all
select org, course_key, problem_id, actor_id, last_response_at as emission_time
select org, course_key, problem_id, actor_id, last_attempt_at as emission_time
from unsuccessful_responses
),
full_responses as (
select
events.emission_time as emission_time,
events.org as org,
events.course_key as course_key,
events.problem_id as problem_id,
events.object_id as object_id,
events.actor_id as actor_id,
events.responses as responses,
events.success as success,
events.attempts as attempts,
events.interaction_type as interaction_type
from {{ ref("problem_events") }} events
join responses using (org, course_key, problem_id, actor_id, emission_time)
)

select
emission_time,
org,
course_key,
course_name,
course_run,
problem_id,
problem_name,
problem_name_with_location,
course_order,
problem_link,
actor_id,
responses,
success,
attempts,
graded,
interaction_type
from {{ ref("fact_problem_responses") }} problem_responses
join responses using (org, course_key, problem_id, actor_id, emission_time)
full_responses.emission_time as emission_time,
full_responses.org as org,
full_responses.course_key as course_key,
blocks.course_name as course_name,
blocks.course_run as course_run,
full_responses.problem_id as problem_id,
blocks.block_name as problem_name,
blocks.display_name_with_location as problem_name_with_location,
blocks.course_order as course_order,
{{ a_tag("full_responses.object_id", "blocks.block_name") }} as problem_link,
full_responses.actor_id as actor_id,
full_responses.responses as responses,
full_responses.success as success,
full_responses.attempts as attempts,
full_responses.interaction_type as interaction_type,
blocks.graded
from full_responses
join
{{ ref("dim_course_blocks") }} blocks
on (
full_responses.course_key = blocks.course_key
and full_responses.problem_id = blocks.block_id
)
3 changes: 2 additions & 1 deletion models/problems/problem_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ select
) as Int16
),
0
) as attempts
) as attempts,
{{ get_problem_id("object_id") }} as problem_id
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in (
Expand Down
43 changes: 43 additions & 0 deletions models/problems/responses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Response status per learner and problem.
--
-- Emits one row per (org, course_key, problem_id, actor_id) with:
--   * first_success_at: earliest emission_time among successful "evaluated"
--     events, or NULL when none of the learner's attempts succeeded
--   * last_attempt_at: latest emission_time among all "evaluated" events
--   * emission_time: first_success_at when present, otherwise last_attempt_at
-- emission_time is what is used to pick the xAPI event corresponding to
-- that submission.
--
-- NOTE(review): the aggregates below run over whatever rows the view's select
-- sees; for a materialized view that is presumably each inserted block rather
-- than the full history -- confirm that the ReplacingMergeTree collapse on the
-- order_by key yields the intended row.
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("ReplacingMergeTree()"),
        primary_key="(org, course_key, problem_id)",
        order_by="(org, course_key, problem_id, actor_id)",
        partition_by="toYYYYMM(emission_time)",
        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
    )
}}

with
    -- only "evaluated" events count as a response to a problem
    evaluated_events as (
        select
            emission_time,
            org,
            course_key,
            object_id,
            problem_id,
            actor_id,
            success
        from {{ ref("problem_events") }}
        where verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated'
    ),
    -- collapse the events to one row per learner and problem
    attempt_summary as (
        select
            org,
            course_key,
            problem_id,
            actor_id,
            -- unsuccessful events contribute NULL, which min() ignores
            min(
                case when success then emission_time else null end
            ) as first_success_at,
            max(emission_time) as last_attempt_at
        from evaluated_events
        group by
            org,
            course_key,
            problem_id,
            actor_id
    )

select
    attempt_summary.org as org,
    attempt_summary.course_key as course_key,
    attempt_summary.problem_id as problem_id,
    attempt_summary.actor_id as actor_id,
    attempt_summary.first_success_at as first_success_at,
    attempt_summary.last_attempt_at as last_attempt_at,
    -- prefer the first success; otherwise fall back to the latest attempt
    coalesce(
        attempt_summary.first_success_at, attempt_summary.last_attempt_at
    ) as emission_time
from attempt_summary

0 comments on commit 202f2e1

Please sign in to comment.