Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: move response status to a MV #91

Merged
merged 14 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ on:
branches:
- main
pull_request:
branches:
- main

env:
DBT_PROFILES_DIR: ./.github/
Expand Down
4 changes: 3 additions & 1 deletion macros/items_per_subsection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
subsection_blocks.display_name_with_location as subsection_with_name,
ips.course_order as course_order,
ips.graded as graded,
ips.item_count as item_count
ips.item_count as item_count,
subsection_blocks.block_id as subsection_block_id,
section_blocks.block_id as section_block_id
from items_per_subsection ips
left join
{{ ref("dim_course_blocks") }} section_blocks
Expand Down
111 changes: 28 additions & 83 deletions models/problems/fact_problem_engagement.sql
Original file line number Diff line number Diff line change
@@ -1,104 +1,49 @@
with
subsection_counts as (
subsection_engagement as (
select
org,
course_key,
course_run,
section_with_name,
subsection_with_name,
actor_id,
item_count,
count(distinct problem_id) as problems_attempted,
case
when problems_attempted = 0
then 'No problems attempted yet'
when problems_attempted = item_count
then 'All problems attempted'
else 'At least one problem attempted'
end as engagement_level,
username,
name,
email
from {{ ref("fact_problem_engagement_per_subsection") }}
group by
org,
course_key,
course_run,
section_with_name,
subsection_with_name,
'subsection' as content_level,
actor_id,
item_count,
username,
name,
email
subsection_block_id as block_id,
engagement_level as section_subsection_problem_engagement
from {{ ref("subsection_problem_engagement") }}
),
section_counts as (
section_engagement as (
select
org,
course_key,
course_run,
section_with_name,
actor_id,
sum(item_count) as item_count,
sum(problems_attempted) as problems_attempted,
case
when problems_attempted = 0
then 'No problems attempted yet'
when problems_attempted = item_count
then 'All problems attempted'
else 'At least one problem attempted'
end as engagement_level,
username,
name,
email
from subsection_counts
group by
org,
course_key,
course_run,
section_with_name,
'section' as content_level,
actor_id,
username,
name,
email
section_block_id as block_id,
engagement_level as section_subsection_problem_engagement
from {{ ref("section_problem_engagement") }}
),
problem_engagement as (
select
org,
course_key,
course_run,
subsection_with_name as section_subsection_name,
'subsection' as content_level,
actor_id as actor_id,
engagement_level as section_subsection_problem_engagement,
username,
name,
email
from subsection_counts
select *
from subsection_engagement
union all
select
org,
course_key,
course_run,
section_with_name as section_subsection_name,
'section' as content_level,
actor_id as actor_id,
engagement_level as section_subsection_problem_engagement,
username,
name,
email
from section_counts
select *
from section_engagement
)

select
pe.org as org,
pe.course_key as course_key,
pe.course_run as course_run,
pe.section_subsection_name as section_subsection_name,
course_blocks.course_run as course_run,
course_blocks.display_name_with_location as section_subsection_name,
pe.content_level as content_level,
pe.actor_id as actor_id,
pe.section_subsection_problem_engagement as section_subsection_problem_engagement,
pe.username as username,
pe.name as name,
pe.email as email
users.username as username,
users.name as name,
users.email as email
from problem_engagement pe
join
{{ ref("dim_course_blocks") }} course_blocks
on (
pe.org = course_blocks.org
and pe.course_key = course_blocks.course_key
and pe.block_id = course_blocks.block_id
)
left outer join
{{ ref("dim_user_pii") }} users on toUUID(pe.actor_id) = users.external_user_id
16 changes: 7 additions & 9 deletions models/problems/fact_problem_engagement_per_subsection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ with
course_order as course_order,
graded,
actor_id,
problem_id
problem_id,
username,
name,
email
from {{ ref("fact_problem_responses") }}
)

select
attempts.attempted_on as attempted_on,
attempts.org as org,
attempts.course_key as course_key,
attempts.course_run as course_run,
Expand All @@ -25,11 +27,9 @@ select
problems.item_count as item_count,
attempts.actor_id as actor_id,
attempts.problem_id as problem_id,
attempts.course_order as course_order,
attempts.graded as graded,
users.username as username,
users.name as name,
users.email as email
attempts.username as username,
attempts.name as name,
attempts.email as email
from attempted_subsection_problems attempts
join
{{ ref("int_problems_per_subsection") }} problems
Expand All @@ -39,5 +39,3 @@ join
and attempts.section_number = problems.section_number
and attempts.subsection_number = problems.subsection_number
)
left outer join
{{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id
2 changes: 1 addition & 1 deletion models/problems/fact_problem_responses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ with
org,
course_key,
object_id,
{{ get_problem_id("object_id") }} as problem_id,
problem_id,
actor_id,
responses,
success,
Expand Down
2 changes: 1 addition & 1 deletion models/problems/int_problem_hints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ with
emission_time,
org,
course_key,
{{ get_problem_id("object_id") }} as problem_id,
problem_id,
actor_id,
case
when object_id like '%/hint%'
Expand Down
76 changes: 44 additions & 32 deletions models/problems/int_problem_results.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,9 @@
-- this will be used to pick the xAPI event corresponding to that submission
with
successful_responses as (
select
org,
course_key,
problem_id,
actor_id,
min(emission_time) as first_success_at
from {{ ref("fact_problem_responses") }}
where
-- clickhouse throws an error when shortening this to `where success`
success = true
group by org, course_key, problem_id, actor_id
select org, course_key, problem_id, actor_id, first_success_at
from {{ ref("responses") }}
where isNotNull(first_success_at)
),
-- for all learners who did not submit a successful response,
-- find the timestamp of the most recent unsuccessful response
Expand All @@ -25,8 +17,8 @@ with
course_key,
problem_id,
actor_id,
max(emission_time) as last_response_at
from {{ ref("fact_problem_responses") }}
max(last_attempt_at) as last_attempt_at
from {{ ref("responses") }}
where actor_id not in (select distinct actor_id from successful_responses)
group by org, course_key, problem_id, actor_id
),
Expand All @@ -35,26 +27,46 @@ with
select org, course_key, problem_id, actor_id, first_success_at as emission_time
from successful_responses
union all
select org, course_key, problem_id, actor_id, last_response_at as emission_time
select org, course_key, problem_id, actor_id, last_attempt_at as emission_time
from unsuccessful_responses
),
full_responses as (
select
events.emission_time as emission_time,
events.org as org,
events.course_key as course_key,
events.problem_id as problem_id,
events.object_id as object_id,
events.actor_id as actor_id,
events.responses as responses,
events.success as success,
events.attempts as attempts,
events.interaction_type as interaction_type
from {{ ref("problem_events") }} events
join responses using (org, course_key, problem_id, actor_id, emission_time)
)

select
emission_time,
org,
course_key,
course_name,
course_run,
problem_id,
problem_name,
problem_name_with_location,
course_order,
problem_link,
actor_id,
responses,
success,
attempts,
graded,
interaction_type
from {{ ref("fact_problem_responses") }} problem_responses
join responses using (org, course_key, problem_id, actor_id, emission_time)
full_responses.emission_time as emission_time,
full_responses.org as org,
full_responses.course_key as course_key,
blocks.course_name as course_name,
blocks.course_run as course_run,
full_responses.problem_id as problem_id,
blocks.block_name as problem_name,
blocks.display_name_with_location as problem_name_with_location,
blocks.course_order as course_order,
{{ a_tag("full_responses.object_id", "blocks.block_name") }} as problem_link,
full_responses.actor_id as actor_id,
full_responses.responses as responses,
full_responses.success as success,
full_responses.attempts as attempts,
full_responses.interaction_type as interaction_type,
blocks.graded
from full_responses
join
{{ ref("dim_course_blocks") }} blocks
on (
full_responses.course_key = blocks.course_key
and full_responses.problem_id = blocks.block_id
)
3 changes: 2 additions & 1 deletion models/problems/problem_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ select
) as Int16
),
0
) as attempts
) as attempts,
{{ get_problem_id("object_id") }} as problem_id
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in (
Expand Down
43 changes: 43 additions & 0 deletions models/problems/responses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Per-learner response status for each problem.
--
-- The select emits, for every (org, course_key, problem_id, actor_id) group:
--   first_success_at  timestamp of the earliest successful `evaluated` event,
--                     null when the learner has no successful response
--   last_attempt_at   timestamp of the most recent `evaluated` event,
--                     whether or not it was successful
--   emission_time     first_success_at when present, otherwise last_attempt_at;
--                     used downstream to pick the xAPI event corresponding
--                     to that submission
--
-- NOTE(review): as a ClickHouse materialized view this select presumably runs
-- on each inserted block rather than over the full history, so a key may hold
-- several rows until ReplacingMergeTree merges them (and only within one
-- toYYYYMM(emission_time) partition) -- confirm that readers re-aggregate.
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("ReplacingMergeTree()"),
        primary_key="(org, course_key, problem_id)",
        order_by="(org, course_key, problem_id, actor_id)",
        partition_by="toYYYYMM(emission_time)",
        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
    )
}}

with
    -- keep only problem events carrying the `evaluated` verb
    evaluated_events as (
        select emission_time, org, course_key, problem_id, actor_id, success
        from {{ ref("problem_events") }}
        where verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated'
    ),
    -- collapse the events to one status row per learner and problem
    response_status as (
        select
            org,
            course_key,
            problem_id,
            actor_id,
            -- unsuccessful events become null, which min() skips; the result
            -- stays null when the group has no successful event at all
            min(
                case when success then emission_time else null end
            ) as first_success_at,
            max(emission_time) as last_attempt_at
        from evaluated_events
        group by org, course_key, problem_id, actor_id
    )

select
    org,
    course_key,
    problem_id,
    actor_id,
    first_success_at,
    last_attempt_at,
    -- never null: falls back to the latest attempt when there is no success
    coalesce(first_success_at, last_attempt_at) as emission_time
from response_status
Loading
Loading