Skip to content

Commit

Permalink
Merge pull request #629 from mapswipe/feature/time-spent-threshold
Browse files Browse the repository at this point in the history
Add threshold for time-spent calculation
  • Loading branch information
Hagellach37 committed Dec 23, 2022
2 parents bb16ef8 + 04b498d commit 06ec554
Showing 1 changed file with 65 additions and 42 deletions.
107 changes: 65 additions & 42 deletions django/apps/aggregated/management/commands/update_aggregated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,43 @@
AggregatedUserGroupStatData,
AggregatedUserStatData,
)
from apps.existing_database.models import MappingSession
from apps.existing_database.models import MappingSession, Project
from django.core.management.base import BaseCommand
from django.db import connection, models, transaction
from django.utils import timezone

# Time-spent outlier thresholds, calculated by @Hagellach37.
# The `95_percent` column of the table below is the basis for the CASE
# factors used by TASK_GROUP_METADATA_QUERY (rounded to one decimal).
# |project_type|median|95_percent|avg|
# |------------|------|----------|---|
# |1|00:00:00.208768|00:00:01.398161|00:00:28.951521|
# |2|00:00:01.330297|00:00:06.076814|00:00:03.481192|
# |3|00:00:02.092967|00:00:11.271081|00:00:06.045881|
# NOTE(review): rows appear to map to BUILD_AREA (1), FOOTPRINT (2) and
#   CHANGE_DETECTION (3) judging by the factors below -- confirm against
#   Project.Type.
# NOTE(review): the table has no row for COMPLETENESS; the query reuses 1.4
#   for it -- confirm this is intended.
# NOTE(review): row 3 (11.271081) would round to 11.3, the query uses 11.2
#   -- confirm.
#
# SQL fragment meant to be embedded as the body of a CTE. Grouped by
# (project_id, project_type, group_id), it selects:
#   - project_id, group_id
#   - total_task_group_area: sum of the task geometries' areas, in sq km
#   - time_spent_max_allowed: the per-project_type factor multiplied by the
#     number of joined task rows in the group (factor is 1 for any
#     project_type not listed in the CASE)
# The enclosing statement must define a `used_task_groups` CTE exposing
# project_id, project_type and group_id, because this fragment joins on it.
TASK_GROUP_METADATA_QUERY = f"""
SELECT
project_id,
group_id,
SUM(
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area, -- sqkm
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN UG.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN UG.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN UG.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN UG.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN used_task_groups UG USING (project_id, group_id)
GROUP BY project_id, project_type, group_id
"""


UPDATE_USER_DATA_SQL = f"""
INSERT INTO "{AggregatedUserStatData._meta.db_table}" (
project_id,
Expand All @@ -26,49 +58,45 @@
WITH used_task_groups as (
SELECT
MS.project_id,
P.project_type,
MS.group_id
FROM mapping_sessions MS
INNER JOIN projects P USING (project_id)
WHERE
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
AND P.project_type != 2 -- Skip for footprint type missions
GROUP BY project_id, group_id -- To get unique
-- Skip for footprint type missions
P.project_type != {Project.Type.FOOTPRINT.value}
AND MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY project_id, project_type, group_id -- To get unique
),
-- Calculated area by task_groups
task_group_area_data as (
SELECT
project_id,
group_id,
SUM(
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area -- sqkm
FROM tasks T
INNER JOIN used_task_groups UG USING (project_id, group_id)
GROUP BY project_id, group_id
),
task_group_metadata as ({TASK_GROUP_METADATA_QUERY}),
-- Aggregate data by user
user_data as (
SELECT
MS.project_id,
MS.group_id,
MS.user_id,
MS.start_time::date as timestamp_date,
MS.start_time,
MS.end_time,
LEAST(
EXTRACT(EPOCH FROM (MS.end_time - MS.start_time)),
TG.time_spent_max_allowed
) as time_spent_sec,
MS.items_count as task_count,
Coalesce(TG.total_task_group_area, 0) as area_swiped
FROM mapping_sessions MS
LEFT JOIN task_group_area_data TG USING (project_id, group_id)
LEFT JOIN task_group_metadata TG USING (project_id, group_id)
WHERE
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
),
-- Additional aggregate by timestamp_date
user_agg_data as (
SELECT
project_id,
user_id,
timestamp_date,
COALESCE(SUM(EXTRACT(EPOCH FROM (end_time - start_time))), 0) as total_time,
COALESCE(SUM(time_spent_sec), 0) as total_time,
COALESCE(SUM(task_count), 0) as task_count,
COALESCE(SUM(area_swiped), 0) as area_swiped
FROM user_data
Expand All @@ -92,7 +120,6 @@
swipes = EXCLUDED.swipes;
"""


UPDATE_USER_GROUP_SQL = f"""
INSERT INTO "{AggregatedUserGroupStatData._meta.db_table}" (
project_id,
Expand All @@ -109,27 +136,20 @@
WITH used_task_groups as (
SELECT
MS.project_id,
P.project_type,
MS.group_id
From mapping_sessions_user_groups MSUR
FROM mapping_sessions_user_groups MSUR
INNER JOIN mapping_sessions MS USING (mapping_session_id)
INNER JOIN projects P USING (project_id)
WHERE
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
AND P.project_type != 2 -- Skip for footprint type missions
GROUP BY project_id, group_id -- To get unique
-- Skip for footprint type missions
P.project_type != {Project.Type.FOOTPRINT.value}
AND MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY project_id, project_type, group_id -- To get unique
),
-- Calculated area by task_groups
task_group_area_data as (
SELECT
project_id,
group_id,
SUM(
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area -- sqkm
FROM tasks T
INNER JOIN used_task_groups UG USING (project_id, group_id)
GROUP BY project_id, group_id
),
task_group_metadata as ({TASK_GROUP_METADATA_QUERY}),
-- Aggregate data by user-group
user_group_data as (
SELECT
Expand All @@ -138,15 +158,18 @@
MS.user_id,
MSUR.user_group_id,
MS.start_time::date as timestamp_date,
MS.start_time as start_time,
MS.end_time as end_time,
LEAST(
EXTRACT(EPOCH FROM (MS.end_time - MS.start_time)),
TG.time_spent_max_allowed
) as time_spent_sec,
MS.items_count as task_count,
Coalesce(TG.total_task_group_area, 0) as area_swiped
From mapping_sessions_user_groups MSUR
FROM mapping_sessions_user_groups MSUR
INNER JOIN mapping_sessions MS USING (mapping_session_id)
LEFT JOIN task_group_area_data TG USING (project_id, group_id)
LEFT JOIN task_group_metadata TG USING (project_id, group_id)
WHERE
MS.start_time >= %(from_date)s and MS.start_time < %(until_date)s
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
),
-- Additional aggregate by timestamp_date
user_group_agg_data as (
Expand All @@ -155,7 +178,7 @@
user_id,
user_group_id,
timestamp_date,
COALESCE(SUM(EXTRACT(EPOCH FROM (end_time - start_time))), 0) as total_time,
COALESCE(SUM(time_spent_sec), 0) as total_time,
COALESCE(SUM(task_count), 0) as task_count,
COALESCE(SUM(area_swiped), 0) as area_swiped
FROM user_group_data
Expand Down

0 comments on commit 06ec554

Please sign in to comment.