-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #608 from mapswipe/transfer-results-to-new-schema
add script to transfer results to new schema
- Loading branch information
Showing
2 changed files
with
94 additions
and
0 deletions.
There are no files selected for viewing
66 changes: 66 additions & 0 deletions
66
mapswipe_workers/python_scripts/results_to_mapping_sessions_results.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import sys | ||
|
||
import pandas as pd | ||
|
||
from mapswipe_workers import auth | ||
from mapswipe_workers.definitions import logger | ||
|
||
|
||
def copy_results_batch(first_timestamp, last_timestamp):
    """Copy one time window of results into ``mapping_sessions_results``.

    Mapping sessions with ``first_timestamp <= start_time < last_timestamp``
    are selected; their rows from ``results`` are staged in a re-created
    ``results_batch`` table and then inserted into
    ``mapping_sessions_results``. Rows that conflict with existing ones are
    skipped (``on conflict do nothing``).

    Args:
        first_timestamp: Inclusive lower bound for ``ms.start_time``.
        last_timestamp: Exclusive upper bound for ``ms.start_time``.
    """
    logger.info(
        f"Start process for : ms.start_time>={first_timestamp} "
        f"and ms.start_time<{last_timestamp}"
    )

    # Both statements are sent in a single call; the time window is passed
    # as named query parameters rather than formatted into the SQL text.
    transfer_sql = """
        -- create table with results for given time span
        drop table if exists results_batch;
        create table results_batch as
            select r.*
            from mapping_sessions ms, results r
            where
                ms.start_time >= %(first_timestamp)s
                and ms.start_time < %(last_timestamp)s
                and ms.project_id = r.project_id
                and ms.group_id = r.group_id
                and ms.user_id = r.user_id;
        insert into mapping_sessions_results
        (
            select
                m.mapping_session_id
                ,r.task_id
                ,r."result"
            from results_batch r, mapping_sessions m
            where
                r.project_id = m.project_id and
                r.group_id = m.group_id and
                r.user_id = m.user_id
        )
        on conflict do nothing;
    """
    window_params = {
        "first_timestamp": first_timestamp,
        "last_timestamp": last_timestamp,
    }

    postgres = auth.postgresDB()
    postgres.query(transfer_sql, window_params)

    logger.info(
        f"Finished process for : ms.start_time >= {first_timestamp} "
        f"and ms.start_time < {last_timestamp}"
    )
|
||
|
||
def build_batch_windows(min_timestamp, max_timestamp):
    """Return consecutive month windows between two timestamps.

    Every month start (pandas frequency ``"MS"``) that falls inside
    ``[min_timestamp, max_timestamp]`` becomes a boundary, formatted as
    ``YYYY-MM-DD``. Neighbouring boundaries are paired into
    ``(first_timestamp, last_timestamp)`` tuples.

    Fewer than two boundaries produce an empty list, and any part of the
    span before the first or after the last month start is not covered by
    a window.

    Args:
        min_timestamp: Start of the span, in any form ``pd.date_range`` accepts.
        max_timestamp: End of the span, in any form ``pd.date_range`` accepts.

    Returns:
        A list of ``(first_timestamp, last_timestamp)`` string tuples.
    """
    month_starts = (
        pd.date_range(min_timestamp, max_timestamp, freq="MS")
        .strftime("%Y-%m-%d")
        .tolist()
    )
    # Pair each boundary with its successor: [a, b, c] -> [(a, b), (b, c)].
    return list(zip(month_starts, month_starts[1:]))


if __name__ == "__main__":
    """Use this command to run in docker container.
    docker-compose run -d mapswipe_workers_creation python3 python_scripts/results_to_mapping_sessions_results.py "2016-01-01" "2022-10-01" # noqa
    """
    # Fail with a usage hint instead of an IndexError when arguments are
    # missing; extra arguments are still ignored, as before.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <min_timestamp> <max_timestamp>")

    min_timestamp = sys.argv[1]
    max_timestamp = sys.argv[2]
    windows = build_batch_windows(min_timestamp, max_timestamp)
    total = len(windows)

    if not windows:
        # Make the no-op visible: a span with fewer than two month starts
        # yields no window to process.
        logger.info(
            f"no monthly batches between {min_timestamp} and {max_timestamp}"
        )

    for done, (first_timestamp, last_timestamp) in enumerate(windows, start=1):
        copy_results_batch(first_timestamp, last_timestamp)
        logger.info(f"progress: {done}/{total}")
28 changes: 28 additions & 0 deletions
28
postgres/scripts/v2_to_v3/04b_fix_timestamps_for_mapping_sessions.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/*
 * Back-fill mapping sessions from legacy rows of the 'results' table.
 *
 * Results submitted before 2019-09-30 11:30:02.823 carry only a single
 * "timestamp" attribute instead of "start_time" and "end_time". The
 * initial transfer did not take this into account, so the session
 * timestamps for those rows are derived here from "timestamp".
 */
SET search_path = 'public';

INSERT INTO mapping_sessions
(
    SELECT
        project_id,
        group_id,
        user_id,
        nextval('mapping_sessions_mapping_session_id_seq') AS mapping_session_id,
        -- session start is taken as two minutes before the earliest result
        Min(timestamp) - INTERVAL '2 Minutes' AS start_time,
        Max(timestamp) AS end_time,
        count(*) AS items_count
    FROM results
    -- only rows that lack both session timestamps
    WHERE start_time IS NULL AND end_time IS NULL
    GROUP BY project_id, group_id, user_id
)
-- a session for this project/group/user already exists: overwrite only
-- its timestamps
ON CONFLICT (project_id, group_id, user_id)
DO UPDATE SET
    start_time = EXCLUDED.start_time,
    end_time = EXCLUDED.end_time;
|