In [None]:
import getpass
import re
from urllib.parse import quote

import sqlalchemy as sa
from sqlalchemy import create_engine, text

## Settings

Replace these parameters with the values for your data source.

In [None]:
# Name of the schema that will hold the OMOP CDM tables, e.g. 'erspc'.
CDM_SCHEMA = ''  # 'erspc'
# Name of the schema that will hold the results tables, e.g. 'erspc_results'.
RESULTS_SCHEMA = ''  # 'erspc_results'
# Path to the SQL dump whose INSERT statements are loaded into the CDM schema.
CDM_DATA_PATH = ''  # '../data/erspc_omop_cdm.sql'

# Connect to the db

In [None]:
# Connection settings for the PostgreSQL server that hosts the database.
server = 'postgresql'
port = '5432'
db = 'ohdsi'

# Credentials are prompted for interactively so they are never stored in the
# notebook. Both are percent-encoded before being placed in the URL; without
# this, characters such as '@', ':' or '/' in a user name or password would
# corrupt the URL. safe='' makes sure '/' is encoded as well.
db_user = quote(input("User:"), safe='')
db_password = quote(getpass.getpass("Password:"), safe='')
engine = create_engine(f'postgresql://{db_user}:{db_password}@{server}:{port}/{db}')

In [None]:
# Debug helper: uncomment to list all schema names in the database.
#sa.inspect(engine).get_schema_names()

In [None]:
# Debug helper: uncomment to list the tables in the 'erspc_results' schema.
#sa.inspect(engine).get_table_names('erspc_results')

In [None]:
# Warn before anything is created if the target CDM schema is already present.
existing_schemas = sa.inspect(engine).get_schema_names()
if CDM_SCHEMA in existing_schemas:
    print('WARNING: the schema already exists. Proceed with care')

## Create CDM

In [None]:
# Open the connection used by the CDM cells below.
con = engine.connect()

In [None]:
# To start over, uncomment the next line to drop the existing schema first.
# con.execute('DROP SCHEMA %s CASCADE;' % CDM_SCHEMA)
create_cdm_schema_sql = 'CREATE SCHEMA %s;' % CDM_SCHEMA
con.execute(create_cdm_schema_sql)

In [None]:
# Run the CDM DDL script with the CDM schema as the connection's search_path.
con.execute('SET search_path TO %s;' % CDM_SCHEMA)
with open('pioneer_omop_cdm/OMOP CDM postgresql v6_0_onco_modified ddl.sql') as ddl_file:
    ddl_sql = ddl_file.read()
    con.execute(ddl_sql)

## Load data

In [None]:
from extract_inserts_from_sql_dump import extract_inserts

In [None]:
# Hand the SQL dump at CDM_DATA_PATH to extract_inserts and keep its result.
# NOTE(review): if extract_inserts returns a lazy iterator, it would have to be
# consumed before the file is closed — confirm against its implementation.
with open(CDM_DATA_PATH) as dump_file:
    queries = extract_inserts(dump_file)

In [None]:
# Execute the extracted statements one by one in the CDM schema. Each statement
# is run in its own try block so that a failure is reported and the loop goes on.
con.execute('SET search_path TO %s;' % CDM_SCHEMA)
for query in queries:
    if 'visit_occurrence_source_value' in query:
        # Quick and dirty fix for visit occurrence source value.
        # TODO: this field should not be exported (removed in final ETL step)
        # Step 1: remove the column from the column list.
        query = query.replace(', visit_occurrence_source_value', '')
        # Step 2: remove the matching value from the VALUES tuples.
        if query.startswith('INSERT INTO observation'):
            # Keep the first 13 comma-terminated items after '(', drop the
            # 14th, and keep the following 7.
            query = re.sub(r'\(((?:.+?,){13}).+?,((?:.+?,){7})', r'(\1\2', query)
        else:
            # Drop a single-quoted value that sits directly before ')'.
            query = re.sub(r", ?'[^']+?'\)", ')', query)
    try:
        result = con.execute(query)
        print(query[:20], ' ', result.rowcount)
    except Exception as exc:
        # The handler must use the name bound by `except`; referring to an
        # unbound name here would raise NameError and abort the whole load.
        detail = exc.args[0] if exc.args else exc
        print('Failed insert: %s - %s' % (query[:20], detail))
print('Inserts done')

## Apply constraints, indexes and add vocab views

In [None]:
# Run the primary-key/index script with the CDM schema as search_path.
con.execute('SET search_path TO %s;' % CDM_SCHEMA)
with open('pioneer_omop_cdm/OMOP CDM postgresql v6_0_dev pk indexes.sql') as index_file:
    index_sql = index_file.read()
    con.execute(index_sql)

In [None]:
# Run the constraints script. This cell does not set search_path itself, so it
# uses whatever search_path is currently active on `con`.
with open('pioneer_omop_cdm/OMOP CDM postgresql v6_0_dev constraints.sql') as constraints_file:
    constraints_sql = constraints_file.read()
    con.execute(constraints_sql)

In [None]:
# Run the vocab_view.sql script with the CDM schema as search_path.
con.execute('SET search_path TO %s;' % CDM_SCHEMA)
with open('other/vocab_view.sql') as vocab_view_file:
    vocab_view_sql = vocab_view_file.read()
    con.execute(vocab_view_sql)

## Setup results schema

In [None]:
# Open a connection for the results-schema cells below.
# NOTE(review): no con.close() is visible for the connection opened in the
# "Create CDM" section, so rebinding `con` here leaves that earlier connection
# without an explicit close.
con = engine.connect()

In [None]:
# To start over, uncomment the next line to drop the existing schema first.
# con.execute('DROP SCHEMA %s CASCADE;' % RESULTS_SCHEMA)
create_results_schema_sql = 'CREATE SCHEMA %s;' % RESULTS_SCHEMA
con.execute(create_results_schema_sql)

In [None]:
# Run the results DDL script with the results schema as search_path.
con.execute('SET search_path TO %s;' % RESULTS_SCHEMA)
with open('other/results_ddl_2.7.4.sql') as f_results:
    # The SQLAlchemy (1.x) execution option is spelled `autocommit`. A key named
    # `auto_commit` is not recognised, so it would not force a commit.
    con.execute(text(f_results.read()).execution_options(autocommit=True))

In [None]:
# Create (or replace) a concept_hierarchy view over vocab.concept_hierarchy.
# The view name is unqualified, so it lands in the schema the connection's
# current search_path points at.
concept_hierarchy_view_sql = 'CREATE OR REPLACE VIEW concept_hierarchy AS (SELECT * FROM vocab.concept_hierarchy);'
con.execute(concept_hierarchy_view_sql)

For some reason Achilles does not create the `achilles_results_dist` table, so create it here as an empty copy of the same table from another results schema.

In [None]:
# Create an empty achilles_results_dist table in the results schema, using the
# table of the same name in synpuf_1k_results as the column template
# (WITH NO DATA copies the structure only).
create_dist_table_sql = 'CREATE TABLE %s.achilles_results_dist AS (SELECT * FROM synpuf_1k_results.achilles_results_dist) WITH NO DATA' % RESULTS_SCHEMA
con.execute(create_dist_table_sql)

In [None]:
# Close the connection opened for the results-schema setup.
con.close()

## Source Daimon

_manual step_

Add the new source in Atlas -> Configuration.

Note: had to 'Clear Configuration Cache' to make it work. Was getting EntityExists errors before that.

## Run Achilles (R)

Run Achilles R script

# End of new data source setup steps

========================================================

# One time scripts. Do NOT execute again

## Add CPT4 to concepts
Only needed once (run on 2020-02-16)

In [None]:
# Open a connection for the one-time CPT4 import below.
con = engine.connect()

In [None]:
import csv

In [None]:
# Insert the CPT4 concepts from the tab-separated file into vocab.concept.
# Values are passed as bound parameters instead of being spliced into the SQL
# text, so quotes, ",'" sequences or ':name' patterns inside a concept name
# cannot corrupt the statement. Empty fields are stored as NULL.
with open('../data/CONCEPT_CPT4_WITH_CONCEPT_NAMES_20191223.csv', newline='') as f:
    cpt4_concepts = csv.DictReader(f, delimiter='\t')
    columns = cpt4_concepts.fieldnames or []
    # Positional VALUES list with one named placeholder per column of the file.
    placeholders = ', '.join(':c%d' % i for i in range(len(columns)))
    insert_stmt = text('insert into vocab.concept VALUES (%s);' % placeholders)
    for cpt4 in cpt4_concepts:
        # '' (and a missing trailing field) becomes None, i.e. SQL NULL.
        params = {'c%d' % i: (cpt4[col] or None) for i, col in enumerate(columns)}
        try:
            con.execute(insert_stmt, params)
        except Exception as e:
            # Errors mentioning xpk_concept are skipped; presumably this is the
            # key constraint hit when the concept is already present.
            if 'xpk_concept' in str(e):
                continue
            # Any other error is printed and stops the import.
            print(e)
            break

In [None]:
# Sanity check: number of CPT4 concepts now present (15935 was noted for this run).
cpt4_count_sql = "select count(*) from vocab.concept where vocabulary_id = 'CPT4'"
con.execute(cpt4_count_sql).fetchone()

In [None]:
# Close the connection opened for the CPT4 import.
con.close()

## Concept Hierarchy to vocab table

In [None]:
# Schema used as search_path when the concept hierarchy script is run below.
VOCABULARY_SCHEMA = 'vocab'

In [None]:
# Open a connection for the one-time concept hierarchy setup below.
con = engine.connect()

In [None]:
# Run the concept hierarchy script statement by statement, with the vocabulary
# schema as search_path.
con.execute('SET search_path TO %s;' % VOCABULARY_SCHEMA)
with open('other/concept_hierarchy_2.7.4.sql') as f_ch:
    query = ''
    for line in f_ch:
        query += line
        # Any line containing ';' is treated as the end of a statement. This is
        # a plain text check, so a ';' inside a comment or string literal would
        # also split the statement.
        if ';' in line:
            print(query[:100])
            # The SQLAlchemy (1.x) execution option is spelled `autocommit`; a
            # key named `auto_commit` is not recognised and would be ignored.
            con.execute(text(query).execution_options(autocommit=True))
            query = ''
    # Text after the last ';' is never executed; report it instead of dropping
    # it silently.
    if query.strip():
        print('WARNING: trailing text without a terminating ";" was not executed: %r' % query[:100])

In [None]:
# Switch to the results schema and (re)create the concept_hierarchy view there,
# selecting from vocab.concept_hierarchy.
con.execute('SET search_path TO %s;' % RESULTS_SCHEMA)
concept_hierarchy_view_sql = 'CREATE OR REPLACE VIEW concept_hierarchy AS (SELECT * FROM vocab.concept_hierarchy);'
con.execute(concept_hierarchy_view_sql)

In [None]:
# Close the connection opened for the concept hierarchy setup.
con.close()