## Read cohorts workbook
Assumptions:
 - The first column holds the phenotype id, which encodes the cohort type: it starts with T for target, O for outcome or S for strata.
 - Second column is the cohort name
 - One of the columns in each row contains a link to the cohort definition on PIONEER Atlas (a URL containing `/cohortdefinition/<id>`)

In [14]:
import xlrd
import re

In [3]:
# Open the phenotype tracker workbook from the current working directory.
# NOTE(review): xlrd 2.0 and later only read .xls files; opening this .xlsx
# presumably requires xlrd < 2.0 — confirm the installed version.
book = xlrd.open_workbook('OHDSI studyathon phenotype tracker.xlsx')

In [4]:
# The cohort rows are read from the first worksheet of the workbook.
sheet = book.sheet_by_index(0)

In [33]:
# Phenotype id: a cohort type letter (group 1: T, O or S) followed by at
# least one more character (group 2); group 0 is the whole matched id.
PHENOTYPE_ID_PATTERN = re.compile(r'([TOS])(.+)')
# Atlas cohort link: captures the numeric id that follows '/cohortdefinition/'.
COHORT_LINK_PATTERN = re.compile(r'/cohortdefinition/(\d+)')

In [36]:
def find_cohort_atlas_id(row):
    """Return the Atlas cohort definition id linked from a worksheet row.

    Scans the cells of ``row`` in order and returns the digits captured by
    ``COHORT_LINK_PATTERN`` from the first cell whose text contains a
    ``/cohortdefinition/<id>`` link.

    Args:
        row: Iterable of cell objects exposing a ``value`` attribute
            (for example the list returned by ``sheet.row(rx)``).

    Returns:
        The cohort definition id as a string of digits, or ``None`` when no
        cell in the row contains a matching link.
    """
    for cell in row:
        value = cell.value
        # Only text cells can hold a link. Numeric cells carry floats, and
        # handing those to a regex search raises TypeError.
        if not isinstance(value, str):
            continue
        link_match = COHORT_LINK_PATTERN.search(value)
        if link_match:
            return link_match.group(1)
    return None

In [43]:
# Group the worksheet rows into target (T), outcome (O) and strata (S)
# cohorts, keyed by the type letter captured from the phenotype id.
cohorts = {
    'T': [],
    'O': [],
    'S': []
}
for rx in range(sheet.nrows):
    row = sheet.row(rx)
    phenotype_id = row[0].value
    # A non-text first cell (e.g. a number) cannot hold a phenotype id and
    # would make the regex search raise TypeError, so skip such rows.
    if not isinstance(phenotype_id, str):
        continue
    # NOTE(review): search() accepts a T/O/S anywhere in the cell, while the
    # ids are assumed to start with that letter — confirm whether match()
    # is what is intended here.
    id_match = PHENOTYPE_ID_PATTERN.search(phenotype_id)
    if not id_match:
        # Rows without a phenotype id in the first column are ignored.
        continue

    cohort_id = id_match.group(0)
    cohort_type = id_match.group(1)
    cohort_name = f'{row[1].value}'
    # None when no cell of the row links to an Atlas cohort definition.
    cohort_atlas_id = find_cohort_atlas_id(row)
    cohort_atlas_name = f'[PIONEER {cohort_id}] {cohort_name}'

    cohorts[cohort_type].append({
        'name': cohort_name,
        'atlasName': cohort_atlas_name,
        'atlasId': cohort_atlas_id,
        'cohortId': cohort_id
    })

In [47]:
# Sanity check: number of cohorts collected for each cohort type.
[(cohort_type, len(cohorts[cohort_type])) for cohort_type in cohorts]

[('T', 18), ('O', 13), ('S', 38)]

## Write cohorts csv
Write the 'CohortsToCreate' CSV files for the target, outcome and strata cohorts.
Then write the same three files again under `settings/diagnostics`.

In [48]:
import csv

In [50]:
def write_cohorts_to_create(filename, rows):
    """Write cohort records to a 'CohortsToCreate' CSV file.

    The file starts with a header row holding the four column names, followed
    by one line per record.

    Args:
        filename: Path of the CSV file to create; an existing file is
            overwritten.
        rows: Iterable of dicts with the keys ``name``, ``atlasName``,
            ``atlasId`` and ``cohortId``.

    Raises:
        ValueError: If a record contains a key that is not one of the
            four columns.
    """
    fieldnames = ['name', 'atlasName', 'atlasId', 'cohortId']
    # newline='' hands line-ending control to the csv module; without it rows
    # end in '\r\r\n' on Windows. The explicit encoding keeps the output
    # identical on every platform instead of following the locale default.
    with open(filename, 'w', newline='', encoding='utf-8') as f_out:
        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

In [51]:
# Target cohorts for the main settings folder.
write_cohorts_to_create('settings/CohortsToCreateTarget.csv', cohorts['T'])

In [52]:
# Outcome cohorts for the main settings folder.
write_cohorts_to_create('settings/CohortsToCreateOutcome.csv', cohorts['O'])

In [53]:
# Strata cohorts for the main settings folder.
write_cohorts_to_create('settings/CohortsToCreateStrata.csv', cohorts['S'])

In [54]:
# The diagnostics settings folder receives the same target cohort list.
write_cohorts_to_create('settings/diagnostics/CohortsToCreateTarget.csv', cohorts['T'])

In [55]:
# The diagnostics settings folder receives the same outcome cohort list.
write_cohorts_to_create('settings/diagnostics/CohortsToCreateOutcome.csv', cohorts['O'])

In [56]:
# The diagnostics settings folder receives the same strata cohort list.
write_cohorts_to_create('settings/diagnostics/CohortsToCreateStrata.csv', cohorts['S'])