In [1]:
import pandas as pd
import requests
from urllib.parse import urlencode
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from sqlalchemy import text
from kf_model_omop.factory import scoped_session

In [7]:
# Look up the best-matching concept for a search term using the OHDSI Athena
# concept search API, then re-rank the hits with a fuzzy match on concept name.
API_URL = 'http://athena.ohdsi.org/api/v1/concepts'
query_params = {'query': 'Asian'.lower(),
                'standardConcept': 'Standard',
                'domain': 'Race'}
query_string = urlencode(query_params)
# When True, additionally restrict candidates to standard concepts.
prefer_standard = False

concept = None

# A timeout keeps the cell from hanging forever if the service is unreachable.
response = requests.get(f'{API_URL}?{query_string}', timeout=30)
athena_results = []
if response.status_code == 200:
    # Tolerate a payload with a missing or null 'content' entry.
    athena_results = response.json().get('content') or []
else:
    # `Response.text` is a property, not a method; calling it raises TypeError.
    print(response.text)

# Fuzzy text search
if athena_results:
    # Fall back to the API's own top hit if nothing better is chosen below.
    concept = athena_results[0]

    # Score the query against each concept's name only. Handing the raw
    # result dicts to fuzzywuzzy would score the query against the whole
    # record instead of its name. With a dict of choices every match is
    # returned as (value, score, key), so the key leads back to the record.
    names_by_index = {index: str(result.get('name', ''))
                      for index, result in enumerate(athena_results)}
    fuzzy_results = process.extract(query_params['query'], names_by_index,
                                    scorer=fuzz.token_sort_ratio)
    fuzzy_results_df = pd.DataFrame(
        [athena_results[index] for _name, _score, index in fuzzy_results])

    # Apply filters
    filtered_df = fuzzy_results_df[fuzzy_results_df['invalidReason'] == 'Valid']
    if prefer_standard:
        filters = [('standardConcept', 'Standard')]
        for column, wanted in filters:
            if not filtered_df.empty:
                filtered_df = filtered_df[filtered_df[column] == wanted]

    # Choose best result: the top-ranked row that passed the filters,
    # otherwise the top-ranked row overall.
    if not filtered_df.empty:
        concept = filtered_df.iloc[0].to_dict()
    elif not fuzzy_results_df.empty:
        concept = fuzzy_results_df.iloc[0].to_dict()

concept

{'className': 'Race',
 'code': '2',
 'domain': 'Race',
 'id': 8515,
 'invalidReason': 'Valid',
 'name': 'Asian',
 'standardConcept': 'Standard',
 'vocabulary': 'Race'}

In [8]:
from common.target_api_config import schema
from common.concept_schema import OMOP

# Build a lookup from each model's primary-key column name to the model name.
# Models whose '_primary_key' entry is empty are left out; when a model lists
# several key columns, only the first one is used.
id_model_map = {}
for model_name, model_schema in schema.items():
    primary_key = model_schema['_primary_key']
    if not primary_key:
        continue
    primary_key_name = next(iter(primary_key.keys()))
    id_model_map[primary_key_name] = model_name

id_model_map

{'care_site_id': 'CareSite',
 'concept_id': 'Concept',
 'concept_class_id': 'ConceptClass',
 'condition_era_id': 'ConditionEra',
 'condition_occurrence_id': 'ConditionOccurrence',
 'cost_id': 'Cost',
 'device_exposure_id': 'DeviceExposure',
 'domain_id': 'Domain',
 'dose_era_id': 'DoseEra',
 'drug_era_id': 'DrugEra',
 'drug_exposure_id': 'DrugExposure',
 'location_id': 'Location',
 'location_history_id': 'LocationHistory',
 'measurement_id': 'Measurement',
 'note_id': 'Note',
 'note_nlp_id': 'NoteNlp',
 'observation_id': 'Observation',
 'observation_period_id': 'ObservationPeriod',
 'payer_plan_period_id': 'PayerPlanPeriod',
 'person_id': 'Person',
 'procedure_occurrence_id': 'ProcedureOccurrence',
 'provider_id': 'Provider',
 'relationship_id': 'Relationship',
 'source_code': 'SourceToConceptMap',
 'specimen_id': 'Speciman',
 'survey_conduct_id': 'SurveyConduct',
 'visit_detail_id': 'VisitDetail',
 'visit_occurrence_id': 'VisitOccurrence',
 'vocabulary_id': 'Vocabulary'}