# ATC Classification Mappings

In [9]:
#imports 

import re 
import numpy as np
import pandas as pd

### Build ATC Mapping
The following function builds the ATC mapping across the five ATC levels. The mapping makes it easy to identify ancestor-descendant relationships. A pre-built mapping of all drugs found in AEOLUS/FAERS is provided in the "ATC_MAPPING.csv" file. However, this code can also be used to create a new map for any list of drugs, given their drug concept IDs.

In [10]:
# BUILD ATC MAP 

# map different ATC levels to create ancestor-descendant relationships
# writes to "ATC_MAPPING.csv" file in ROOT directory
# pass any numpy int array of ATC drug concept IDs here as concept_ids parameter
# currently builds a map of all IDs found in AEOLUS
# pass str path of files location to ROOT
# pandas may show a 'low memory' (mixed dtype) warning while reading the vocabulary files; it can be ignored
def build_ATC_map(ROOT, concept_ids):
    """Build the drug -> ATC level 1-5 mapping and write it to ATC_MAPPING.csv.

    Reads the tab-delimited ``CONCEPT_ANCESTOR.csv`` and ``CONCEPT.csv``
    files found in ``ROOT``. For every distinct drug concept ID it records
    the id and name of the drug's ATC ancestor at each of the five ATC
    levels. The result is written to ``ATC_MAPPING.csv`` in ``ROOT``.

    Parameters
    ----------
    ROOT : str
        Directory holding the vocabulary files (with or without a trailing
        path separator).
    concept_ids : array-like of int
        Drug concept IDs (numpy array, pandas Series or list). Duplicates
        are ignored; the first-seen order is kept.

    Returns
    -------
    None
        The mapping is only written to disk.

    Notes
    -----
    When a drug has several ATC ancestors of the same level, the one that
    appears last in ``CONCEPT.csv`` is kept. Drugs without an ATC ancestor
    at some level keep an empty cell for that level.
    """
    import os

    levels = ['1st', '2nd', '3rd', '4th', '5th']
    source_columns = []
    output_columns = []
    for position, level in enumerate(levels, start=1):
        source_columns += ['ATC ' + level + ' id', 'ATC ' + level + ' name']
        output_columns += ['ATC_' + str(position) + '_id',
                           'ATC_' + str(position) + '_name']

    #reads ATC data provided by OHDSI (only the columns that are used)
    ATC_ancestor = pd.read_csv(
        os.path.join(ROOT, "CONCEPT_ANCESTOR.csv"), delimiter="\t",
        usecols=['ancestor_concept_id', 'descendant_concept_id'])
    ATC_concept = pd.read_csv(
        os.path.join(ROOT, "CONCEPT.csv"), delimiter="\t",
        usecols=['concept_id', 'concept_name',
                 'vocabulary_id', 'concept_class_id'])

    #set up ATC mapping dataframe
    # pd.unique works for numpy arrays, Series and lists alike and keeps the
    # order of first appearance.
    drug_ids = pd.unique(np.asarray(concept_ids))
    ATC_mapping = pd.DataFrame(columns=source_columns, index=drug_ids)

    # Join the ATC concepts with the ancestor links of the requested drugs
    # once, instead of scanning the whole ancestor table for every drug.
    atc_concepts = ATC_concept[ATC_concept['vocabulary_id'] == "ATC"]
    drug_links = ATC_ancestor[
        ATC_ancestor['descendant_concept_id'].isin(drug_ids)]
    # An inner merge keeps the order of the left (concept) rows, so for a
    # given drug and level the last concept listed in CONCEPT.csv wins.
    linked = atc_concepts.merge(
        drug_links, left_on='concept_id', right_on='ancestor_concept_id')

    #add values to ATC mapping
    # Cells are filled one by one so that ids stay integers next to the
    # empty cells when the file is written.
    for drugID, classid, atc_id, atc_name in zip(
            linked['descendant_concept_id'], linked['concept_class_id'],
            linked['concept_id'], linked['concept_name']):
        idCol = str(classid) + ' id'
        if idCol not in ATC_mapping.columns:
            # Not one of the five ATC levels: skip rather than grow the
            # frame and break the column rename below.
            continue
        ATC_mapping.at[drugID, idCol] = atc_id
        ATC_mapping.at[drugID, str(classid) + ' name'] = atc_name

    #reformat
    ATC_mapping.columns = output_columns
    ATC_mapping = ATC_mapping.reset_index().rename(
        columns={'index': 'drug_concept_id'})

    #write to file
    # The positional index is still written as the first column so the file
    # layout stays the same as before.
    ATC_mapping.to_csv(os.path.join(ROOT, "ATC_MAPPING.csv"))

    return

# Folder that holds the OHDSI vocabulary files and ATC_MAPPING.csv
ROOT = "ATC Vocabulary/"
# 'drug_concept_id' column of the AEOLUS drug ID/name reference table
aeolus_ID = pd.read_csv(
    "Reference Data/AEOLUS_DRUG_IDNAME.csv").loc[:, 'drug_concept_id']

# Uncomment to rebuild ATC_MAPPING.csv for the AEOLUS drugs:
#build_ATC_map(ROOT, aeolus_ID)

### Use ATC Mapping

The following code provides various functions to map, assign and group your data by ATC levels given drug concept names.

In [11]:
# ASSIGN ATC LEVEL 

# Provide dataframe with column or index of 'drug_concept_name' and ATC Classification level 
# Returns a copy of the dataframe with an added NAME column ('ATC_<level>_name') for the specified level
def assignATC(df, ATC_level="3"):
    """Return a copy of ``df`` with the ATC class name of each drug added.

    Drug names are taken from the index when it is named
    ``'drug_concept_name'``, otherwise from the ``'drug_concept_name'``
    column. Each name is translated to its drug concept ID with
    ``Reference Data/AEOLUS_DRUG_IDNAME.csv`` and then to the ATC class name
    with ``ATC Vocabulary/ATC_MAPPING.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with drug names in the index or in a column (see above).
    ATC_level : str or int, default "3"
        ATC level whose name should be added; selects the mapping column
        ``'ATC_<level>_name'``.

    Returns
    -------
    pandas.DataFrame
        New frame with the column ``'ATC_<level>_name'`` added (or replaced
        if it already existed). Drugs that are missing from either
        reference file, or have no class at this level, get NaN.

    Raises
    ------
    ValueError
        If the mapping file has no column for the requested level.
    KeyError
        If ``df`` has neither an index nor a column named
        ``'drug_concept_name'``.
    """
    #set up
    ROOT = "ATC Vocabulary/"
    ATC_mapping = pd.read_csv(ROOT + "ATC_MAPPING.csv")
    aeolus_IDNAME = pd.read_csv("Reference Data/AEOLUS_DRUG_IDNAME.csv")

    ATC_col = "ATC_" + str(ATC_level) + "_name"
    if ATC_col not in ATC_mapping.columns:
        raise ValueError(
            "no column '" + ATC_col + "' in ATC_MAPPING.csv "
            "(ATC_level must be between 1 and 5)")

    if df.index.name == 'drug_concept_name':
        drug_names = df.index
    elif 'drug_concept_name' in df.columns:
        drug_names = df['drug_concept_name']
    else:
        raise KeyError(
            "df needs an index or a column named 'drug_concept_name'")

    # Build the name -> ATC name lookup once instead of querying both
    # reference tables for every row. The first entry wins on duplicates.
    id_by_name = aeolus_IDNAME.drop_duplicates('drug_concept_name')\
                              .set_index('drug_concept_name')['drug_concept_id']
    atc_by_id = ATC_mapping.drop_duplicates('drug_concept_id')\
                           .set_index('drug_concept_id')[ATC_col]
    atc_by_name = id_by_name.map(atc_by_id)

    # Map positionally (plain array in, plain array out) so the result lines
    # up with the rows of df whatever its index looks like.
    atc_values = pd.Series(np.asarray(drug_names), dtype=object)\
                   .map(atc_by_name).values

    # assign() returns a new frame, so the caller's dataframe is untouched.
    return df.assign(**{ATC_col: atc_values})


In [12]:
# GROUP BY ATC level (& any secondary column) 

# Provide dataframe with column or index of 'drug_concept_name' 
# Input dataframe need not have ATC information ~ just a column of drug names is sufficient
# Defaults to ATC_level 3, but can be set between 1 and 5
# Defaults to returning all counts for the given ATC level, but a minimum count can be specified
# Returns dataframe indexed by ATC level NAME (and the secondary column, if given) with the counts found in the input dataframe
def groupbyATC(df, ATC_level="3", minimum_count=0, secondary=""):
    """Count the rows of ``df`` per ATC class (and optional second column).

    If ``df`` has no ``'ATC_<level>_name'`` column yet, it is added first
    with ``assignATC``; in that case ``df`` must carry drug names in a
    column or index named ``'drug_concept_name'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data.
    ATC_level : str or int, default "3"
        ATC level to group by.
    minimum_count : int, default 0
        When non-zero, only groups with at least this many rows are kept.
    secondary : str, default ""
        Optional name of a second column to group by together with the ATC
        class.

    Returns
    -------
    pandas.DataFrame
        Single column ``'count'`` indexed by the ATC class name, or by
        (ATC class name, secondary value) when ``secondary`` is given.
        Rows whose grouping value is missing are not counted (pandas
        groupby default).
    """
    ATC_col = "ATC_" + str(ATC_level) + "_name"
    if ATC_col not in df.columns:
        df = assignATC(df, ATC_level=ATC_level)

    group_keys = [ATC_col, secondary] if secondary else ATC_col

    # size() counts rows per group. Counting the first remaining column
    # instead fails when there is none and misses rows where it is NaN.
    df_grouped = df.groupby(group_keys).size().to_frame("count")

    if minimum_count:
        # ">=" so a group with exactly `minimum_count` rows is kept
        df_grouped = df_grouped[df_grouped['count'] >= minimum_count]

    return df_grouped

In [13]:
# GET DATA BY ATC GROUP 

# extract information for a specific ATC grouping at a specific ATC level from your dataframe 
def getdataforATCgroup(df, ATCgroup, ATC_level="3"):
    """Return the rows of ``df`` whose drug belongs to one ATC class.

    Drug names are taken from the index when it is named
    ``'drug_concept_name'``, otherwise from the ``'drug_concept_name'``
    column. Each name is translated to its drug concept ID with
    ``Reference Data/AEOLUS_DRUG_IDNAME.csv`` and then to the ATC class name
    with ``ATC Vocabulary/ATC_MAPPING.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with drug names in the index or in a column (see above).
    ATCgroup : str
        ATC class name to keep, compared with the mapping column
        ``'ATC_<level>_name'``.
    ATC_level : str or int, default "3"
        ATC level at which ``ATCgroup`` is defined.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df`` in their original order, with the
        original columns and index labels. Drugs that cannot be mapped are
        left out.

    Raises
    ------
    ValueError
        If the mapping file has no column for the requested level.
    KeyError
        If ``df`` has neither an index nor a column named
        ``'drug_concept_name'``.
    """
    # The reference tables are loaded here; the function used to rely on
    # names that are not defined at module level.
    ROOT = "ATC Vocabulary/"
    ATC_mapping = pd.read_csv(ROOT + "ATC_MAPPING.csv")
    aeolus_IDNAME = pd.read_csv("Reference Data/AEOLUS_DRUG_IDNAME.csv")

    ATC_col = "ATC_" + str(ATC_level) + "_name"
    if ATC_col not in ATC_mapping.columns:
        raise ValueError(
            "no column '" + ATC_col + "' in ATC_MAPPING.csv "
            "(ATC_level must be between 1 and 5)")

    if df.index.name == 'drug_concept_name':
        drug_names = df.index
    elif 'drug_concept_name' in df.columns:
        drug_names = df['drug_concept_name']
    else:
        raise KeyError(
            "df needs an index or a column named 'drug_concept_name'")

    # name -> ATC class name lookup, built once (first entry wins on
    # duplicated names / ids)
    id_by_name = aeolus_IDNAME.drop_duplicates('drug_concept_name')\
                              .set_index('drug_concept_name')['drug_concept_id']
    atc_by_id = ATC_mapping.drop_duplicates('drug_concept_id')\
                           .set_index('drug_concept_id')[ATC_col]
    atc_by_name = id_by_name.map(atc_by_id)

    atc_values = pd.Series(np.asarray(drug_names), dtype=object)\
                   .map(atc_by_name)

    # A positional boolean mask avoids label lookups, so repeated index
    # labels do not multiply rows and integer labels are not turned into
    # floats.
    in_group = atc_values.eq(ATCgroup).values
    return df.loc[in_group, :]