If you are developing a tool that requires ICD-11 codes and the structure as shown in the ICD-11 release, use the API's linearization endpoints with `linearizationname` set to `mms`.

In [None]:
import requests
import json
import time
import os
import collections

# ========== CONFIGURATION ==========
# Credentials are read from the environment so that no secret is stored in the
# notebook. Set ICD_CLIENT_ID and ICD_CLIENT_SECRET before starting the kernel.
# Any secret that was ever saved inside a notebook should be treated as leaked
# and rotated.
CLIENT_ID = os.environ.get("ICD_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("ICD_CLIENT_SECRET", "")
TOKEN_ENDPOINT = 'https://icdaccessmanagement.who.int/connect/token'
ROOT_ENTITY_URI = 'https://id.who.int/icd/release/11/2023-01/mms'
# Entity URIs are built as MMS_BASE_URI + entity id, so keep it derived from the root.
MMS_BASE_URI = f'{ROOT_ENTITY_URI}/'
OUTPUT_DIR = 'icd11_mms_crawled_entities'
SLEEP_TIME = 0.3  # Seconds to pause between requests, to avoid rate limiting
MAX_ENTITIES = 1000  # Cap on the number of root-level children crawled (fast test run)
# ===================================

def get_access_token(timeout=30):
    """Request an OAuth2 client-credentials access token from TOKEN_ENDPOINT.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.
            Without a timeout, ``requests`` may block indefinitely.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        RuntimeError: If CLIENT_ID or CLIENT_SECRET is empty.
        requests.HTTPError: If the token endpoint answers with an error status
            (raised by ``raise_for_status``).
    """
    # Fail early with a clear message instead of an opaque HTTP error.
    if not CLIENT_ID or not CLIENT_SECRET:
        raise RuntimeError(
            "CLIENT_ID and CLIENT_SECRET must be set before requesting a token"
        )

    payload = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'scope': 'icdapi_access',
        'grant_type': 'client_credentials'
    }
    response = requests.post(TOKEN_ENDPOINT, data=payload, timeout=timeout)
    response.raise_for_status()
    token_data = response.json()

    return token_data['access_token']

def fetch_entity(entity_id, headers, visited, output_dir, max_children=1, timeout=30):
    """Download one entity, save it as JSON, then recurse into its children.

    Args:
        entity_id: Identifier appended to MMS_BASE_URI to form the request URI;
            also used as the output file stem.
        headers: HTTP headers sent with every request.
        visited: Set of entity ids already handled; mutated in place so that no
            entity is fetched twice.
        output_dir: Existing directory that receives ``<entity_id>.json``.
        max_children: How many entries of the response's ``child`` list to
            follow per node. Defaults to 1 (quick test crawl); ``None`` follows
            every child.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None. A non-200 response is reported with ``print`` and that branch of
        the crawl is abandoned.
    """
    if entity_id in visited:
        return
    visited.add(entity_id)

    uri = f'{MMS_BASE_URI}{entity_id}'
    response = requests.get(uri, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"⚠️ Failed to fetch {uri}")
        return

    data = response.json()

    with open(os.path.join(output_dir, f"{entity_id}.json"), 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # Recurse into children; slicing with None keeps the whole list.
    child_uris = data.get('child', [])[:max_children]
    for child_uri in child_uris:
        # NOTE(review): only the last path segment is kept as the id. If the API
        # can return child URIs with more than one segment after the base, that
        # id would not round-trip through MMS_BASE_URI -- TODO confirm.
        child_id = child_uri.split("/")[-1]
        if child_id in visited:
            # Nothing will be requested, so do not pay the rate-limit pause.
            continue
        time.sleep(SLEEP_TIME)
        fetch_entity(child_id, headers, visited, output_dir,
                     max_children=max_children, timeout=timeout)

def crawl_icd11(timeout=30):
    """Crawl the MMS linearization starting from ROOT_ENTITY_URI.

    Creates OUTPUT_DIR, obtains an access token, reads the root entity's
    ``child`` list, writes the first MAX_ENTITIES child ids to
    ``MMS_ROOTS.json`` in the current directory, and calls ``fetch_entity``
    for each of those ids.

    Args:
        timeout: Seconds to wait for the root entity request.

    Raises:
        requests.HTTPError: If the root entity request returns an error status.
            Previously such a response was parsed as if it were valid, which
            could end in an empty crawl that still reported success.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print("🔐 Getting access token...")
    token = get_access_token()

    headers = {
        'Authorization': f'Bearer {token}',
        'Accept': 'application/json',
        'Accept-Language': 'en',
        'API-Version': 'v2'
    }

    response = requests.get(ROOT_ENTITY_URI, headers=headers, timeout=timeout)
    # Stop here on 401/403/5xx instead of silently crawling nothing.
    response.raise_for_status()
    root_data = response.json()
    root_children = root_data.get('child', [])

    # Keep only the last URI segment of each root child as its id.
    root_ids = [uri.split('/')[-1] for uri in root_children[:MAX_ENTITIES]]
    with open("MMS_ROOTS.json", 'w', encoding='utf-8') as f:
        json.dump(root_ids, f)

    # One shared set so an entity reachable from several roots is fetched once.
    visited = set()
    for entity_id in root_ids:
        fetch_entity(entity_id, headers, visited, OUTPUT_DIR)

    print(f"✅ Done! MMS linearized entities saved to '{OUTPUT_DIR}'")
# Run the crawl: requests a token, then downloads entities into OUTPUT_DIR.
crawl_icd11()

🔐 Getting access token...
✅ Done! MMS linearized entities saved to 'icd11_mms_crawled_entities'


In [2]:
import os
import json
import pandas as pd

def extract_icd_text_data(json_folder):
    """Build a table of text fields from the entity JSON files in a folder.

    Args:
        json_folder: Directory to scan. Only files ending in ``.json`` are read.

    Returns:
        pandas.DataFrame with one row per JSON file and the columns ``id``,
        ``code``, ``title``, ``definition``, ``synonyms`` (labels joined with
        ``"; "``) and ``full_text`` (title, definition and synonyms joined by
        single spaces, with empty parts left out). Rows follow the sorted file
        names, and the columns are present even when the folder holds no JSON
        files.
    """
    columns = ["id", "code", "title", "definition", "synonyms", "full_text"]
    records = []

    # os.listdir order is arbitrary; sort so the row order is reproducible.
    for filename in sorted(os.listdir(json_folder)):
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(json_folder, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        code = data.get("code", "")
        id_ = data.get("@id", "").split("/")[-1]
        # `or {}` also covers a key that is present but null.
        title = (data.get("title") or {}).get("@value", "")
        definition = (data.get("definition") or {}).get("@value", "")

        # Collect synonym labels, skipping entries that carry no text.
        synonyms = []
        for s in data.get("synonym") or []:
            label = (s.get("label") or {}).get("@value")
            if label:
                synonyms.append(label)

        # Join only the non-empty parts so a missing definition or synonym list
        # does not leave double or trailing spaces in full_text.
        text_parts = [f"{title}."]
        if definition:
            text_parts.append(definition)
        text_parts.extend(synonyms)
        full_text = " ".join(text_parts)

        records.append({
            "id": id_,
            "code": code,
            "title": title,
            "definition": definition,
            "synonyms": "; ".join(synonyms),
            "full_text": full_text
        })

    return pd.DataFrame(records, columns=columns)

# Usage: build the text table from the JSON files in the crawl output folder.
df = extract_icd_text_data("icd11_mms_crawled_entities")
# Optional: uncomment to persist the table as CSV.
#df.to_csv("icd11_text_data.csv", index=False)


In [3]:
# Display the extracted table as the cell's last expression.
df

Unnamed: 0,id,code,title,definition,synonyms,full_text
0,1028330801,3A00.01,Chronic posthaemorrhagic anaemia,Chronic iron-deficiency anaemia from bleeding ...,,Chronic posthaemorrhagic anaemia. Chronic iron...
1,135352227,,Bacterial intestinal infections,"Any condition of the intestines, caused by an ...",,Bacterial intestinal infections. Any condition...
2,1435254666,01,Certain infectious or parasitic diseases,This chapter includes certain conditions cause...,,Certain infectious or parasitic diseases. This...
3,1459690929,4A00.00,Neutrophil immunodeficiency syndrome,Neutrophil immunodeficiency syndrome is a prim...,,Neutrophil immunodeficiency syndrome. Neutroph...
4,1577750667,3A00,Iron deficiency anaemia,"A disease caused by chronic or acute bleeding,...",,Iron deficiency anaemia. A disease caused by c...
5,1630407678,02,Neoplasms,An abnormal or uncontrolled cellular prolifera...,,Neoplasms. An abnormal or uncontrolled cellula...
6,1719389232,2A00,Primary neoplasms of brain,,,Primary neoplasms of brain.
7,1722092627,5A00,Hypothyroidism,,,Hypothyroidism.
8,1766440644,03,Diseases of the blood or blood-forming organs,This chapter includes diseases of the blood as...,,Diseases of the blood or blood-forming organs....
9,1907280211,,Disorders of the thyroid gland or thyroid horm...,Disorders due to dysfunction of thyroid gland ...,,Disorders of the thyroid gland or thyroid horm...
