# Downloading all of the MMS data

We can download all of the MMS data from the ICD-11 API by performing a breadth-first traversal, downloading each node as we visit it, since the ICD-11 MMS codes are organized in a tree structure.

In [6]:
import icd11
import time
import pickle
import os

from collections import deque

def save_to_pickle(value, file_name):
    """Pickle ``value`` into ``mms_data/<file_name>.pkl``.

    Args:
        value: Any picklable object to store.
        file_name: File name without directory or extension. It is inserted
            into the path as-is, so it must not contain path separators
            unless a sub-directory is intended.

    Raises:
        OSError: If the directory or the file cannot be created/written.
        pickle.PicklingError: If ``value`` cannot be pickled.
    """
    # Create the cache directory on first use instead of failing with
    # FileNotFoundError when it is missing.
    os.makedirs('mms_data', exist_ok=True)
    # The context manager guarantees the handle is flushed and closed even
    # if pickling raises part-way through.
    with open('mms_data/'+file_name+'.pkl','wb') as pickle_file:
        pickle.dump(value, pickle_file)
    
# Breadth-first download of every node reachable from the MMS root.
# Each node is pickled into mms_data/ so that an interrupted run can be
# resumed: nodes whose pickle already exists are loaded from disk instead of
# being requested from the API again.

# Per-node retry cap so that a permanently failing node cannot keep the loop
# running forever.
MAX_RETRIES = 5

# The cache directory must exist before it is listed below.
os.makedirs('mms_data', exist_ok=True)

child_nodes = deque()
root =  icd11.release_data("")
save_to_pickle(root,'root')
initial_nodes = [node.split('/')[-1] for node in root['child']]
# Counts nodes that were fetched (or loaded from cache) and saved; the root
# has just been handled, hence 1.
number_of_nodes = 1
start_time = time.time()
saved_nodes = set(os.listdir('mms_data'))
retry_counts = {}
failed_nodes = []
child_nodes.extend(initial_nodes)
while child_nodes:
    print(number_of_nodes,end='\r')
    current_id = child_nodes.popleft()
    # Ids may contain '/' (see below); on disk the '/' is replaced by '_'.
    # The same stem is used for the cache lookup and for saving, otherwise
    # such nodes would never be found in the cache.
    file_stem = current_id.replace('/','_')
    try:
        if file_stem + '.pkl' in saved_nodes:
            # NOTE: pickle.load can execute arbitrary code; only files written
            # by this notebook should live in mms_data/.
            with open('mms_data/'+file_stem+'.pkl','rb') as cached_file:
                current_node = pickle.load(cached_file)
        else:
            current_node = icd11.release_data(current_id)
        if 'child' not in current_node:
            current_node['child'] = []
        current_child_nodes = list()
        for child_id in current_node['child']:
            chunked_id = child_id.split('/')
            if 'other' in child_id or 'unspecified' in child_id:
                # Child URLs containing 'other'/'unspecified' keep their last
                # two path segments as the id used for the next request.
                current_child_nodes.append(chunked_id[-2]+'/'+chunked_id[-1])
            else:
                current_child_nodes.append(chunked_id[-1])
        save_to_pickle(current_node, file_stem)
    except Exception as e:
        print(e)
        retry_counts[current_id] = retry_counts.get(current_id, 0) + 1
        if retry_counts[current_id] < MAX_RETRIES:
            # Retry this node first, after refreshing the API token.
            child_nodes.appendleft(current_id)
        else:
            failed_nodes.append(current_id)
            print("Giving up on node after", MAX_RETRIES, "attempts:", current_id)
        icd11.update_token()
    else:
        # Enqueue children only after the node was saved successfully, so a
        # retried node never enqueues its children twice.
        child_nodes.extend(current_child_nodes)
        number_of_nodes += 1
print("Number of nodes processed:", number_of_nodes)
print("Time elapsed in seconds:",  time.time() - start_time)
if failed_nodes:
    print("Nodes that could not be downloaded:", failed_nodes)

{'@context': 'http://id.who.int/icd/contexts/contextForTopLevel.json', '@id': 'http://id.who.int/icd/release/11/2019-04/mms', 'title': {'@language': 'en', '@value': 'International Classification of Diseases 11th Revision - Mortality and Morbidity Statistics'}, 'releaseId': '2019-04', 'releaseDate': '2019-04-06', 'child': ['http://id.who.int/icd/release/11/2019-04/mms/1435254666', 'http://id.who.int/icd/release/11/2019-04/mms/1630407678', 'http://id.who.int/icd/release/11/2019-04/mms/1766440644', 'http://id.who.int/icd/release/11/2019-04/mms/1954798891', 'http://id.who.int/icd/release/11/2019-04/mms/21500692', 'http://id.who.int/icd/release/11/2019-04/mms/334423054', 'http://id.who.int/icd/release/11/2019-04/mms/274880002', 'http://id.who.int/icd/release/11/2019-04/mms/1296093776', 'http://id.who.int/icd/release/11/2019-04/mms/868865918', 'http://id.who.int/icd/release/11/2019-04/mms/1218729044', 'http://id.who.int/icd/release/11/2019-04/mms/426429380', 'http://id.who.int/icd/release/11

Once we have downloaded all the files, we can join them all into one JSON file.

In [1]:
import os
import pickle
import json


# Merge every pickled node in mms_data/ into a single JSON array.

# Only '.pkl' entries are considered, so stray entries in the directory
# (hidden files, checkpoint folders, ...) do not break unpickling. The names
# are sorted because os.listdir returns them in arbitrary order, which would
# make the order of the JSON array differ between runs.
node_paths = sorted(
    'mms_data/' + node_path
    for node_path in os.listdir('mms_data')
    if node_path.endswith('.pkl')
)
nodes = list()
for node_path in node_paths:
    # NOTE: pickle.load can execute arbitrary code; only load files that were
    # written by the download step of this notebook.
    with open(node_path,'rb') as node_file:
        nodes.append(pickle.load(node_file))
# The context manager guarantees the JSON is fully flushed to disk and the
# handle is closed before the cell finishes.
with open('icd11_mms.json','w') as json_file:
    json.dump(nodes, json_file)