In [1]:
import networkx as nx
from collections import defaultdict
from google.cloud import translate
from joblib import Parallel, delayed

In [2]:
def get_icd10_codes(path='../results/icd10cm_codes_2018.txt'):
    """Load an ICD-10 code listing into a ``code -> description`` lookup.

    Every non-blank line is split on whitespace: the first token is taken as
    the code and the remaining tokens, re-joined with single spaces, as its
    description. Both are lower-cased before being stored.

    Parameters
    ----------
    path : str, optional
        Location of the code listing. Defaults to the file the notebook has
        always read, so existing zero-argument calls behave as before.

    Returns
    -------
    collections.defaultdict
        Lower-cased code mapped to lower-cased description. The mapping is
        created without a default factory, so looking up an unknown code
        raises ``KeyError`` exactly like a plain dict.
    """
    icd10 = defaultdict()
    # Plain read mode: the file is never written, and 'r+' would fail on a
    # read-only file for no benefit.
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only lines carry no code; skip them
                # rather than crash on fields[0].
                continue
            icd10[fields[0].lower()] = ' '.join(fields[1:]).lower()
    return icd10

def retrieve_titles_for_subgraph(graph, icd10, min_nodes=3):
    """Collect the titles of every sufficiently large connected component.

    Parameters
    ----------
    graph : networkx graph
        Graph handed to ``nx.connected_components``; its node labels must be
        strings because they are joined into the result keys.
    icd10 : mapping
        Lookup from node label to title.
    min_nodes : int, optional
        Smallest component size (number of nodes) that is reported.

    Returns
    -------
    collections.defaultdict
        For each reported component, ``', '.join(labels)`` mapped to the list
        of titles, where the i-th title belongs to the i-th label in the key.

    Raises
    ------
    KeyError
        Propagated from ``icd10[label]`` when a label has no entry.
    """
    results = defaultdict(list)
    # nx.connected_component_subgraphs was removed in networkx 2.4; the node
    # sets from connected_components are all this function needs.
    for component in nx.connected_components(graph):
        if len(component) < min_nodes:
            continue
        # Sort so the key (and the aligned title list) is reproducible across
        # kernel restarts instead of depending on set iteration order.
        nodes = sorted(component)
        results[', '.join(nodes)] = [icd10[node] for node in nodes]
    return results

In [3]:
# Build the code -> description lookup once; the subgraph-description cell below reuses it.
icd10 = get_icd10_codes()

In [4]:
# Load the graph from an edge-list file; the path is relative to the kernel's working directory.
graph = nx.read_edgelist('doc2vec_analysis/graph1000')

In [5]:
# Map each connected component of at least min_nodes (default 3) nodes to the titles of its nodes.
described_relations = retrieve_titles_for_subgraph(graph, icd10)

In [12]:
def get_translation_of_title(title, translate_client):
    """Translate ``title`` into Polish, best-effort.

    Parameters
    ----------
    title : str
        Text to translate.
    translate_client : object
        Client exposing ``translate(text, target_language=...)`` and returning
        a mapping with a ``'translatedText'`` entry.

    Returns
    -------
    str or None
        The translated text, or ``None`` when the request or the lookup of
        ``'translatedText'`` fails with an ordinary exception.
    """
    try:
        translation = translate_client.translate(title, target_language='pl')
        return translation['translatedText']
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit and make
        # the long-running translation job impossible to stop.
        return None

In [13]:
def get_translation_of_titles(keys, titles, client=None):
    """Translate a group of titles, keeping the original where translation fails.

    Parameters
    ----------
    keys : str
        Labels of the group joined with ``', '``; the i-th label corresponds
        to the i-th entry of ``titles`` and is used only in the failure
        message.
    titles : list of str
        Titles to translate.
    client : object, optional
        Translation client passed on to ``get_translation_of_title``. When
        omitted, the notebook-level ``translate_client`` is used, which keeps
        existing two-argument calls working.

    Returns
    -------
    tuple
        ``(keys, polish_titles)`` where ``polish_titles`` has one entry per
        input title: the translation, or the untranslated title as fallback.
    """
    if client is None:
        # Backward-compatible fallback to the global the notebook defines.
        client = translate_client
    codes = keys.split(', ')  # split once instead of on every failure
    polish_titles = []
    for index, title in enumerate(titles):
        translation = get_translation_of_title(title, client)
        if translation:
            polish_titles.append(translation)
        else:
            # Falsy result (None or empty string): keep the source title so
            # the output stays aligned with the input.
            polish_titles.append(title)
            print('Could not have retrieved translation for {}: {}'.format(codes[index], title))
    return (keys, polish_titles)

In [None]:
# Translate every described component on a thread pool (n_jobs=-1 resolved to 4 workers in
# the recorded run). Each task yields a (keys, titles) tuple from get_translation_of_titles;
# verbose=50 makes joblib log a progress line per finished task, as seen in the output below.
# NOTE(review): get_translation_of_titles reads a global `translate_client` that no visible
# cell defines -- confirm it is created before this cell runs on a fresh kernel.
polish_titles = Parallel(n_jobs=-1, backend='threading', verbose=50)(
    delayed(get_translation_of_titles)(keys, titles) for keys, titles in described_relations.items())

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapse