In [1]:
import networkx as nx
from collections import defaultdict
from google.cloud import translate
from joblib import Parallel, delayed

In [2]:
import os

# A `%%bash` cell runs in a separate shell process, so an `export` there never
# reaches the Python kernel that later builds the translate client. Set the
# variable in the kernel's own environment instead. `setdefault` keeps any value
# already configured outside the notebook.
# NOTE(review): this is a machine-specific absolute path; prefer configuring
# GOOGLE_APPLICATION_CREDENTIALS outside the notebook.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/michalkukielka/Downloads/My First Project-6d2d0241b71d.json",
)

In [3]:
def get_icd10_codes(path='../results/icd10cm_codes_2018.txt'):
    """Load an ICD-10 code table into a ``code -> description`` lookup.

    Every non-blank line is split on whitespace: the first token is the code,
    the remaining tokens (re-joined with single spaces) are the description.
    Both are lower-cased. Blank or whitespace-only lines are skipped.

    Args:
        path: Location of the code table. Defaults to the 2018 file this
            notebook has always read, so existing no-argument calls still work.

    Returns:
        A ``defaultdict`` with no default factory, i.e. it behaves like a plain
        dict and raises ``KeyError`` for unknown codes.
    """
    icd10 = defaultdict()
    # Read-only mode: the table is never modified, and 'r+' would fail on
    # files without write permission.
    with open(path, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # A blank line has no code token; indexing it would raise IndexError.
                continue
            icd10[tokens[0].lower()] = ' '.join(tokens[1:]).lower()
    return icd10

def retrieve_titles_for_subgraph(graph, icd10, min_nodes=3):
    """Collect the titles of every sufficiently large connected component.

    Args:
        graph: Undirected networkx graph whose node labels are strings that are
            keys of ``icd10``.
        icd10: Mapping from node label to title.
        min_nodes: Components with fewer nodes than this are ignored.

    Returns:
        A ``defaultdict(list)`` keyed by the component's node labels joined with
        ``', '``; each value is the list of titles in the same node order.

    Raises:
        KeyError: If a node of a kept component is missing from ``icd10``.
    """
    results = defaultdict(list)
    # `nx.connected_component_subgraphs` was removed in networkx 2.4, so build
    # each component's subgraph from `nx.connected_components` instead.
    for component in nx.connected_components(graph):
        if len(component) < min_nodes:
            continue
        # Materialise the node order once so the joined key and the title list
        # stay index-aligned (callers split the key to recover per-title codes).
        nodes = list(graph.subgraph(component).nodes())
        results[', '.join(nodes)] = [icd10[node] for node in nodes]
    return results

In [4]:
# Build the code -> lower-cased title lookup once; later cells index it by graph node label.
icd10 = get_icd10_codes()

Load graph

In [11]:
# Alternative input (judging by the path, the GloVe-based graph); uncomment to switch.
# graph = nx.read_edgelist('glove_analysis/150/graph_glove_1000')
# Active input: edge list read from the doc2vec analysis directory.
graph = nx.read_edgelist('doc2vec_analysis/dm/50/graph_1000')

In [12]:
# Map each connected component (default: at least 3 nodes) to the titles of its nodes.
described_relations = retrieve_titles_for_subgraph(graph, icd10)

Retrieve translations

In [13]:
def get_translation_of_title(title, translate_client):
    """Translate a single title into Polish, returning ``None`` on failure.

    Args:
        title: Text to translate.
        translate_client: Object exposing ``translate(text, target_language=...)``
            that returns a mapping containing a ``'translatedText'`` entry.

    Returns:
        The translated text, or ``None`` if the call or the result lookup raised
        an ordinary exception (best-effort: the caller falls back to the
        original title).
    """
    try:
        translation = translate_client.translate(title, target_language='pl')
        return translation['translatedText']
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit and make a
        # long translation run impossible to interrupt.
        return None

In [14]:
def get_translation_of_titles(keys, titles, translate_client):
    """Translate a group of titles, falling back to the original text on failure.

    Args:
        keys: The group's codes joined with ``', '``, positionally aligned with
            ``titles``; used only to name the code in the failure message.
        titles: Titles to translate.
        translate_client: Client forwarded to ``get_translation_of_title``.

    Returns:
        A ``(keys, translated_titles)`` tuple. ``translated_titles`` has one
        entry per input title: the translation when one was obtained, otherwise
        the untranslated title.
    """
    translated = []
    for position, original in enumerate(titles):
        polish = get_translation_of_title(original, translate_client)
        if not polish:
            # No usable translation: keep the source text and report which code failed.
            translated.append(original)
            code = keys.split(', ')[position]
            print('Could not have retrieved translation for {}: {}'.format(code, original))
            continue
        translated.append(polish)
    return (keys, translated)

In [9]:
# One client instance is created here and passed to every task below.
translate_client = translate.Client()
# Fan out one task per component using joblib's threading backend; the result is a
# list of (keys, translated_titles) tuples, one per entry of described_relations.
# verbose=50 makes joblib log a progress line for every finished task.
# NOTE(review): assumes the client is safe to share across threads - confirm.
polish_titles = Parallel(n_jobs=-1, backend='threading', verbose=50)(
    delayed(get_translation_of_titles)(keys, titles, translate_client) for keys, titles in described_relations.items())

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapse

[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 134 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 135 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 139 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done 140 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 143 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 145 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 147 tasks      | elapsed:   10.1s
[Paralle

Saving translated relations

In [10]:
# Alternative output path for the GloVe run; swap it into the `open` call below to use it:
# with open('glove_analysis/150/translated_relations_glove_1000', 'w', encoding='utf-8') as output_file:
# Write-only mode is enough (the file is never read back here). UTF-8 is set
# explicitly so Polish diacritics do not depend on the platform's default
# encoding, which can raise UnicodeEncodeError on non-UTF-8 locales.
with open('doc2vec_analysis/dm/50/translated_relations_doc2vec_1000', 'w', encoding='utf-8') as output_file:
    for keys, titles in polish_titles:
        # One record per component: "<codes>:" header, one title per line, blank separator.
        output_file.write('{}:\n'.format(keys))
        output_file.write('{}\n'.format('\n'.join(titles)))
        output_file.write('\n')