In [1]:
import networkx as nx
from collections import defaultdict
from google.cloud import translate
from joblib import Parallel, delayed

In [2]:
def get_icd10_codes(path='../results/icd10cm_codes_2018.txt'):
    """Load ICD-10-CM codes and their descriptions from a text file.

    Each non-blank line is split on whitespace: the first token is the code,
    the remaining tokens (re-joined with single spaces) form the description.
    Both the code and the description are lower-cased.

    Parameters
    ----------
    path : str, optional
        Location of the codes file. Defaults to the listing this notebook
        has always read, so existing zero-argument calls behave as before.

    Returns
    -------
    dict
        Mapping of lower-cased code -> lower-cased description. When a code
        occurs more than once the last occurrence wins. Looking up a missing
        code raises ``KeyError``.

    Raises
    ------
    OSError
        If the file cannot be opened for reading.
    """
    icd10 = {}
    # Read-only mode: nothing is written, so 'r+' would only add a needless
    # requirement for write permission on the data file.
    with open(path, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank / whitespace-only lines carry no code; skip them
                # instead of failing on tokens[0].
                continue
            icd10[tokens[0].lower()] = ' '.join(tokens[1:]).lower()
    return icd10

def retrieve_titles_for_subgraph(graph, icd10, min_nodes=3):
    """Map every sufficiently large connected component to its ICD-10 titles.

    Parameters
    ----------
    graph : networkx.Graph
        Graph whose nodes are ICD-10 codes stored as strings (they are joined
        with ', ' to build the result keys).
        NOTE(review): presumably undirected, as produced by
        ``nx.read_edgelist`` with its default graph type - confirm.
    icd10 : Mapping[str, str]
        Lookup from code to title. Every node of a retained component must be
        present in it.
    min_nodes : int, optional
        Smallest component size (number of nodes) to keep. Defaults to 3.

    Returns
    -------
    collections.defaultdict
        ``{'code1, code2, ...': [title1, title2, ...]}`` with one entry per
        retained component. Titles are listed in the same order as the codes
        inside the key.

    Raises
    ------
    KeyError
        If a node of a retained component has no entry in ``icd10``.
    """
    results = defaultdict(list)
    # nx.connected_component_subgraphs was removed in networkx 2.4; the
    # supported replacement is connected_components + graph.subgraph, which
    # also avoids copying every component up front.
    for component in nx.connected_components(graph):
        if len(component) < min_nodes:
            continue
        # Materialise the node order once so the codes in the key and the
        # titles in the value are guaranteed to line up.
        nodes = list(graph.subgraph(component))
        results[', '.join(nodes)] = [icd10[node] for node in nodes]
    return results

In [3]:
# Build the ICD-10 code -> description lookup once; the cell that describes
# the graph components below reuses it.
icd10 = get_icd10_codes()

Load graph

In [20]:
# Alternative input (presumably the GloVe-based graph), kept so the notebook
# can be switched between the two analyses:
# graph = nx.read_edgelist('glove_analysis/graph_glove_1000')
# NOTE(review): the save cell at the end of this notebook writes to a
# 'glove_analysis/...' path while the graph loaded here comes from
# 'doc2vec_analysis/' - confirm the output path matches the graph in use
# before running the whole notebook.
graph = nx.read_edgelist('doc2vec_analysis/graph1000')

In [21]:
# For every connected component with at least `min_nodes` nodes (default 3),
# collect the ICD-10 titles of its codes, keyed by the ', '-joined codes.
described_relations = retrieve_titles_for_subgraph(graph, icd10)

Retrieve translations

In [15]:
def get_translation_of_title(title, translate_client):
    """Translate a single title into Polish, returning ``None`` on failure.

    Parameters
    ----------
    title : str
        Text to translate.
    translate_client : object
        Client exposing ``translate(text, target_language=...)`` whose result
        supports ``['translatedText']`` (e.g. ``google.cloud.translate.Client``).

    Returns
    -------
    str or None
        The translated text, or ``None`` if the call or the result lookup
        raised an error. Callers are expected to handle the ``None`` case.
    """
    try:
        translation = translate_client.translate(title, target_language='pl')
        return translation['translatedText']
    except Exception:
        # Deliberately best-effort: one failed request must not abort the
        # whole batch. Catching Exception (not a bare except) still lets
        # KeyboardInterrupt / SystemExit stop a long-running cell.
        return None

In [16]:
def get_translation_of_titles(keys, titles, translate_client):
    """Translate a group of titles, falling back to the original on failure.

    Parameters
    ----------
    keys : str
        ', '-separated codes; the code at position i corresponds to titles[i]
        and is only used in the failure message.
    titles : iterable of str
        Titles to translate, in the same order as the codes in ``keys``.
    translate_client : object
        Passed through unchanged to ``get_translation_of_title``.

    Returns
    -------
    tuple
        ``(keys, translated)`` where ``translated`` holds one entry per input
        title: the translation when one was obtained, otherwise the
        untranslated title.
    """
    translated = []
    for position, original in enumerate(titles):
        polish = get_translation_of_title(original, translate_client)
        if not polish:
            # Keep the untranslated title so the output stays aligned with
            # the codes, and report which code was affected.
            translated.append(original)
            failed_code = keys.split(', ')[position]
            print('Could not have retrieved translation for {}: {}'.format(failed_code, original))
            continue
        translated.append(polish)
    return (keys, translated)

In [17]:
# A single client instance is shared by all worker threads.
# NOTE(review): assumes translate.Client can be used concurrently and that
# credentials are already configured in the environment - confirm.
translate_client = translate.Client()
# Translate each component's titles as one job on joblib's threading backend;
# n_jobs=-1 lets joblib choose the worker count and verbose=50 prints progress.
# The result is a list of (keys, translated_titles) tuples, one per component.
polish_titles = Parallel(n_jobs=-1, backend='threading', verbose=50)(
    delayed(get_translation_of_titles)(keys, titles, translate_client) for keys, titles in described_relations.items())

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done   2 out of   6 | elapsed:    1.0s remaining:    1.9s
[Parallel(n_jobs=-1)]: Done   3 out of   6 | elapsed:    1.0s remaining:    1.0s
[Parallel(n_jobs=-1)]: Done   4 out of   6 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    1.4s finished


In [18]:
# Display the (keys, translated titles) pairs returned by the parallel run.
polish_titles

[('k279, p139, d135',
  ['wrzód trawienny, miejsce nieokreślone, nieokreślone jako ostre lub przewlekłe, bez krwotoku lub perforacji',
   'uraz urodzenia szkieletu, nieokreślony',
   'łagodny nowotwór zewnątrzwątrobowych przewodów żółciowych']),
 ('f88, c721, a302',
  ['inne zaburzenia rozwoju psychologicznego',
   'nowotwór złośliwy ogona końskiego',
   'graniczny trąd gruźliczy']),
 ('j64, p132, q143',
  ['nieokreślona pylica płuc',
   'uraz urodzenia kości udowej',
   'wrodzone wady rozwojowe naczyniówki']),
 ('p0229, g113, m270',
  ['noworodka dotkniętego innymi morfologicznymi i funkcjonalnymi nieprawidłowościami łożyska',
   'ataksja móżdżkowa z wadliwą naprawą DNA',
   'zaburzenia rozwojowe szczęk']),
 ('c163, c52, c494',
  ['złośliwy nowotwór odźwiernika',
   'nowotwór złośliwy pochwy',
   'nowotwór złośliwy tkanki łącznej i miękkiej brzucha']),
 ('m5114, g619, m179',
  ['zaburzenia krążka międzykręgowego z radikulopatią, region piersiowy',
   'polineuropatia zapalna, nieokreśl

Saving translated relations

In [19]:
# Write one paragraph per component: the ', '-joined codes followed by a colon,
# then one translated title per line, then a blank separator line.
# NOTE(review): this path is under 'glove_analysis/' while the graph-loading
# cell currently reads the doc2vec graph - confirm the destination matches the
# graph in use so results of one analysis do not overwrite the other.
# Explicit UTF-8: the Polish titles contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to encode them.
# Mode 'w' is enough because the file is only written, never read back.
with open('glove_analysis/translated_relations_glove_1000', 'w', encoding='utf-8') as output_file:
    for keys, titles in polish_titles:
        output_file.write('{}:\n'.format(keys))
        output_file.write('{}\n'.format('\n'.join(titles)))
        output_file.write('\n')