In [None]:
import os
from collections import defaultdict

import networkx as nx
from google.cloud import translate
from joblib import Parallel, delayed

In [None]:
# An `export` inside a `%%bash` cell only affects that cell's child shell, so
# it never reaches the Python kernel in which `translate.Client()` is created.
# Set the variable in the kernel's own environment instead. `setdefault` keeps
# a value that was already configured outside the notebook.
# NOTE(review): machine-specific absolute path - point it at your own key file.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/michalkukielka/Downloads/My First Project-6d2d0241b71d.json',
)

In [None]:
def get_icd10_codes(path='../results/icd10cm_codes_2018.txt'):
    """Load an ICD-10 code table into a lowercase ``code -> description`` mapping.

    Every non-blank line is split on whitespace: the first token is the code
    and the remaining tokens, re-joined with single spaces, form the
    description. Both parts are lowercased. Blank lines are skipped.

    Args:
        path: Location of the code file. Defaults to the file this notebook
            has always read.

    Returns:
        A ``defaultdict`` created without a default factory, i.e. it behaves
        like a plain dict and raises ``KeyError`` for unknown codes.
    """
    icd10 = defaultdict()
    # Plain read mode: the file is never written, and 'r+' would needlessly
    # require write permission on it.
    with open(path, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank / whitespace-only line: there is no code to index.
                continue
            icd10[tokens[0].lower()] = ' '.join(tokens[1:]).lower()
    return icd10

def retrieve_titles_for_subgraph(graph, icd10, min_nodes=3):
    """Describe every sufficiently large connected component by its ICD-10 titles.

    Args:
        graph: Undirected NetworkX graph whose nodes are string keys of ``icd10``.
        icd10: Mapping from node (code) to its title.
        min_nodes: Components with fewer nodes than this are ignored.

    Returns:
        A ``defaultdict(list)`` mapping ``', '.join(nodes)`` of a component to
        the list of titles of those nodes. The nodes in the key and the titles
        in the value are in the same order (the graph's node iteration order),
        so the i-th title belongs to the i-th code of the key.

    Raises:
        KeyError: If a node of a retained component is missing from ``icd10``
            (when ``icd10`` has no default factory).
    """
    # Rank of each node in the graph's own iteration order; used to list the
    # members of a component (an unordered set) in a stable, repeatable order.
    position = {node: index for index, node in enumerate(graph.nodes())}
    results = defaultdict(list)
    # `connected_components` yields node sets and is available across NetworkX
    # releases, unlike `connected_component_subgraphs`, which was removed in 2.4.
    for component in nx.connected_components(graph):
        if len(component) < min_nodes:
            continue
        nodes = sorted(component, key=position.__getitem__)
        results[', '.join(nodes)] = [icd10[node] for node in nodes]
    return results

In [None]:
# Build the ICD-10 code -> description lookup once; it is passed to
# retrieve_titles_for_subgraph below to label graph nodes.
icd10 = get_icd10_codes()

Load graph

In [None]:
# Graph to analyse, read from an edge-list file. Judging by the paths, the
# commented-out line is the GloVe-based alternative to the doc2vec graph;
# swap the two lines to analyse that one instead.
# graph = nx.read_edgelist('glove_analysis/150/graph_glove_1000')
graph = nx.read_edgelist('doc2vec_analysis/150/graph_1000')

In [None]:
# Map each connected component (default: at least 3 nodes) to the ICD-10
# titles of its member codes.
described_relations = retrieve_titles_for_subgraph(graph, icd10)

Retrieve translations

In [None]:
def get_translation_of_title(title, translate_client):
    """Translate a single title into Polish, best-effort.

    Args:
        title: Text to translate.
        translate_client: Object exposing
            ``translate(text, target_language=...)`` that returns a mapping
            with a ``'translatedText'`` entry.

    Returns:
        The translated text, or ``None`` if the call or the result lookup
        raised an ordinary exception (the caller falls back to the original).
    """
    try:
        translation = translate_client.translate(title, target_language='pl')
        return translation['translatedText']
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit and make
        # a long translation run impossible to interrupt.
        return None

In [None]:
def get_translation_of_titles(keys, titles, translate_client):
    """Translate all titles of one component, keeping originals on failure.

    Args:
        keys: The component's codes joined with ``', '``; the i-th code is
            used in the message printed when the i-th title is not translated.
        titles: Titles to translate, in the same order as the codes in ``keys``.
        translate_client: Client handed through to ``get_translation_of_title``.

    Returns:
        A ``(keys, polish_titles)`` tuple; ``polish_titles`` holds, per input
        title, its translation or the untranslated title as a fallback.
    """
    polish_titles = []
    for position, original in enumerate(titles):
        translated = get_translation_of_title(original, translate_client)
        # A falsy result (None or empty text) counts as "no translation".
        polish_titles.append(translated if translated else original)
        if not translated:
            code = keys.split(', ')[position]
            print('Could not have retrieved translation for {}: {}'.format(code, original))
    return keys, polish_titles

In [None]:
# One client is created here and handed to every task.
translate_client = translate.Client()

# Thread-based joblib pool (n_jobs=-1, verbose progress output).
run_in_threads = Parallel(n_jobs=-1, backend='threading', verbose=50)

# One task per component: each yields a (keys, translated titles) tuple.
polish_titles = run_in_threads(
    delayed(get_translation_of_titles)(keys, titles, translate_client)
    for keys, titles in described_relations.items()
)

Save translated relations

In [None]:
# Output format: one paragraph per component - a "<codes>:" header line, one
# title per line, then a blank separator line.
# with open('glove_analysis/150/translated_relations_glove_1000', 'w+') as output_file:
with open('doc2vec_analysis/150/translated_relations_doc2vec_1000', 'w+') as output_file:
    for keys, titles in polish_titles:
        output_file.write('{}:\n{}\n\n'.format(keys, '\n'.join(titles)))