In [1]:
import os
import re
import string
import subprocess
import tempfile

import nltk
from nltk.tokenize import word_tokenize

In [51]:
# Pre-compiled pattern: one or more digits, optionally followed by a comma and
# zero or more further digits (matches e.g. "1", "1,5", "12,").
# NOTE(review): not referenced by any cell visible in this excerpt; confirm it
# is still needed before relying on it.
regnumber = re.compile(r'\d+(?:,\d*)?')

def fix(arr, model_path="final_model.pt"):
    """Run ``onmt_translate`` over the given lines and decode its output.

    The lines are joined with newlines, encoded as UTF-8 and piped to
    ``onmt_translate`` through ``/dev/stdin``. The tool writes its result to a
    private temporary file, which is read back and then removed. Every output
    line is decoded by deleting all spaces and then replacing each ``##`` with
    a single space.

    Args:
        arr: Iterable of input lines (``str``), one per segment.
        model_path: Value passed to ``onmt_translate -model``. Defaults to the
            previously hard-coded ``"final_model.pt"``.

    Returns:
        list[str]: The decoded output lines in file order. The empty entry
        produced by the file's final newline is dropped. An empty input yields
        an empty list without starting the external process.

    Raises:
        subprocess.CalledProcessError: If ``onmt_translate`` exits with a
            non-zero status (its stderr is available on the exception).
        FileNotFoundError: If ``onmt_translate`` cannot be found.
    """
    source_lines = list(arr)
    if not source_lines:
        # Nothing to translate: avoid the cost of launching the model.
        return []

    # UTF-8 is a superset of ASCII, so ASCII-only input is sent unchanged,
    # while non-ASCII text no longer raises UnicodeEncodeError.
    payload = '\n'.join(source_lines).encode("utf-8")

    # A unique output file prevents reading stale results from an earlier run
    # and avoids collisions between concurrent invocations.
    fd, output_path = tempfile.mkstemp(prefix="translation_", suffix=".txt")
    os.close(fd)
    try:
        command = ["onmt_translate", "-model", model_path, "-src", "/dev/stdin", "-output", output_path]
        # stderr is captured (not shown) so that a failure carries diagnostics;
        # check=True turns a failed run into an explicit error.
        subprocess.run(
            command,
            input=payload,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
        )
        with open(output_path, "r", encoding="utf-8") as handle:
            out = handle.read()
    finally:
        os.remove(output_path)

    lines = out.split('\n')
    if lines and lines[-1] == '':
        # The final newline of the file is a terminator, not an extra line.
        lines.pop()
    return [line.replace(' ', '').replace('##', ' ') for line in lines]

# Tokens treated as stand-alone punctuation: every single character of
# string.punctuation except the apostrophe, plus the two-character quote
# tokens "''" and "``".
_PUNCT_TOKENS = frozenset(string.punctuation.replace("'", "")) | {"''", "``"}


def build_structure(input, acc):
    """Split a token sequence into alternating text and punctuation segments.

    Consecutive non-punctuation tokens are concatenated (with no separator)
    into one ``("str", text)`` segment. Each punctuation token closes the
    current text segment and is emitted as ``("punct", token + ' ')``.

    Implemented iteratively so that long inputs do not hit Python's recursion
    limit, and without re-slicing the token list at every step.

    Args:
        input: Sequence of string tokens.
        acc: Text already accumulated for the first segment (normally ``''``).

    Returns:
        list[tuple[str, str]]: Segments in input order. A ``("str", ...)``
        segment is emitted before every punctuation token and once at the end,
        so some ``"str"`` segments may be empty.
    """
    structure = []
    for token in input:
        if token in _PUNCT_TOKENS:
            structure.append(("str", acc))
            structure.append(("punct", token + ' '))
            acc = ''
        else:
            acc = acc + token
    # Whatever text remains after the last punctuation token.
    structure.append(("str", acc))
    return structure

def fix_text(text):
    """Re-segment the words of ``text`` using the translation model.

    The text is tokenized with ``word_tokenize`` and grouped by
    ``build_structure`` into text and punctuation segments; empty segments are
    discarded. Each text segment is sent to ``fix`` with its characters
    separated by single spaces, and the returned strings are put back in the
    positions of the original text segments. Punctuation segments are kept
    as produced by ``build_structure``.

    Args:
        text: The input string.

    Returns:
        str: The concatenation of translated text segments and punctuation
        segments, in their original order.

    Raises:
        ValueError: If ``fix`` returns fewer results than the number of text
            segments sent to it (previously the unmatched text was silently
            dropped).
    """
    tokens = word_tokenize(text)
    segments = [(kind, value) for kind, value in build_structure(tokens, '') if value]

    # ' '.join over a string puts a space between every character.
    model_inputs = [' '.join(value) for kind, value in segments if kind == "str"]

    # Skip the external call entirely when there is no text to translate.
    translations = fix(model_inputs) if model_inputs else []
    if len(translations) < len(model_inputs):
        raise ValueError(
            "expected %d translated segments, got %d" % (len(model_inputs), len(translations))
        )

    # Walk the segments in order, consuming one translation per text segment.
    pieces = []
    next_translation = 0
    for kind, value in segments:
        if kind == "str":
            pieces.append(translations[next_translation])
            next_translation += 1
        else:
            pieces.append(value)
    return ''.join(pieces)
    

In [7]:
# Each garbled sample is annotated with the segmentation it should come back as.
garbled_samples = [
    'lev eragingex periencefr omallagen ts',  # leveraging experience from all agents
    'ce ntralcon trola re un reali stici n',  # central control are unrealistic in
    'wh ichinc omi ngfl owst osch edu le',  # which incoming flows to schedule
    'deeple arn ing is beau ti ful',  # deep learning is beautiful
    'coor dinationdec isionsindivid ually inpa rall el',  # coordination decisions individually in parallel
]
for garbled in garbled_samples:
    print(fix_text(garbled))

leveraging experience from all agents
central control are unrealistic in
which incoming flows to schedule
deep learning is beautiful
coordination decisions individually in parallel


In [9]:
#fix_block(' '.join(['a', 'c', 'c', 'e', 'l', 'e', 'r', 'a', 't', 'e', 'd', 'e', 'r', 'o', 's', 'i', 'o', 'n', 'i', 's', 'm', 'o', 'r', 'e', 'r', 'a', 'p', 'i', 'd', 't', 'h', 'a', 'n', 'n', 'o', 'r', 'm', 'a', 'l', 'e', 'r', 'o', 's', 'i', 'o', 'n', 'a', 'n', 'd', 'r', 'e', 's', 'u', 'l', 't', 's', 'p', 'r', 'i', 'm', 'a', 'r', 'i', 'l', 'y', 'f', 'r', 'o', 'm', 'm', 'a', 'n', "'", 's', 'a', 'c', 't', 'i', 'v', 'i', 't', 'i', 'e', 's']))
# Round-trip a correctly spaced paragraph through the model.
mec_abstract = """
As mobile edge computing emerges as a paradigm to meet the ever-increasing computation demands from real-time Internet of Things applications in era, the development trends of which are mainly divided into two, with one being MEC with advanced computing architectures, and the other being MEC with high efficiency for sustainable operations. We are committed to taking advantage of these two trends to explore a novel multi-tier edge computing scenario with hierarchical task offloading and green energy provisioning via leveraging the energy harvesting technique.
"""
fix_text(mec_abstract)

'As mobile edge computing emerges as a paradigm to meet the ever-increasing computation demands from real-time Internet of Things applications in era, the development trends of which a remainly divided into two, with one being MEC with advanced computing architectures, and the other being MEC with high efficiency for sustainable operations. We are committed to taking advantage of these two trends to explore a novel multi-tier edge computing scenario with hierarchical task of floading and green energy provisioning via leveraging the energy harvesting technique. '

In [11]:
# Sample containing an apostrophe inside a name.
rower_bio = """
Tony O'Connor is a former international rower from Ireland. He is a double Olympian who represented Ireland and won five medals at the World Championships as part of a lightweight pair.
"""
fix_text(rower_bio)

"Tony O'Connor is a former international rower from Ireland. He is a double Olympian who represented Ireland and won five medals at the World Championships as part of a light weight pair. "

In [50]:
# Sample containing an acronym and a word hyphenated across a line break ("be- gan").
ir_passage = """
Information retrieval did not begin with the Web. In response to various challenges of providing information access, the field of IR evolved to give principled approaches to searching various forms of content. The field be- gan with scientific publications and library records but soon spread to other forms of content, particularly those of information professionals, such as journalists, lawyers, and doctors. Much of the scientific research on IR has occurred in these contexts, and much of the continued practice of IR deals with providing access to unstructured information in various corporate and governmental domains, and this work forms much of the foundation of our book."""
fix_text(ir_passage)

'Information retrieval did not begin with the Web. In response to various challenges of providing information access, the field of I Revolved to give principled approaches to searching various forms of content. The field be-gan with scientific publications and library records but soon spread to other forms of content, particularly those of information professionals, such as journalists, lawyers, and doctors. Much of the scientific research on IR has occurred in these contexts, and much of the continued practice of IR deals with providing access to unstructured information in various corporate and governmental domains, and this workforms much of the foundation of our book. '

In [37]:
# Two-line sample containing numbers, dates and a bracketed annotation.
soriano_history = """
In 1624, a Franciscan Mission established a reduction for the indigenous tribes of the area named Santo Domingo Soriano. In spite of interruptions in its existence, it is considered the earliest populated centre of the actual Uruguay. Eventually, in its place Villa Soriano was founded.[citation needed]
The first division of Uruguay in departments happened on 27 January 1816. At the time, eight departments were formed, with Soriano being one of them. When the first constitution was signed in 1830, Soriano Department was one of the nine original departments of the Republic.
"""
fix_text(soriano_history)

'In 1624, a Franciscan Mission established a reduction for the indigenous tribes of the area named Santo Domingo Soriano. Inspite of interruptions in its existence, it is considered the earliest populated centre of the actual Uruguay. Eventually, in its place Villa Soria now as founded. [ citation needed] The first division of Uruguay in departments happened on 27 January 1816. At the time, eight departments were formed, with Soriano being one of them. When the first constitution was signed in 1830, Soriano Department was one of the nine original departments of the Republic. '