#Translation Guide Generator

In case you want to experiment with my main post-processing step without having to scroll through my mess of a source code document, here's a tool to help you use it here!

**Scroll to the bottom to get started on generation!**

In [39]:
import nltk
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

def get_parts_of_speech_nltk(sentence):
  words = nltk.word_tokenize(sentence)
  pos_tags = nltk.pos_tag(words)
  return pos_tags


def format_pos(pos_list):
  #Creating an empty dictionary with keys for parts of speech
  my_dict = {
      "subjects": [],
      "objects": [],
      "verbs": [],
      "adjectives": [],
      "adverbs": [],
      "others": []
  }

  #Declaring lists for the nltk tags
  nouns = ["NN", "NNS", "NNP", "NNPS"]
  verbs = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
  adjectives = ["JJ", "JJR", "JJS"]
  adverbs = ["RB", "RBR", "RBS"]
  pronouns = ["PRP", "PRP$", "WP", "WP$"]
  prepositions = ["IN"]

  #Now we iterate through and add to the dictionary!
  for item in pos_list:
    if item[1] in nouns:
      if my_dict["subjects"] == []:
        my_dict["subjects"].append(item[0])
      else:
        my_dict["objects"].append(item[0])
    elif item[1] in verbs:
      my_dict["verbs"].append(item[0])
    elif item[1] in adjectives:
      my_dict["adjectives"].append(item[0])
    elif item[1] in adverbs:
      my_dict["adverbs"].append(item[0])
    else:
      my_dict["others"].append(item[0])

  return my_dict

def build_translation_guide(english_text, pos_dict):
  subjects = pos_dict["subjects"][0]
  objects = ""
  verbs = ""
  adjectives = ""
  adverbs = ""
  others = ""

  for obj in pos_dict["objects"]:
    objects += obj + ", "
  for verb in pos_dict["verbs"]:
    verbs += verb + ", "
  for adj in pos_dict["adjectives"]:
    adjectives += adj + ", "
  for adv in pos_dict["adverbs"]:
    adverbs += adv + ", "
  for other in pos_dict["others"]:
    others += other + ", "


  print("--------------------------------------")
  print("English text: " + english_text)
  print("Subjects: " + subjects)
  print("Objects: " + objects)
  print("Verbs: " + verbs)
  print("Adjectives: " + adjectives)
  print("Adverbs: " + adverbs)
  print("Other words: " + others)
  print("--------------------------------------")

def format_translation(english_text):
  pos_dict = format_pos(get_parts_of_speech_nltk(english_text))
  build_translation_guide(english_text, pos_dict)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


#Down here!
Replace the placeholder string with an english sentence to get a translation guide! If you want, you can pull text you got from a MT model to replicate the full effect!

In [40]:
your_text = "Rhys after midnight AU went to"

format_translation(your_text)

--------------------------------------
English text: Rhys after midnight AU went to
Subjects: Rhys
Objects: midnight, AU, 
Verbs: went, 
Adjectives: 
Adverbs: 
Other words: after, to, 
--------------------------------------


It's far from perfect. It assumes the first noun is the subject (as it should be in Japanese sentence structure) and tends to confude adjectives and verbs, as it is looking for context to confirm itself.

# Experimental: Example Sentence Generator

This function takes the text and comes up with some example sentences!

In [41]:
def example_gen(text):
  print("Input text: " + text)

  pos_dict = format_pos(get_parts_of_speech_nltk(text))

  subjects = pos_dict["subjects"][0]
  objects = ""
  verbs = ""
  adjectives = ""
  adverbs = ""
  others = ""

  for obj in pos_dict["objects"]:
    objects += obj + ", "
  for verb in pos_dict["verbs"]:
    verbs += verb + ", "
  for adj in pos_dict["adjectives"]:
    adjectives += adj + ", "
  for adv in pos_dict["adverbs"]:
    adverbs += adv + ", "
  for other in pos_dict["others"]:
    others += other + ", "

  print("Example sentences:")
  for obj in pos_dict["objects"]:
    for verb in pos_dict["verbs"]:
      print(subjects + " " + verb + " to " + obj + ".")
      if (others != ""):
        print(subjects + " " + verb + " to " + pos_dict["others"][0] + " " + obj + ".")
        print(subjects + " " + verb + " to " + pos_dict["others"][0] + " " + obj + ".")
      if (adjectives != ""):
        print(subjects + " " + verb + " to " + pos_dict["adjectives"][0] + " " + obj + ".")

example_gen(your_text)

Input text: Rhys after midnight AU went to
Example sentences:
Rhys went to midnight.
Rhys went to after midnight.
Rhys went to after midnight.
Rhys went to AU.
Rhys went to after AU.
Rhys went to after AU.
