In [6]:
from stanza.models.common.constant import is_right_to_left
import stanza
import spacy
from spacy import displacy
from spacy.tokens import Doc


def visualize_doc(doc, pipeline):
    """
    Render the dependency parse of every sentence of a stanza Document with displaCy.

    Each sentence is converted into a spaCy ``Doc`` carrying the word texts, lemmas, UPOS tags,
    dependency relations and head indices, and is then rendered exactly once with
    ``displacy.render`` using the "dep" style.

    :param doc: document produced by a stanza pipeline; ``doc.sentences`` and the ``words`` of each
        sentence are read.
    :param pipeline: language code of the pipeline that produced ``doc`` (e.g. "ar" or "en"). When
        ``is_right_to_left`` reports a right-to-left language, the words of each sentence are
        displayed in reversed order while the dependency arcs stay attached to the same words.
    """
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 100,
                             "font": "Source Sans Pro", "offset_x": 30, "arrow_spacing": 20}
    # Only the vocab is needed to build the Doc objects, so a blank pipeline is sufficient and no
    # trained spaCy model has to be installed or loaded.
    nlp = spacy.blank("en")
    # The reading direction depends only on the language, so decide it once for all sentences.
    right_to_left = is_right_to_left(pipeline)

    sentences_to_visualize = []
    for sentence in doc.sentences:
        sent_len = len(sentence.words)
        # For right-to-left languages the display order is reversed; arcs are remapped below.
        ordered_words = list(reversed(sentence.words)) if right_to_left else sentence.words

        heads = []
        for word in ordered_words:
            # Stanza marks the root with head == 0; for spaCy the root is given its own position.
            target_id = word.id if word.head == 0 else word.head
            if right_to_left:
                # The word with id i sits at position sent_len - i after reversing.
                heads.append(sent_len - target_id)
            else:
                # The word with id i sits at position i - 1.
                heads.append(target_id - 1)

        document_result = Doc(
            nlp.vocab,
            words=[word.text for word in ordered_words],
            lemmas=[word.lemma for word in ordered_words],
            heads=heads,
            deps=[word.deprel for word in ordered_words],
            pos=[word.upos for word in ordered_words],
        )
        sentences_to_visualize.append(document_result)

    # Render only after every sentence has been converted. Rendering inside the loop above would
    # redraw all previously collected sentences on each iteration.
    for line in sentences_to_visualize:
        displacy.render(line, style="dep", options=visualization_options)


def visualize_str(text, pipeline):
    """
    Parse a raw string with stanza and display its dependency trees through displaCy.

    A stanza pipeline is built from the ``pipeline`` argument (for example "ar"), ``text`` is run
    through it, and the resulting document is handed to visualize_doc together with the same
    ``pipeline`` value.
    """
    annotated = stanza.Pipeline(pipeline)(text)
    visualize_doc(annotated, pipeline)


def main():
    """Visualize the dependency parses of the sample sentences listed below."""
    # Other samples that were tried earlier; add them to `samples` to render them again:
    #   ('برلين ترفض حصول شركة اميركية على رخصة تصنيع دبابة "ليوبارد" الالمانية', "ar")
    #   ("هل بإمكاني مساعدتك؟", "ar")
    #   ("أراك في مابعد", "ar")
    #   ("لحظة من فضلك", "ar")
    #   ("This is a sentence.", "en")
    #   ("我在考虑是否去西班牙旅行。", "zh")
    #   ('''.مرحبا اسمي أليكس. أنا منالولايات المتحدة الأمريكية''', "ar")
    samples = (
        ("قفز الثعلب البني السريع فوق الكلب الكسول الذي كان اسمه ألبرت.", "ar"),
        ("توقعت صحيفة نيويورك تايمز فوز دونالد ترامب في الانتخابات الرئاسية الأمريكية لعام 2016 ضد هيلاري كلينتون.", "ar"),
    )
    for sample_text, language in samples:
        visualize_str(sample_text, language)

# Run the demo as soon as this cell/script is executed.
main()


2022-07-05 15:19:57 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-05 15:19:59 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-07-05 15:19:59 INFO: Use device: cpu
2022-07-05 15:19:59 INFO: Loading: tokenize
2022-07-05 15:19:59 INFO: Loading: mwt
2022-07-05 15:19:59 INFO: Loading: pos
2022-07-05 15:20:00 INFO: Loading: lemma
2022-07-05 15:20:00 INFO: Loading: depparse
2022-07-05 15:20:00 INFO: Loading: ner
2022-07-05 15:20:02 INFO: Done loading processors!


2022-07-05 15:20:04 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-05 15:20:06 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-07-05 15:20:06 INFO: Use device: cpu
2022-07-05 15:20:06 INFO: Loading: tokenize
2022-07-05 15:20:06 INFO: Loading: mwt
2022-07-05 15:20:06 INFO: Loading: pos
2022-07-05 15:20:07 INFO: Loading: lemma
2022-07-05 15:20:07 INFO: Loading: depparse
2022-07-05 15:20:07 INFO: Loading: ner
2022-07-05 15:20:09 INFO: Done loading processors!
