In [3]:
from stanza.models.common.constant import is_right_to_left
import stanza
import spacy
from spacy import displacy
from spacy.tokens import Doc


def visualize_doc(doc, pipeline):
    """
    Renders the dependency parse of every sentence of a Stanza document using displaCy.

    Each sentence is converted into its own spaCy Doc, built from the text, lemma, UPOS tag, dependency
    relation and head of every word, and is then rendered with style="dep".

    :param doc: the document to visualize; must come from a Stanza pipeline, since doc.sentences and the
        text / lemma / upos / deprel / head / id attributes of each word are read.
    :param pipeline: language code of the pipeline that produced doc (e.g. "en" or "ar"). It is passed to
        is_right_to_left: for right-to-left languages such as Arabic the words are displayed in reverse
        order while every dependency arc stays attached to the same pair of words.
    """
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 90,
                             "font": "Source Sans Pro", "offset_x": 30, "arrow_spacing": 20}
    # A blank pipeline is sufficient: only its vocab is used to build Doc objects, so no trained spaCy
    # model needs to be installed.
    nlp = spacy.blank("en")
    # The reading direction depends only on the language, so it is decided once instead of per sentence.
    right_to_left = is_right_to_left(pipeline)
    sentences_to_visualize = []
    for sentence in doc.sentences:
        sent_len = len(sentence.words)
        ordered_words = list(reversed(sentence.words)) if right_to_left else list(sentence.words)
        heads = []
        for word in ordered_words:
            # Stanza ids are 1-based and the root is marked with head == 0, whereas spaCy wants the
            # 0-based position of the head token, with the root pointing at itself.
            head_id = word.id if word.head == 0 else word.head
            # Once the words are reversed, the word with Stanza id k sits at position sent_len - k.
            heads.append(sent_len - head_id if right_to_left else head_id - 1)
        document_result = Doc(
            nlp.vocab,
            words=[word.text for word in ordered_words],
            lemmas=[word.lemma for word in ordered_words],
            heads=heads,
            deps=[word.deprel for word in ordered_words],
            pos=[word.upos for word in ordered_words],
        )
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        displacy.render(line, style="dep", options=visualization_options)


def visualize_str(text, pipeline):
    """
    Parses a raw string with Stanza and displays its dependency trees through displaCy.

    The pipeline argument is the value handed to stanza.Pipeline (for example the language code 'en').
    A new Stanza pipeline is built from it on every call, the text is run through that pipeline, and the
    resulting document is passed to visualize_doc together with the same pipeline argument.
    """
    parsed_document = stanza.Pipeline(pipeline)(text)
    visualize_doc(parsed_document, pipeline)


def main():
    """
    Visualizes a few sample sentences, each paired with the language code used to parse it.
    """
    # (text, language code) pairs, visualized in the order listed
    samples = (
        ('I love Balybee. She is the best ever.', 'en'),
        ('我 爱 我的 女朋友。 她 很 漂亮 和 热！', 'zh'),
        ("قفز الثعلب البني السريع فوق الكلب الكسول الذي كان اسمه ألبرت.", "ar"),
        ("توقعت صحيفة نيويورك تايمز فوز دونالد ترامب في الانتخابات الرئاسية الأمريكية لعام 2016 ضد هيلاري كلينتون.", "ar"),
    )
    for sample_text, lang_code in samples:
        visualize_str(sample_text, lang_code)

# Run the demo only when this code is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()


2022-07-11 16:18:07 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-11 16:18:11 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-07-11 16:18:11 INFO: Use device: cpu
2022-07-11 16:18:11 INFO: Loading: tokenize
2022-07-11 16:18:11 INFO: Loading: pos
2022-07-11 16:18:11 INFO: Loading: lemma
2022-07-11 16:18:11 INFO: Loading: depparse
2022-07-11 16:18:12 INFO: Loading: sentiment
2022-07-11 16:18:12 INFO: Loading: constituency
2022-07-11 16:18:14 INFO: Loading: ner
2022-07-11 16:18:15 INFO: Done loading processors!


2022-07-11 16:18:17 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-11 16:18:18 INFO: "zh" is an alias for "zh-hans"
2022-07-11 16:18:23 INFO: Loading these models for language: zh-hans (Simplified_Chinese):
| Processor    | Package   |
----------------------------
| tokenize     | gsdsimp   |
| pos          | gsdsimp   |
| lemma        | gsdsimp   |
| depparse     | gsdsimp   |
| sentiment    | ren       |
| constituency | ctb       |
| ner          | ontonotes |

2022-07-11 16:18:23 INFO: Use device: cpu
2022-07-11 16:18:23 INFO: Loading: tokenize
2022-07-11 16:18:23 INFO: Loading: pos
2022-07-11 16:18:24 INFO: Loading: lemma
2022-07-11 16:18:24 INFO: Loading: depparse
2022-07-11 16:18:24 INFO: Loading: sentiment
2022-07-11 16:18:25 INFO: Loading: constituency
2022-07-11 16:18:27 INFO: Loading: ner
2022-07-11 16:18:28 INFO: Done loading processors!


2022-07-11 16:18:29 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-11 16:18:32 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-07-11 16:18:32 INFO: Use device: cpu
2022-07-11 16:18:32 INFO: Loading: tokenize
2022-07-11 16:18:32 INFO: Loading: mwt
2022-07-11 16:18:32 INFO: Loading: pos
2022-07-11 16:18:33 INFO: Loading: lemma
2022-07-11 16:18:33 INFO: Loading: depparse
2022-07-11 16:18:33 INFO: Loading: ner
2022-07-11 16:18:35 INFO: Done loading processors!


2022-07-11 16:18:36 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-11 16:18:38 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-07-11 16:18:38 INFO: Use device: cpu
2022-07-11 16:18:38 INFO: Loading: tokenize
2022-07-11 16:18:38 INFO: Loading: mwt
2022-07-11 16:18:38 INFO: Loading: pos
2022-07-11 16:18:38 INFO: Loading: lemma
2022-07-11 16:18:38 INFO: Loading: depparse
2022-07-11 16:18:39 INFO: Loading: ner
2022-07-11 16:18:41 INFO: Done loading processors!


In [6]:
from stanza.models.common.constant import is_right_to_left
import stanza
import spacy
from spacy import displacy
from spacy.tokens import Doc


def visualize_doc(doc, pipeline):
    """
    Renders the dependency parse of every sentence of a Stanza document using displaCy.

    Each sentence is converted into its own spaCy Doc, built from the text, lemma, UPOS tag, dependency
    relation and head of every word, and is then rendered with style="dep".

    :param doc: the document to visualize; must come from a Stanza pipeline, since doc.sentences and the
        text / lemma / upos / deprel / head / id attributes of each word are read.
    :param pipeline: language code of the pipeline that produced doc (e.g. "en" or "ar"). It is passed to
        is_right_to_left: for right-to-left languages such as Arabic the words are displayed in reverse
        order while every dependency arc stays attached to the same pair of words.
    """
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 80,
                             "font": "Source Sans Pro"}
    # blank model - we don't use any of the model features, just the viz, so no trained spaCy model
    # needs to be downloaded
    nlp = spacy.blank("en")
    # The reading direction depends only on the language, so it is decided once instead of per sentence.
    right_to_left = is_right_to_left(pipeline)
    sentences_to_visualize = []
    for sentence in doc.sentences:
        sent_len = len(sentence.words)
        ordered_words = list(reversed(sentence.words)) if right_to_left else list(sentence.words)
        heads = []
        for word in ordered_words:
            # Stanza ids are 1-based and the root is marked with head == 0, whereas spaCy wants the
            # 0-based position of the head token, with the root pointing at itself.
            head_id = word.id if word.head == 0 else word.head
            # Once the words are reversed, the word with Stanza id k sits at position sent_len - k.
            heads.append(sent_len - head_id if right_to_left else head_id - 1)
        document_result = Doc(
            nlp.vocab,
            words=[word.text for word in ordered_words],
            lemmas=[word.lemma for word in ordered_words],
            heads=heads,
            deps=[word.deprel for word in ordered_words],
            pos=[word.upos for word in ordered_words],
        )
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        # If this program is NOT being run in a Jupyter notebook, replace displacy.render with displacy.serve
        # and the visualization will be hosted locally, link being provided in the program output.
        displacy.render(line, style="dep", options=visualization_options)


def visualize_str(text, pipeline_code, pipe):
    """
    Parses a raw string with an already-loaded Stanza pipeline and displays its dependency trees.

    :param text: the string to process.
    :param pipeline_code: the Stanza language code of the pipeline (the two-letter abbreviation of the
        language, such as 'en' for English); forwarded to visualize_doc.
    :param pipe: the Stanza pipeline object; it is called on text to produce the document.
    """
    visualize_doc(pipe(text), pipeline_code)


def main():
    """
    Loads the English Stanza pipeline and visualizes one English sample passage with it.
    """
    english_pipeline = stanza.Pipeline('en')
    sample_text = (
        "What if Google morphed into GoogleOS? What if Google expanded upon its search-engine (and now e-mail) "
        "wares into a full-fledged operating system? [via Microsoft Watch from Mary Jo Foley] (And, by the way, "
        "is anybody else just a little nostalgic for the days when that was a good thing?)"
    )
    visualize_str(sample_text, 'en', english_pipeline)


# Run the demo only when this code is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()



2022-07-11 16:44:50 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-11 16:44:54 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-07-11 16:44:54 INFO: Use device: cpu
2022-07-11 16:44:54 INFO: Loading: tokenize
2022-07-11 16:44:54 INFO: Loading: pos
2022-07-11 16:44:54 INFO: Loading: lemma
2022-07-11 16:44:54 INFO: Loading: depparse
2022-07-11 16:44:55 INFO: Loading: sentiment
2022-07-11 16:44:55 INFO: Loading: constituency
2022-07-11 16:44:57 INFO: Loading: ner
2022-07-11 16:44:58 INFO: Done loading processors!


In [10]:
from stanza.models.common.constant import is_right_to_left
import stanza
import spacy
from spacy import displacy
from spacy.tokens import Doc


def visualize_doc(doc, pipeline):
    """
    Renders the dependency parse of every sentence of a Stanza document using displaCy.

    Each sentence is converted into its own spaCy Doc, built from the text, lemma, UPOS tag, dependency
    relation and head of every word, and is then rendered with style="dep".

    :param doc: the document to visualize; must come from a Stanza pipeline, since doc.sentences and the
        text / lemma / upos / deprel / head / id attributes of each word are read.
    :param pipeline: language code of the pipeline that produced doc (e.g. "en" or "ar"). It is passed to
        is_right_to_left: for right-to-left languages such as Arabic the words are displayed in reverse
        order while every dependency arc stays attached to the same pair of words.
    """
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 90,
                             "font": "Source Sans Pro", "arrow_spacing": 25}
    # blank model - we don't use any of the model features, just the viz
    nlp = spacy.blank("en")
    # The reading direction depends only on the language, so it is decided once instead of per sentence.
    right_to_left = is_right_to_left(pipeline)
    sentences_to_visualize = []
    for sentence in doc.sentences:
        sent_len = len(sentence.words)
        ordered_words = list(reversed(sentence.words)) if right_to_left else list(sentence.words)
        heads = []
        for word in ordered_words:
            # Stanza ids are 1-based and the root is marked with head == 0, whereas spaCy wants the
            # 0-based position of the head token, with the root pointing at itself.
            head_id = word.id if word.head == 0 else word.head
            # Once the words are reversed, the word with Stanza id k sits at position sent_len - k.
            heads.append(sent_len - head_id if right_to_left else head_id - 1)
        document_result = Doc(
            nlp.vocab,
            words=[word.text for word in ordered_words],
            lemmas=[word.lemma for word in ordered_words],
            heads=heads,
            deps=[word.deprel for word in ordered_words],
            pos=[word.upos for word in ordered_words],
        )
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        # If this program is NOT being run in a Jupyter notebook, replace displacy.render with displacy.serve
        # and the visualization will be hosted locally, link being provided in the program output.
        displacy.render(line, style="dep", options=visualization_options)


def visualize_str(text, pipeline_code, pipe):
    """
    Parses a raw string with an already-loaded Stanza pipeline and displays its dependency trees.

    :param text: the string to process.
    :param pipeline_code: the Stanza language code of the pipeline (the two-letter abbreviation of the
        language, such as 'en' for English); forwarded to visualize_doc.
    :param pipe: the Stanza pipeline object; it is called on text to produce the document.
    """
    visualize_doc(pipe(text), pipeline_code)


def visualize_docs(docs, lang_code):
    """
    Visualizes the dependency relationships of several Stanza documents, one after another.

    :param docs: a list of Stanza document objects.
    :param lang_code: the language code shared by all of the documents (ex: 'en' for English).

    Every document is passed to visualize_doc along with lang_code; see that function for details on the
    spaCy visualization itself.
    """
    for stanza_document in docs:
        visualize_doc(stanza_document, lang_code)


def visualize_strings(texts, lang_code):
    """
    Visualizes the dependency relationships of several strings written in the same language.

    :param texts: a list of strings to process.
    :param lang_code: the language code of the strings (ex: 'en' for English).

    The Stanza pipeline for lang_code is loaded once and then shared by every string, each of which is
    visualized through visualize_str.
    """
    shared_pipeline = stanza.Pipeline(lang_code)
    for raw_text in texts:
        visualize_str(raw_text, lang_code, shared_pipeline)


def main():
    """
    Visualizes sample sentences for a right-to-left language followed by two left-to-right languages.
    """
    # (language code, sample strings) pairs, visualized in the order listed
    samples_by_language = (
        # Testing with right to left language
        ("ar", ['برلين ترفض حصول شركة اميركية على رخصة تصنيع دبابة "ليوبارد" الالمانية',
                "هل بإمكاني مساعدتك؟",
                "أراك في مابعد",
                "لحظة من فضلك"]),
        # Testing with left to right languages
        ("en", ["This is a sentence.",
                "Barack Obama was born in Hawaii. He was elected President of the United States in 2008."]),
        ("zh", ["中国是一个很有意思的国家。"]),
    )
    for lang_code, sample_strings in samples_by_language:
        visualize_strings(sample_strings, lang_code)


# Run the demo only when this code is executed directly, not when it is imported as a module.
if "__main__" == __name__:
    main()



2022-07-12 22:53:42 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-12 22:53:46 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-07-12 22:53:46 INFO: Use device: cpu
2022-07-12 22:53:46 INFO: Loading: tokenize
2022-07-12 22:53:46 INFO: Loading: mwt
2022-07-12 22:53:46 INFO: Loading: pos
2022-07-12 22:53:46 INFO: Loading: lemma
2022-07-12 22:53:46 INFO: Loading: depparse
2022-07-12 22:53:47 INFO: Loading: ner
2022-07-12 22:53:50 INFO: Done loading processors!


2022-07-12 22:53:54 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-12 22:53:57 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-07-12 22:53:57 INFO: Use device: cpu
2022-07-12 22:53:57 INFO: Loading: tokenize
2022-07-12 22:53:57 INFO: Loading: pos
2022-07-12 22:53:58 INFO: Loading: lemma
2022-07-12 22:53:58 INFO: Loading: depparse
2022-07-12 22:53:58 INFO: Loading: sentiment
2022-07-12 22:53:59 INFO: Loading: constituency
2022-07-12 22:53:59 INFO: Loading: ner
2022-07-12 22:54:00 INFO: Done loading processors!


2022-07-12 22:54:03 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-07-12 22:54:03 INFO: "zh" is an alias for "zh-hans"
2022-07-12 22:54:09 INFO: Loading these models for language: zh-hans (Simplified_Chinese):
| Processor    | Package   |
----------------------------
| tokenize     | gsdsimp   |
| pos          | gsdsimp   |
| lemma        | gsdsimp   |
| depparse     | gsdsimp   |
| sentiment    | ren       |
| constituency | ctb       |
| ner          | ontonotes |

2022-07-12 22:54:09 INFO: Use device: cpu
2022-07-12 22:54:09 INFO: Loading: tokenize
2022-07-12 22:54:09 INFO: Loading: pos
2022-07-12 22:54:09 INFO: Loading: lemma
2022-07-12 22:54:09 INFO: Loading: depparse
2022-07-12 22:54:10 INFO: Loading: sentiment
2022-07-12 22:54:10 INFO: Loading: constituency
2022-07-12 22:54:12 INFO: Loading: ner
2022-07-12 22:54:14 INFO: Done loading processors!
