In [None]:
from stanza.utils import conll
from stanza.utils.visualization import document_to_viz as viz


def conll_to_visual(conll_file, pipeline):
    """
    Visualize every sentence of a conll file.

    The file is first converted to a Stanza Document object with CoNLL.conll2doc, and that
    document is then handed to document_to_viz.visualize_doc together with the pipeline.

    conll_file: the conll file to load; it should be a proper conll file.
    pipeline: the pipeline the conll file is to be processed in (main() passes "en").
    """
    parsed_doc = conll.CoNLL.conll2doc(conll_file)
    viz.visualize_doc(parsed_doc, pipeline)


def main():
    """Visualize en_ewt-ud-test.conllu.txt with the "en" pipeline and return 0."""
    # NOTE(review): absolute, machine-specific path; it has to be adjusted before
    # this cell can run anywhere else.
    treebank_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\en_ewt-ud-test.conllu.txt"
    conll_to_visual(treebank_path, "en")
    return 0

# Run the demo when this cell is executed.
main()

2022-06-30 22:21:10 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


PRINTING ARABIC DOCUMENTS


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-06-30 22:21:12 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-06-30 22:21:12 INFO: Use device: cpu
2022-06-30 22:21:12 INFO: Loading: tokenize
2022-06-30 22:21:12 INFO: Loading: mwt
2022-06-30 22:21:12 INFO: Loading: pos
2022-06-30 22:21:12 INFO: Loading: lemma
2022-06-30 22:21:12 INFO: Loading: depparse
2022-06-30 22:21:12 INFO: Loading: ner
2022-06-30 22:21:13 INFO: Done loading processors!


2022-06-30 22:21:14 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-06-30 22:21:15 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-06-30 22:21:15 INFO: Use device: cpu
2022-06-30 22:21:15 INFO: Loading: tokenize
2022-06-30 22:21:15 INFO: Loading: mwt
2022-06-30 22:21:16 INFO: Loading: pos
2022-06-30 22:21:16 INFO: Loading: lemma
2022-06-30 22:21:16 INFO: Loading: depparse
2022-06-30 22:21:16 INFO: Loading: ner
2022-06-30 22:21:17 INFO: Done loading processors!


2022-06-30 22:21:18 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-06-30 22:21:19 INFO: Loading these models for language: ar (Arabic):
| Processor | Package |
-----------------------
| tokenize  | padt    |
| mwt       | padt    |
| pos       | padt    |
| lemma     | padt    |
| depparse  | padt    |
| ner       | aqmar   |

2022-06-30 22:21:19 INFO: Use device: cpu
2022-06-30 22:21:19 INFO: Loading: tokenize
2022-06-30 22:21:19 INFO: Loading: mwt
2022-06-30 22:21:19 INFO: Loading: pos
2022-06-30 22:21:19 INFO: Loading: lemma
2022-06-30 22:21:19 INFO: Loading: depparse
2022-06-30 22:21:19 INFO: Loading: ner
2022-06-30 22:21:20 INFO: Done loading processors!


2022-06-30 22:21:21 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

In [12]:
from stanza.models.common.constant import is_right_to_left
import spacy
from spacy import displacy
from spacy.tokens import Doc
from stanza.utils import conll


def conll_to_visual(conll_file, pipeline, sent_count):
    """
    Render the first sent_count sentences of a conll file as displaCy dependency trees.

    The conll file is converted to a Stanza Document object with CoNLL.conll2doc. Each selected
    sentence is rebuilt as a spaCy Doc (words, lemmas, heads, deprels, upos tags) and rendered
    with displacy. Input should be a proper conll file.

    conll_file: the conll file to load.
    pipeline: language code of the file; passed to is_right_to_left to decide whether the
        words are displayed in reversed order.
    sent_count: maximum number of sentences to render. When the file holds fewer sentences,
        all of them are rendered instead of raising an IndexError; values <= 0 render nothing.
    """
    # convert conll file to doc
    doc = conll.CoNLL.conll2doc(conll_file)
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 80,
                             "font": "Source Sans Pro"}
    # Must install the latest version of spaCy's en_core_web_sm before running.
    # Only the vocab of the loaded model is used below.
    nlp = spacy.load("en_core_web_sm")
    rtl = is_right_to_left(pipeline)  # same answer for every sentence, so ask once
    sentences_to_visualize = []

    # Slicing clamps to the number of sentences that actually exist; max() keeps a
    # negative count from being read as "all but the last n".
    for sentence in doc.sentences[:max(sent_count, 0)]:
        sent_len = len(sentence.words)
        # For right-to-left languages the display order is reversed; arcs remain intact
        # because the head indexes are mirrored below.
        ordered_words = list(reversed(sentence.words)) if rtl else sentence.words
        words, lemmas, heads, deps, tags = [], [], [], [], []
        for word in ordered_words:
            words.append(word.text)
            lemmas.append(word.lemma)
            deps.append(word.deprel)
            tags.append(word.upos)
            # Stanza ids are 1-based and the root has head 0; the spaCy Doc is given
            # 0-based positions with the root pointing at itself.
            anchor = word.head if word.head != 0 else word.id
            heads.append(sent_len - anchor if rtl else anchor - 1)
        document_result = Doc(nlp.vocab, words=words, lemmas=lemmas, heads=heads, deps=deps, pos=tags)
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        displacy.render(line, style="dep", options=visualization_options)
        


def main():
    """Visualize the first 15 sentences of en_ewt-ud-test.conllu.txt with the "en" pipeline."""
    # NOTE(review): absolute, machine-specific path; it has to be adjusted before
    # this cell can run anywhere else.
    treebank_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\en_ewt-ud-test.conllu.txt"
    conll_to_visual(treebank_path, "en", 15)


# Run the demo when this cell is executed.
main()


In [9]:
from stanza.models.common.constant import is_right_to_left
import spacy
from spacy import displacy
from spacy.tokens import Doc
from stanza.utils import conll
from stanza.utils.visualization import document_to_viz as viz


def conll_to_visual(conll_file, pipeline, sent_count=10, display_all=False):
    """
    Takes in a conll file and visualizes it by converting the conll file to a Stanza Document object.

    Input should be a proper conll file. The pipeline for the conll file to be processed in must
    be provided as well.

    conll_file: the conll file to load.
    pipeline: language code of the file; passed to is_right_to_left to decide whether the words
        are displayed in reversed order, and forwarded to visualize_doc when display_all is set.
    sent_count: maximum number of sentences to display (default 10). When the file holds fewer
        sentences, all of them are displayed instead of raising an IndexError; values <= 0
        display nothing. Ignored when display_all is True.
    display_all: when True, the whole document is handed to document_to_viz.visualize_doc.
        BEWARE: setting this argument for a large conll file may result in too many renderings,
        resulting in a crash.

    The text of every sentence rendered in the sent_count mode is printed before rendering.
    """
    # convert conll file to doc (parsed once and shared by both modes)
    doc = conll.CoNLL.conll2doc(conll_file)

    if display_all:
        viz.visualize_doc(doc, pipeline)
        return

    # visualize a given number of sentences
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 80,
                             "font": "Source Sans Pro"}  # see spaCy visualization settings doc for more options
    # Must install the latest version of spaCy's en_core_web_sm before running.
    # Only the vocab of the loaded model is used below.
    nlp = spacy.load("en_core_web_sm")
    rtl = is_right_to_left(pipeline)  # same answer for every sentence, so ask once
    sentences_to_visualize = []

    # Slicing clamps to the number of sentences that actually exist; max() keeps a
    # negative count from being read as "all but the last n".
    for sentence in doc.sentences[:max(sent_count, 0)]:
        print(sentence.text)
        sent_len = len(sentence.words)
        # For right-to-left languages the display order is reversed; arcs remain intact
        # because the head indexes are mirrored below.
        ordered_words = list(reversed(sentence.words)) if rtl else sentence.words
        words, lemmas, heads, deps, tags = [], [], [], [], []
        for word in ordered_words:
            words.append(word.text)
            lemmas.append(word.lemma)
            deps.append(word.deprel)
            tags.append(word.upos)
            # Stanza ids are 1-based and the root has head 0; the spaCy Doc is given
            # 0-based positions with the root pointing at itself.
            anchor = word.head if word.head != 0 else word.id
            heads.append(sent_len - anchor if rtl else anchor - 1)
        document_result = Doc(nlp.vocab, words=words, lemmas=lemmas, heads=heads, deps=deps, pos=tags)
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        displacy.render(line, style="dep", options=visualization_options)


def main():
    """
    Demo driver: visualize two sentences of the English file, then the Urdu file with the
    default settings of conll_to_visual.
    """
    # NOTE(review): absolute, machine-specific paths; they have to be adjusted before
    # this cell can run anywhere else.
    en_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\en_ewt-ud-test.conllu.txt"
    conll_to_visual(en_path, "en", sent_count=2)
    # Other English invocations that were tried:
    # conll_to_visual(en_path, "en")
    # conll_to_visual(en_path, "en", sent_count=20)
    # conll_to_visual(en_path, "en", display_all=True)

    # The Japanese and Arabic paths are only referenced by the disabled calls below.
    ja_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ja_gsd-ud-test.conllu.txt"
    ur_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ur_udtb-ud-test.conllu.txt"
    arabic_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ar_padt-ud-test.conllu.txt"
    # conll_to_visual(ja_path, "ja")
    # conll_to_visual(arabic_path, "ar")
    conll_to_visual(ur_path, "ur")


# Run the demo when this cell is executed.
main()



What if Google Morphed Into GoogleOS?
What if Google expanded on its search-engine (and now e-mail) wares into a full-fledged operating system?


صدر تلگودیشم مسٹر این چندرا بابو نائیڈو نے کڑپہ کے ضمنی انتخابات مےں اضافی فورسیس کی تعیناتی کا مطالبہ کرتے ہوئے چیف الیکشن کمشنر کو مکتوب روانہ کیا۔
انہوں نے کڑپہ کے پارلیمانی اور پولی ویندلہ کے اسمبلی انتخابات کو صاف و شفاف بنانے کے لئے زائد پولیس فورس تعینات کرنے کا مطالبہ کرتے ہوئے کہا کہ ان انتخابات مےں غیرسماجی عناصر کی جانب سے بدامنی پھیلائے جانے کا خدشہ ہے۔
مسٹر نائیڈو نے اپنے مکتوب مےں چیف الیکشن کمشنر سے خواہش کی کہ وہ ہر حلقہ اسمبلی مےں علحدہ علحدہ فورس کی تعیناتی کو یقینی بنانے کے اقدامات کریں تاکہ آزادانہ و منصفانہ انتخابات کو یقینی بنایا جا سکے۔
واضح رہے کہ گزشتہ یوم مسٹر نائیڈو پر ضلع کڑپہ مےں ہوئے حملہ کے بعد مسٹر نائیڈو نے اس طرح کے واقعات کا حوالہ دیتے ہوئے کہا کہ ضلع کڑپہ مےں گروہ_واری عناصر انتخابات مےں دھاندلیوں کی منصوبہ_بندی کر رہے ہیں۔
انہوں نے بتایا کہ جمہوریت کی بقاء کے لئے یہ ضروری ہے کہ چیف الیکشن کمشنر کی جانب سے ان انتخابات کی راست نگرانی اور غنڈہ عناصر پر کنٹرول کے لئے سخت_ترین انتظامات کئے جائیں۔
مسٹر نائیڈو نے سی ای سی کو روانہ_کردہ مکتوب مےں اس بات کی 

In [10]:
from stanza.models.common.constant import is_right_to_left
import spacy
from spacy import displacy
from spacy.tokens import Doc
from stanza.utils import conll
from stanza.utils.visualization import document_to_viz as viz


def conll_to_visual(conll_file, pipeline, sent_count=10, display_all=False):
    """
    Takes in a conll file and visualizes it by converting the conll file to a Stanza Document object.

    Input should be a proper conll file. The pipeline for the conll file to be processed in must
    be provided as well.

    conll_file: the conll file to load.
    pipeline: language code of the file; passed to is_right_to_left to decide whether the words
        are displayed in reversed order, and forwarded to visualize_doc when display_all is set.
    sent_count: maximum number of sentences to display (default 10). When the file holds fewer
        sentences, all of them are displayed instead of raising an IndexError; values <= 0
        display nothing. Ignored when display_all is True.
    display_all: when True, the whole document is handed to document_to_viz.visualize_doc.
        BEWARE: setting this argument for a large conll file may result in too many renderings,
        resulting in a crash.
    """
    # convert conll file to doc (parsed once and shared by both modes)
    doc = conll.CoNLL.conll2doc(conll_file)

    if display_all:
        viz.visualize_doc(doc, pipeline)
        return

    # visualize a given number of sentences
    visualization_options = {"compact": True, "bg": "#09a3d5", "color": "white", "distance": 80,
                             "font": "Source Sans Pro"}  # see spaCy visualization settings doc for more options
    # Must install the latest version of spaCy's en_core_web_sm before running.
    # Only the vocab of the loaded model is used below.
    nlp = spacy.load("en_core_web_sm")
    sentences_to_visualize, rtl = [], is_right_to_left(pipeline)

    # Slicing clamps to the number of sentences that actually exist; max() keeps a
    # negative count from being read as "all but the last n".
    for sentence in doc.sentences[:max(sent_count, 0)]:
        sent_len = len(sentence.words)
        # For right-to-left languages the display order is reversed; the head indexes
        # are mirrored below so the arcs still connect the same words.
        sentence_words = list(reversed(sentence.words)) if rtl else sentence.words
        words, lemmas, heads, deps, tags = [], [], [], [], []
        for word in sentence_words:
            words.append(word.text)
            lemmas.append(word.lemma)
            deps.append(word.deprel)
            tags.append(word.upos)
            # word heads are off-by-1 in spaCy doc inits compared to Stanza, and the
            # root (head 0 in Stanza) is given its own position as head.
            anchor = word.head if word.head != 0 else word.id
            heads.append(sent_len - anchor if rtl else anchor - 1)

        document_result = Doc(nlp.vocab, words=words, lemmas=lemmas, heads=heads, deps=deps, pos=tags)
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        displacy.render(line, style="dep", options=visualization_options)


def main():
    """
    Demo driver: visualize two sentences of the English file, then the Japanese and Arabic
    files with the default settings of conll_to_visual.
    """
    # NOTE(review): absolute, machine-specific paths; they have to be adjusted before
    # this cell can run anywhere else.
    en_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\en_ewt-ud-test.conllu.txt"
    conll_to_visual(en_path, "en", sent_count=2)
    # Other English invocations that were tried:
    # conll_to_visual(en_path, "en")
    # conll_to_visual(en_path, "en", sent_count=20)
    # conll_to_visual(en_path, "en", display_all=True)

    ja_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ja_gsd-ud-test.conllu.txt"
    # ur_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ur_udtb-ud-test.conllu.txt"
    arabic_path = "C:\\Users\\Alex\\stanza\\stanza\\utils\\visualization\\ar_padt-ud-test.conllu.txt"
    conll_to_visual(ja_path, "ja")
    conll_to_visual(arabic_path, "ar")
    # conll_to_visual(ur_path, "ur")


# Run the demo when this cell is executed.
main()