In [1]:
import spacy
from spacy.tokens import Doc

def doc_from_conllu(vocab, lines):
    """Build a spaCy ``Doc`` from the token lines of ONE CoNLL-U sentence.

    Token IDs are converted to 0-based positions and used directly as indices
    into the resulting ``Doc``, so ``lines`` must describe a single sentence.

    Args:
        vocab: Vocabulary object handed straight to ``Doc``.
        lines: Iterable of CoNLL-U lines. Token lines must contain exactly 10
            tab-separated fields. Blank lines and lines starting with ``#``
            are skipped, as are multiword-token ranges (ID contains ``-``)
            and empty nodes (ID contains ``.``).

    Returns:
        A ``Doc`` whose tokens carry tag, POS, dependency label, lemma and
        head, with ``is_parsed`` and ``is_tagged`` set to ``True``.
        The FEATS (morphology) column is not transferred to the tokens.

    Raises:
        ValueError: if a token line does not have exactly 10 fields, or if an
            ID/HEAD value is not an integer.
    """
    words, spaces, tags, poses, lemmas = [], [], [], [], []
    heads, deps = [], []
    for line_no, raw_line in enumerate(lines, start=1):
        line = raw_line.rstrip("\r\n")
        # Sentence chunks routinely arrive with a trailing blank line or with
        # leftover "# key = value" comments; neither describes a token.
        if not line.strip() or line.startswith("#"):
            continue

        parts = line.split("\t")
        if len(parts) != 10:
            raise ValueError(
                "CoNLL-U line {} has {} tab-separated fields, expected 10: {!r}".format(
                    line_no, len(parts), line
                )
            )
        id_, word, lemma, pos, tag, _feats, head, dep, _enhanced, misc = parts
        if "." in id_ or "-" in id_:
            continue

        index = int(id_) - 1
        # A root (HEAD == "0") or an unspecified head ("_") points at itself.
        head_index = (int(head) - 1) if head not in ("0", "_") else index

        words.append(word)
        spaces.append("SpaceAfter=No" not in misc)
        lemmas.append(lemma)
        poses.append(pos)
        # Fall back to the universal POS when no language-specific tag is given.
        tags.append(pos if tag == "_" else tag)
        heads.append(head_index)
        deps.append("ROOT" if dep == "root" else dep)

    doc = Doc(vocab, words=words, spaces=spaces)
    for i in range(len(doc)):
        token = doc[i]
        token.tag_ = tags[i]
        token.pos_ = poses[i]
        token.dep_ = deps[i]
        token.lemma_ = lemmas[i]
        token.head = doc[heads[i]]
    doc.is_parsed = True
    doc.is_tagged = True

    return doc

In [4]:
from nltk import Tree

def to_nltk_tree(node):
    """Recursively convert a dependency-tree node into an nltk ``Tree``.

    A node without left or right children is returned as its bare text
    (``node.orth_``); any other node becomes a ``Tree`` labelled with its text
    whose children are the converted ``node.children``.
    """
    label = node.orth_
    has_children = (node.n_lefts + node.n_rights) > 0
    if not has_children:
        return label

    subtrees = []
    for child in node.children:
        subtrees.append(to_nltk_tree(child))
    return Tree(label, subtrees)

In [None]:
import stanza
from spacy_stanza import StanzaLanguage

# Build the stanza pipeline for lang="ru" and wrap it in spacy_stanza's
# StanzaLanguage. The resulting `nlp` object supplies `nlp.vocab` to
# doc_from_conllu in a later cell, so this cell must be executed before it.
snlp = stanza.Pipeline(lang="ru")
nlp = StanzaLanguage(snlp)

In [12]:
import re

# Read the CoNLL-U dump. The encoding is given explicitly because the data is
# non-ASCII (Cyrillic) and the platform default encoding is not always UTF-8.
with open("DeepPavlov_7777.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Sentences are separated by blank lines. Keep only the token lines of each
# sentence: every "#" comment line (sent_id, text, and any other metadata) is
# dropped, and chunks with no token lines (e.g. after a trailing blank line)
# are discarded so that each entry of `sentences` is parseable on its own.
sentences = []
for chunk in text.split("\n\n"):
    token_lines = [
        line
        for line in chunk.split("\n")
        if line.strip() and not line.startswith("#")
    ]
    if token_lines:
        sentences.append("\n".join(token_lines))

In [13]:
# Parse the first sentence block and draw the dependency tree of each sentence.
for conllu_block in sentences[:1]:
    doc = doc_from_conllu(nlp.vocab, conllu_block.split("\n"))
    for sent in doc.sents:
        tree = to_nltk_tree(sent.root)
        # to_nltk_tree returns a plain string when the root has no children
        # (a one-token sentence); a string has no pretty_print() method.
        if isinstance(tree, Tree):
            tree.pretty_print()
        else:
            print(tree)
    print('*'*150)

    Болеет                
  ____|_________           
 |    СД       лет        
 |    |      ___|_____     
 |   типа   |         в   
 |    |     |         |    
 .    2     5      течении

******************************************************************************************************************************************************
