In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline; every later nlp(...) call goes through it.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Process a short text made of three sentences; the result is kept in doc.
doc = nlp(u'This is the first sentence. This is another sentence. This is the last sentence.')

In [3]:
# Walk the sentence spans exposed by doc.sents and print them one per line.
for sent in doc.sents:
    print(sent)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [4]:
# Integer indexing on doc selects a single token (here the first one).
doc[0]

This

In [6]:
# Grab the first sentence: doc.sents is materialised with list() first so
# that it can be indexed by position.
list(doc.sents)[0]

This is the first sentence.

In [7]:
# Rebind doc to a quotation whose two clauses are joined by a semicolon;
# this is the text used to demonstrate the custom boundary rule below.
doc = nlp(u'"Management is doing things right; leadership is doing the right things." -Peter Drucker')

In [8]:
# Show the text of the processed document as a plain string.
doc.text

'"Management is doing things right; leadership is doing the right things." -Peter Drucker'

In [14]:
def set_custom_boundaries(doc):
    """Flag the token that follows each semicolon as a sentence start.

    Written to be usable as a pipeline component: it receives a document,
    annotates it in place and hands the same object back.

    Args:
        doc: The document to annotate. It must support slicing and integer
            indexing, and its tokens must expose ``text``, ``i`` and a
            writable ``is_sent_start`` attribute.

    Returns:
        The very same ``doc`` object that was passed in.
    """
    # The final token is excluded from the scan because nothing follows it.
    after_semicolon = (tok.i + 1 for tok in doc[:-1] if tok.text == ';')
    for next_index in after_semicolon:
        doc[next_index].is_sent_start = True
    return doc

In [15]:
# Insert the custom rule ahead of the parser. The membership check keeps this
# cell safe to re-run: the component is registered under the function's name,
# and adding a second component with a name that is already in the pipeline
# is rejected by spaCy.
if 'set_custom_boundaries' not in nlp.pipe_names:
    nlp.add_pipe(set_custom_boundaries, before='parser')

nlp.pipe_names

['tagger', 'set_custom_boundaries', 'parser', 'ner']

In [16]:
# Inspect the slice that set_custom_boundaries loops over: every token of doc
# except the last one.
doc[:-1]

"Management is doing things right; leadership is doing the right things." -Peter

In [17]:
# Process the quotation again, this time into doc4, through the pipeline that
# now contains set_custom_boundaries.
doc4 = nlp(u'"Management is doing things right; leadership is doing the right things." -Peter Drucker')

In [18]:
# Print the sentences of doc4 to check where the text is split.
for sent in doc4.sents:
    print(sent)

"Management is doing things right;
leadership is doing the right things."
-Peter Drucker


In [19]:
# Load the pipeline again and rebind nlp to it -- presumably to start the next
# example without the set_custom_boundaries component added to the old nlp.
nlp = spacy.load('en_core_web_sm')

In [30]:
# Sample text containing a blank line ("\n\n") and a single line break ("\n")
# in the middle of the last sentence.
mystring = u"This is a sentence. This is another. \n\nThis is a \nthird sentence"

In [21]:
# print() renders the embedded newline characters as actual line breaks.
print(mystring)

This is a sentence. This is another. 

This is a 
third sentence


In [22]:
from spacy.pipeline import SentenceSegmenter

In [24]:
def split_on_newlines(doc):
    """Yield consecutive slices of ``doc``, cutting after newline tokens.

    A token whose text starts with ``'\\n'`` closes the current segment: the
    newline token stays at the end of that segment and the next token opens
    a new one. The token that opens a new segment is not itself examined for
    a leading newline.

    Args:
        doc: The document to split. It must support ``len()`` and slicing,
            and its tokens must expose ``text`` and ``i`` (the token's
            position in ``doc``).

    Yields:
        Slices ``doc[start:end]`` that together cover ``doc`` in order.
        Nothing is yielded for an empty ``doc``.
    """
    start = 0
    seen_newline = False

    for word in doc:
        if seen_newline:
            # First token after a newline token: close the segment just
            # before it and start the next segment here.
            yield doc[start:word.i]
            start = word.i
            seen_newline = False
        elif word.text.startswith('\n'):
            seen_newline = True

    # Emit the remainder. The guard avoids yielding an empty slice when doc
    # has no tokens at all.
    if start < len(doc):
        yield doc[start:]

In [26]:
# Build a sentence-segmenter component that delegates to split_on_newlines.
sbd = SentenceSegmenter(nlp.vocab, strategy=split_on_newlines)

In [27]:
# Add the newline-based segmenter to the pipeline (no position is specified).
nlp.add_pipe(sbd)

In [28]:
# Process the sample text with the pipeline that now includes sbd.
doc = nlp(mystring)

In [31]:
# Print each segment; a segment that ends in a newline token shows extra
# blank space after it in the output.
for sentence in doc.sents:
    print(sentence)

This is a sentence. This is another. 


This is a 

third sentence


In [13]:
# NOTE(review): this cell's execution count (13) is lower than that of the
# cell defining set_custom_boundaries (14), and the token/index listing shown
# as its output is not something the current definition prints. The output
# presumably comes from an earlier draft of the function; re-run to refresh.
set_custom_boundaries(doc)

"
0
Management
1
is
2
doing
3
things
4
right
5
;
6
leadership
7
is
8
doing
9
the
10
right
11
things
12
.
13
"
14
-Peter
15
Drucker
16
