In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline; the cells below call `nlp(...)` on raw text.
nlp=spacy.load('en_core_web_sm')

In [3]:
# Sample input: three sentences written with no whitespace after the full stops.
doc=nlp(u"This is first sentence.This is another sentence.This is last sentence")

In [4]:
# Print every sentence found in the first sample, one per line.
for sentence in doc.sents:
    print(sentence)

This is first sentence.
This is another sentence.
This is last sentence


In [5]:
# Materialise the sentences into a list so the first one can be picked by index.
list(doc.sents)[0]

This is first sentence.

In [6]:
# Second sample: a quotation whose two clauses are joined by a semicolon.
# This rebinds `doc`, replacing the first sample.
doc=nlp(u'"Management is doing right things;leadership is doing right things."-Peter Drucker')

In [7]:
# Show the text of the quotation sample as the cell output.
doc.text

'"Management is doing right things;leadership is doing right things."-Peter Drucker'

In [8]:
# Print each sentence followed by extra blank lines so the boundaries stand out.
for sentence in doc.sents:
    print(sentence, '\n', sep='\n')

"Management is doing right things;leadership is doing right things."-


Peter Drucker




In [9]:
# Custom segmentation rule: start a new sentence right after every semicolon.
def set_custom_boundary(doc):
    """Flag the token that follows each ';' as a sentence start.

    The final token is excluded from the scan because it has no successor
    to flag. The same ``doc`` object is returned after being modified in place.
    """
    semicolon_positions = [tok.i for tok in doc[:-1] if tok.text == ';']
    for position in semicolon_positions:
        doc[position + 1].is_sent_start = True
    return doc

In [10]:
# Register the semicolon rule in the pipeline, positioned before the parser.
# add_pipe refuses to register a second component under an existing name, so
# re-running this cell on a live kernel would raise. When the component is
# already present, swap it in place instead (this also picks up an edited
# definition of set_custom_boundary).
if 'set_custom_boundary' in nlp.pipe_names:
    nlp.replace_pipe('set_custom_boundary',set_custom_boundary)
else:
    nlp.add_pipe(set_custom_boundary,before='parser')
nlp.pipe_names

['tagger', 'set_custom_boundary', 'parser', 'ner']

In [11]:
# Re-process the quotation with the pipeline that now contains set_custom_boundary.
# Note this text has a space after the semicolon, unlike the earlier sample.
doc4=nlp(u'"Management is doing right things; leadership is doing right things."-Peter Drucker')

In [12]:
# Print the sentences of doc4 to see the effect of the semicolon rule.
for sentence in doc4.sents:
    print(sentence)

"Management is doing right things;
leadership is doing right things."-
Peter Drucker


In [13]:
# Change the segmentation rule to split sentences on new lines.
# Reloading rebinds `nlp` to a fresh pipeline, so the set_custom_boundary
# component registered earlier is no longer part of it.
nlp=spacy.load('en_core_web_sm')

In [14]:
# Sample text with a blank line ("\n\n") after the second sentence and a
# single "\n" in the middle of the third one.
string="This is a sentence. This is another sentence. \n\nThis is a\nthird sentence."

In [15]:
# print() renders the "\n" escapes, making the line breaks in the sample visible.
print(string)

This is a sentence. This is another sentence. 

This is a
third sentence.


In [16]:

# Baseline: segment the newline-separated sample with the freshly loaded
# pipeline, before any newline-based rule has been added.
doc=nlp(string)
for sentence in doc.sents:
    print(sentence)

This is a sentence.
This is another sentence. 


This is a
third sentence.


In [17]:
from spacy.pipeline import SentenceSegmenter

In [19]:
def split_on_new_lines(doc):
    """Yield consecutive slices of ``doc``, cutting after runs of newline tokens.

    A token whose text starts with a newline arms the split. The cut is made
    at the first following token that is not whitespace, so a run of several
    newline/whitespace tokens stays attached to the end of the preceding
    slice. This avoids emitting a slice made only of whitespace and avoids
    losing the newline flag when two newline tokens are adjacent.

    Args:
        doc: a sequence of tokens that supports slicing and ``len()``; each
            token must expose ``text``, ``i`` (its index in ``doc``) and
            ``is_space``.

    Yields:
        ``doc[start:end]`` slices that together cover every token of ``doc``
        in order. Nothing is yielded for an empty ``doc``.
    """
    start=0
    seen_new_line=False
    for token in doc:
        # Only cut on real content; whitespace after a newline belongs to the
        # slice that is still open.
        if seen_new_line and not token.is_space:
            yield doc[start:token.i]
            start=token.i
            seen_new_line=False
        elif token.text.startswith('\n'):
            seen_new_line=True
    # Emit the trailing slice, but never an empty one (e.g. for an empty doc).
    if start<len(doc):
        yield doc[start:]
        
        

In [20]:
# Build a SentenceSegmenter component that uses split_on_new_lines as its strategy.
sbd=SentenceSegmenter(nlp.vocab,strategy=split_on_new_lines)

In [21]:
# Register the newline-based segmenter in the pipeline.
# add_pipe refuses to register a second component under an existing name, so
# re-running this cell on a live kernel would raise. When 'sbd' is already
# present, swap it in place so a rebuilt `sbd` object also takes effect.
if 'sbd' in nlp.pipe_names:
    nlp.replace_pipe('sbd',sbd)
else:
    nlp.add_pipe(sbd)
nlp.pipe_names

['tagger', 'parser', 'ner', 'sbd']

In [22]:
# Segment the same sample again, now with the newline-based segmenter in the pipeline.
doc=nlp(string)
for sentence in doc.sents:
    print(sentence)

This is a sentence. This is another sentence. 


This is a

third sentence.
