In [3]:
import spacy

In [4]:
# A blank English pipeline: it tokenizes text but has no other components.
nlp = spacy.blank("en")
doc = nlp('''"Let's go to N.Y!"''')

# Show how the sentence was split, one token per line.
for token in doc:
    print(token.text)

"
Let
's
go
to
N.Y
!
"


In [5]:
nlp.pipe_names  # check which pipeline components, if any, are currently loaded

[]

In [6]:
# Going beyond plain tokenization requires a packaged language pipeline, so load one.

nlp = spacy.load("en_core_web_sm")


In [7]:
nlp.pipe_names  # names of the components in the current pipeline

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
nlp.pipeline  # (name, component object) pairs for each pipeline component

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x13a3776a0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x13a376fe0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x13a39a490>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x137d51840>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x138d30a00>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x13a39a650>)]

In [9]:

doc = nlp('''The crow started collecting pebbles one by one and dropped them into the pitcher. As more and more pebbles went into the pitcher, the water rose up. Soon enough, the water came to a level through which the crow could drink water. He drank the water happily and thanked mother nature''')

# Columns printed per token: token text | part-of-speech tag (pos_) | lemma (lemma_).
# lemma_ is the base form of the token (e.g. "started" -> "start"); it is set by
# the pipeline's lemmatizer component.
for token in doc:
    print(token, token.pos_, token.lemma_, sep="  |  ")

The  |  DET  |  the
crow  |  NOUN  |  crow
started  |  VERB  |  start
collecting  |  VERB  |  collect
pebbles  |  NOUN  |  pebble
one  |  NUM  |  one
by  |  ADP  |  by
one  |  NUM  |  one
and  |  CCONJ  |  and
dropped  |  VERB  |  drop
them  |  PRON  |  they
into  |  ADP  |  into
the  |  DET  |  the
pitcher  |  NOUN  |  pitcher
.  |  PUNCT  |  .
As  |  SCONJ  |  as
more  |  ADJ  |  more
and  |  CCONJ  |  and
more  |  ADJ  |  more
pebbles  |  NOUN  |  pebble
went  |  VERB  |  go
into  |  ADP  |  into
the  |  DET  |  the
pitcher  |  NOUN  |  pitcher
,  |  PUNCT  |  ,
the  |  DET  |  the
water  |  NOUN  |  water
rose  |  VERB  |  rise
up  |  ADP  |  up
.  |  PUNCT  |  .
Soon  |  ADV  |  soon
enough  |  ADV  |  enough
,  |  PUNCT  |  ,
the  |  DET  |  the
water  |  NOUN  |  water
came  |  VERB  |  come
to  |  ADP  |  to
a  |  DET  |  a
level  |  NOUN  |  level
through  |  ADP  |  through
which  |  PRON  |  which
the  |  DET  |  the
crow  |  NOUN  |  crow
could  |  AUX  |  could
drink  |  V

In [10]:
doc = nlp(''' His dad founded Stark Industries, and as a result his parents were wealthy. It's important to note, though, that Tony doesn't rest on his laurels. Instead, he works hard to make Stark Industries successful. While you may not have wealthy parents, almost everyone has some assets on which they can start building.''')

# doc.ents holds the named entities found in the text; each `ent` is one entity.
# Columns printed per entity: entity text | entity type (ent.label_) |
# human-readable description of that type from spacy.explain().
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " |", spacy.explain(ent.label_))

Stark Industries  |  ORG  | Companies, agencies, institutions, etc.
Tony  |  PERSON  | People, including fictional
Stark Industries  |  ORG  | Companies, agencies, institutions, etc.


In [11]:
# For the list of pre-trained language pipelines, see the spaCy models documentation (https://spacy.io/models).

In [12]:
from spacy import displacy  # visualizer, used here for a clearer view of the entities

displacy.render(doc, style ="ent")

In [13]:
# A longer example: a book passage about the events leading up to World War II.

doc = nlp('''World War I left unresolved the question of who would dominate
Europe. The tremendous dislocations caused by the war laid the
groundwork for the collapse of democratic institutions there and set
the stage for a second German attempt at conquest. A worldwide depression that began in 1929 destroyed the fragile democratic regime
in Germany. In 1933 Adolf Hitler led to power the National Socialist German Workers’ (Nazi) Party, a mass movement that was virulently nationalistic, antidemocratic, and anti-Semitic. He ended parliamentary government, assumed dictatorial powers, and proclaimed
the Third Reich. The Nazi government increased the strength of the
German armed forces and sought to overturn the Versailles Treaty,
to recover German territory lost at the peace settlement, and to return to the so-called Fatherland German-speaking minorities within
the borders of surrounding countries.
The ultimate goal of Hitler’s policy was to secure “living space” for
the German “master race” in eastern Europe. A gambler by instinct,
Hitler relied on diplomatic bluff and military innovation to overcome
Germany’s weaknesses. He played skillfully on the divisions among
the European powers to gain many of his aims without war. With the
Italian Fascist dictator Benito Mussolini he announced a RomeBerlin alliance (the Axis) in 1935. Meanwhile, in the Far East, the
Japanese—the only Asian industrial power—coveted the natural resources of China and Southeast Asia, but found their expansion
blocked by European colonial powers or by the United States. Having seized Manchuria in 1931, they began a war against China in 1937.
The League of Nations failed to counter effectively Japanese aggression in Manchuria and an Italian invasion of Ethiopia. Soon Germany, Italy, and Japan became allies, facing Western democratic governments that wanted to avoid another war and the Soviet Union
whose Communist government was widely distrusted.
The people of the United States, having rejected the Versailles
Treaty and the Covenant of the League of Nations after World War
I, remained largely indifferent to most international concerns. They
firmly discounted the likelihood of American involvement in an-
4
other major war, except perhaps with Japan. Isolationist strength in
Congress led to the passage of the Neutrality Act of 1937, making it
unlawful for the United States to trade with belligerents. American
policy aimed at continental defense and designated the Navy as the
first line of such defense. The Army’s role was to serve as the nucleus of a mass mobilization that would defeat any invaders who
managed to fight their way past the Navy and the nation’s powerful
coastal defense installations. The National Defense Act of 1920 allowed an Army of 280,000, the largest in peacetime history, but until
1939 Congress never appropriated funds to pay for much more than
half of that strength. ''')

# Render the passage with its entities highlighted.
displacy.render(doc, style = "ent")

In [14]:
# Show each token next to its position (token.i) in the document.
for token in doc:
    print(token.i, token, sep="  -->  ")

0  -->  World
1  -->  War
2  -->  I
3  -->  left
4  -->  unresolved
5  -->  the
6  -->  question
7  -->  of
8  -->  who
9  -->  would
10  -->  dominate
11  -->  

12  -->  Europe
13  -->  .
14  -->  The
15  -->  tremendous
16  -->  dislocations
17  -->  caused
18  -->  by
19  -->  the
20  -->  war
21  -->  laid
22  -->  the
23  -->  

24  -->  groundwork
25  -->  for
26  -->  the
27  -->  collapse
28  -->  of
29  -->  democratic
30  -->  institutions
31  -->  there
32  -->  and
33  -->  set
34  -->  

35  -->  the
36  -->  stage
37  -->  for
38  -->  a
39  -->  second
40  -->  German
41  -->  attempt
42  -->  at
43  -->  conquest
44  -->  .
45  -->  A
46  -->  worldwide
47  -->  depression
48  -->  that
49  -->  began
50  -->  in
51  -->  1929
52  -->  destroyed
53  -->  the
54  -->  fragile
55  -->  democratic
56  -->  regime
57  -->  

58  -->  in
59  -->  Germany
60  -->  .
61  -->  In
62  -->  1933
63  -->  Adolf
64  -->  Hitler
65  -->  led
66  -->  to
67  -->  power
68  -->  the


In [16]:
# Build a pipeline holding a single component instead of the whole package.
nlp = spacy.blank("en")  # starts with no pipeline components
source_nlp = spacy.load("en_core_web_sm")  # pipeline to take the component from

# Add only the "ner" component, sourced from the loaded pipeline.
nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names


['ner']