In [1]:
# Import every dependency the notebook uses in the first cell, so that a
# fresh kernel can run the cells top to bottom without relying on imports
# that only happen further down.
import pandas as pd
import spacy
from spacy import displacy

# Load the small English pipeline once; every later cell reuses `nlp`.
nlp = spacy.load('en_core_web_sm')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form
    ``text - start_char - end_char - label - explanation``, where the
    explanation is ``str(spacy.explain(label))``. When ``doc.ents`` is
    empty, a single fallback message is printed instead.

    Args:
        doc: An object exposing an ``ents`` sequence whose items have
            ``text``, ``start_char``, ``end_char`` and ``label_``.

    Returns:
        None. The function only prints.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print('No named entities found.')
        return

    for entity in doc.ents:
        label = entity.label_
        fields = (
            entity.text,
            str(entity.start_char),
            str(entity.end_char),
            label,
            str(spacy.explain(label)),
        )
        print(' - '.join(fields))

In [3]:
# Run the pipeline on a single descriptive sentence and list its entities.
doc1 = nlp(
    "Stray Kids is a South Korean boy band formed by JYP Entertainment through the 2017 reality show Survival"
)
show_ents(doc1)

Stray Kids - 0 - 10 - PERSON - People, including fictional
South Korean - 16 - 28 - NORP - Nationalities or religious or political groups
JYP Entertainment - 48 - 65 - ORG - Companies, agencies, institutions, etc.
2017 - 78 - 82 - DATE - Absolute or relative dates or periods
Survival - 96 - 104 - NORP - Nationalities or religious or political groups


In [4]:
import pandas as pd

# Analyse a longer passage: print each entity, then tabulate the same
# fields (text, character offsets, label) in a DataFrame.
doc2 = nlp(
    'Right now, Stray Kids is reaching the same level of success as famous groups like BTS, BLACKPINK, and NewJeans. They got over 2 billion streams for their songs in just one year. They made this record by adding up all the time people listened to their songs from all their albums and singles over the years.'
)
show_ents(doc2)

ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc2.ents
]
df = pd.DataFrame(data=ents, columns=['Text', 'Start', 'End', 'Label'])
df

Stray Kids - 11 - 21 - PERSON - People, including fictional
BTS - 82 - 85 - ORG - Companies, agencies, institutions, etc.
BLACKPINK - 87 - 96 - ORG - Companies, agencies, institutions, etc.
NewJeans - 102 - 110 - ORG - Companies, agencies, institutions, etc.
over 2 billion - 121 - 135 - CARDINAL - Numerals that do not fall under another type
just one year - 163 - 176 - DATE - Absolute or relative dates or periods
the years - 296 - 305 - DATE - Absolute or relative dates or periods


Unnamed: 0,Text,Start,End,Label
0,Stray Kids,11,21,PERSON
1,BTS,82,85,ORG
2,BLACKPINK,87,96,ORG
3,NewJeans,102,110,ORG
4,over 2 billion,121,135,CARDINAL
5,just one year,163,176,DATE
6,the years,296,305,DATE


In [5]:
text = "Stray Kids originally referred to a lost child who wants to chase their dreams and later evolved to represent the idea of finding a way together out of the ordinary."
doc = nlp(text)

# Print each entity as it is visited and collect the same four fields
# (text, start offset, end offset, label) as a tuple.
ents = []
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
    ents.append((ent.text, ent.start_char, ent.end_char, ent.label_))
print(ents)

# Show the first and, when present, the second entity as plain lists.
# `ents` has one tuple per entry of `doc.ents`, so it can stand in for it here.
if ents:
    ent_Stray = list(ents[0])
    print(ent_Stray)
if len(ents) > 1:
    ent_Kids = list(ents[1])
    print(ent_Kids)

Stray Kids 0 10 PERSON
[('Stray Kids', 0, 10, 'PERSON')]
['Stray Kids', 0, 10, 'PERSON']


In [6]:
import pandas as pd

# Tabulate the entities of the sentence: one row per entity with its text,
# character offsets and label.
text = "Stray Kids originally referred to a lost child who wants to chase their dreams and later evolved to represent the idea of finding a way together out of the ordinary."
doc = nlp(text)

ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
df = pd.DataFrame(data=ents, columns=['Text', 'Start', 'End', 'Label'])
df

Unnamed: 0,Text,Start,End,Label
0,Stray Kids,0,10,PERSON


In [8]:
from spacy import displacy

# Render the recognised entities as inline highlights in the notebook output.
text = (
    "The 2024 Summer Olympics, officially the Games of the XXXIII Olympiad and officially branded as Paris 2024, is an upcoming international multi-sport event scheduled to take place from 26 July to 11 August 2024 in France, with some competitions starting on 24 July. "
)
doc = nlp(text)
displacy.render(doc, jupyter=True, style="ent")

In [9]:
# Longer passage: highlight the entities inline first, then list them in a
# table with their character offsets and labels.
text = (
    """The modern Olympic Games or Olympics (French: Jeux olympiques) are the leading international sporting events featuring summer and winter sports competitions in which thousands of athletes from around the world participate in a variety of competitions. The Olympic Games are considered the world's foremost sports competition with more than 200 teams, representing sovereign states and territories, participating. By default, the Games generally substitute for any world championships during the year in which they take place (however, each class usually maintains its own records). The Olympic Games are held every four years. Since 1994, they have alternated between the Summer and Winter Olympics every two years during the four-year Olympiad."""
)
doc = nlp(text)
displacy.render(doc, jupyter=True, style="ent")

ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
df = pd.DataFrame(data=ents, columns=['Text', 'Start', 'End', 'Label'])
df

Unnamed: 0,Text,Start,End,Label
0,Olympic Games,11,24,EVENT
1,Olympics,28,36,EVENT
2,French,38,44,NORP
3,Jeux,46,50,PERSON
4,summer,119,125,DATE
5,winter,130,136,DATE
6,thousands,166,175,CARDINAL
7,The Olympic Games,252,269,EVENT
8,more than 200,330,343,CARDINAL
9,the year,491,499,DATE


In [10]:
# Show only EVENT and DATE entities, each with its own gradient background.
colors = {
    'EVENT': 'linear-gradient(90deg, #f2c707, #dc9ce7)',
    'DATE': 'linear-gradient(90deg,#aa9cde,#dc9ce7)',
}
options = dict(ents=['EVENT', 'DATE'], colors=colors)
displacy.render(doc, options=options, jupyter=True, style='ent')