### Named Entity Recognition

In [5]:
import spacy
from nltk import sent_tokenize

### Load the model

In [6]:
def load_model():
    """
    Load the spaCy transformer model.
    """
    nlp = spacy.load("en_core_web_trf")
    return nlp

In [20]:
nlp_model = load_model()

### Load the datatset

In [9]:
import os 
import sys
import pathlib
folder_path = pathlib.Path().parent.resolve()
sys.path.append(os.path.join(folder_path, '../'))
from utils import load_subtitles_dataset

In [10]:
dataset_path = '../data/Subtitles/'
df = load_subtitles_dataset(dataset_path)

In [11]:
df.head()

Unnamed: 0,episode,script
0,94,We are Fighting Dreamers aiming high\n Fightin...
1,80,We are Fighting Dreamers aiming high\n Fightin...
2,32,"Press down hard on the gas\n That’s right, the..."
3,185,"Rock away your existence,\n Shouting that you ..."
4,191,"Rock away your existence,\n Shouting that you ..."


In [13]:
sample_script = df.iloc[0]['script']
sample_script

'We are Fighting Dreamers aiming high\n Fighting Dreamers don\'t care what people think about them\n Fighting Dreamers follow what they believe\n Oli Oli Oli Oh! Just go my way\n Right here right now (Bang) Hit it straight like a line drive!\n Right here right now (Burn)\n Down a difficult road filled with endless struggles\n Where do you think you are going following someone else\'s map?\n An insightful crow comes along to tear up the map\n Now open your eyes and take a look at the truth (Yeah!)\n There\'s nothing to lose, so let\'s GO!!!\n We are Fighting Dreamers aiming high\n Fighting Dreamers don\'t care what people think about them\n Fighting Dreamers follow what they believe\n Oli Oli Oli Oh! Just go my way\n Right here right now (Bang) Hit it straight like a line drive!\n Right here right now (Burn) We\'re gonna do it and do our best!\n Right here right now (Bang) Hit it straight like a line drive!\n Right here right now (Burn) We\'re gonna do it and do our best! BANG!\n My bod

In [15]:
sentences = sent_tokenize(sample_script)
sentences[60:90]

['To think I’d already have my handicap spotted.',
 'You are such a disgraceful guy as always…\n Dang it.',
 'It kind of ticks me off when the Pervy Sage is mocked.',
 'Summoning…Jutsu!',
 'The Nine-Tailed Fox kid from the Chunin Exam.',
 'I wonder if I should’ve killed him then, after all.',
 'For the sake of the Akatsuki organization,\n I acknowledged his achievement… and let him slide by.',
 'But the only ones who can break a Five-Pronged Spell\n are the Sannin members and the Third Hokage.',
 'Which means Jiraiya may have already taught it to Naruto.',
 'Perhaps.',
 'If he’s able to control the Nine-Tailed Fox’s power…\n Oh…\n Th-This is…\n It doesn’t quite look to be so.',
 'He isn’t endowed with Shinobi ability to begin with, so…\n Why?',
 'He’s still got a long way to go, sure enough.',
 'Why?!',
 'Hello.',
 'Hey, Gamatatsu.',
 'Why did you come out?',
 'Oh, Brother Gamakichi.',
 'It’s the first time I’ve been Summoned.',
 'I-I wonder if I can do my best?',
 'Idiot.',
 'Get some

In [16]:
sentence = ".".join(sentences[60:90])
sentence

'To think I’d already have my handicap spotted..You are such a disgraceful guy as always…\n Dang it..It kind of ticks me off when the Pervy Sage is mocked..Summoning…Jutsu!.The Nine-Tailed Fox kid from the Chunin Exam..I wonder if I should’ve killed him then, after all..For the sake of the Akatsuki organization,\n I acknowledged his achievement… and let him slide by..But the only ones who can break a Five-Pronged Spell\n are the Sannin members and the Third Hokage..Which means Jiraiya may have already taught it to Naruto..Perhaps..If he’s able to control the Nine-Tailed Fox’s power…\n Oh…\n Th-This is…\n It doesn’t quite look to be so..He isn’t endowed with Shinobi ability to begin with, so…\n Why?.He’s still got a long way to go, sure enough..Why?!.Hello..Hey, Gamatatsu..Why did you come out?.Oh, Brother Gamakichi..It’s the first time I’ve been Summoned..I-I wonder if I can do my best?.Idiot..Get some snacks or something and hide..What?.I can get snacks?.Goody-goody..Here I go..I’ll t

### Running the model

In [21]:
doc = nlp_model(sentence)
doc.ents

(the Pervy Sage,
 the Chunin Exam,
 Akatsuki,
 Five,
 Sannin,
 Third,
 Jiraiya,
 Naruto,
 Shinobi,
 Gamatatsu,
 Gamakichi,
 first,
 Jiraiya,
 Jiraiya,
 Sannin,
 Naruto)

In [22]:
for entity in doc.ents:
    print(entity.text, entity.label_)

the Pervy Sage PERSON
the Chunin Exam EVENT
Akatsuki ORG
Five CARDINAL
Sannin NORP
Third ORDINAL
Jiraiya PERSON
Naruto PERSON
Shinobi NORP
Gamatatsu PERSON
Gamakichi PERSON
first ORDINAL
Jiraiya PERSON
Jiraiya PERSON
Sannin NORP
Naruto PERSON


In [23]:
def get_ners_inference(script):
    script_sentences = sent_tokenize(script)
    
    ner_output = []
    for sentence in script_sentences:
        doc = nlp_model(sentence)
        ners = set()
        for entity in doc.ents:
            if entity.label_ == 'PERSON':
                full_name = entity.text
                first_name = full_name.split(" ")[0]
                first_name = first_name.strip()
                ners.add(first_name)
        ner_output.append(ners)
        
    return ner_output

In [25]:
df =df.head(10)
df

Unnamed: 0,episode,script
0,94,We are Fighting Dreamers aiming high\n Fightin...
1,80,We are Fighting Dreamers aiming high\n Fightin...
2,32,"Press down hard on the gas\n That’s right, the..."
3,185,"Rock away your existence,\n Shouting that you ..."
4,191,"Rock away your existence,\n Shouting that you ..."
5,190,"Rock away your existence,\n Shouting that you ..."
6,184,"Rock away your existence,\n Shouting that you ..."
7,27,"Press down hard on the gas\n That’s right, the..."
8,33,"Press down hard on the gas\n That’s right, the..."
9,81,We are Fighting Dreamers aiming high\n Fightin...


In [26]:
df['ners'] = df['script'].apply(get_ners_inference)

In [27]:
df

Unnamed: 0,episode,script,ners
0,94,We are Fighting Dreamers aiming high\n Fightin...,"[{Oli}, {}, {}, {}, {}, {Oli}, {}, {Burn}, {},..."
1,80,We are Fighting Dreamers aiming high\n Fightin...,"[{Oli}, {}, {}, {}, {}, {Oli}, {}, {Burn}, {},..."
2,32,"Press down hard on the gas\n That’s right, the...","[{}, {}, {}, {}, {}, {}, {Lee}, {}, {}, {}, {}..."
3,185,"Rock away your existence,\n Shouting that you ...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {..."
4,191,"Rock away your existence,\n Shouting that you ...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {Hinata},..."
5,190,"Rock away your existence,\n Shouting that you ...","[{}, {}, {}, {}, {}, {}, {}, {Naruto}, {}, {},..."
6,184,"Rock away your existence,\n Shouting that you ...","[{}, {}, {}, {}, {}, {Akamaru}, {}, {}, {Kiba}..."
7,27,"Press down hard on the gas\n That’s right, the...","[{}, {}, {}, {}, {}, {}, {}, {}, {Naruto}, {},..."
8,33,"Press down hard on the gas\n That’s right, the...","[{}, {}, {}, {}, {}, {}, {}, {}, {}, {Sasuke},..."
9,81,We are Fighting Dreamers aiming high\n Fightin...,"[{Oli}, {}, {}, {}, {}, {Oli}, {}, {Burn}, {},..."


### Character network