# TRAINING A NERUAL NETWORK MODEL 

when to use:
    1. Essential- Text classification
    2. Very useful- NER
    3. Less critical- POS & Dependecy Parsing

process
1. Initialize - nlp.begin_training
2. Predict - nlp.update
3. Compare
4. Calculate
5. Update
6. revert to step 2

In [7]:
TEXTS = ['How to preorder the iPhone X',
 'iPhone X is coming',
 'Should I pay $1,000 for the iPhone X?',
 'The iPhone 8 reviews are here',
 'Your iPhone goes up to 11 today',
 'I need a new phone! Any tips?']

In [8]:
import spacy 
from spacy.lang.en import English
from spacy.matcher import Matcher
nlp = English()
matcher = Matcher(nlp.vocab)

In [9]:
# Two tokens whose lowercase forms match 'iphone' and 'x'
pattern1 = [{"LOWER": "iphone"}, {"LOWER": "x"}]

# Token whose lowercase form matches 'iphone' and an optional digit
pattern2 = [{"LOWER": "iphone"}, {"IS_DIGIT": "x", "OP": "?"}]

# Add patterns to the matcher
matcher.add('GADGET', None, pattern1, pattern2)

In [10]:
# Create a Doc object for each text in TEXTS
for doc in nlp.pipe(TEXTS):
    # Find the matches in the doc
    matches = matcher(doc)
    
    # Get a list of (start, end, label) tuples of matches in the text
    entities = [(start, end, 'GADGET') for match_id, start, end in matches]
    print(doc.text, entities)

How to preorder the iPhone X [(4, 6, 'GADGET'), (4, 5, 'GADGET')]
iPhone X is coming [(0, 2, 'GADGET'), (0, 1, 'GADGET')]
Should I pay $1,000 for the iPhone X? [(7, 9, 'GADGET'), (7, 8, 'GADGET')]
The iPhone 8 reviews are here [(1, 2, 'GADGET')]
Your iPhone goes up to 11 today [(1, 2, 'GADGET')]
I need a new phone! Any tips? []


##### training with spans

In [11]:
TRAINING_DATA = []

#create a Doc object for each text in TEXTS
for doc in nlp.pipe(TEXTS):
    #Match on the doc and create a list of matched spans
    spans = [doc[start:end] for match_id, start, end in matcher(doc)]
    
    #get the start char, end char, label for tuples of matches
    entities = [(span.start_char, span.end_char, 'GADGET') for span in spans]
    
    #format matches as a (doc.text, entities)  tuple
    training_example = (doc.text, {"entities": entities})
    
    #append to training data
    TRAINING_DATA.append(training_example)

In [12]:
print(*TRAINING_DATA, sep='\n')

('How to preorder the iPhone X', {'entities': [(20, 28, 'GADGET'), (20, 26, 'GADGET')]})
('iPhone X is coming', {'entities': [(0, 8, 'GADGET'), (0, 6, 'GADGET')]})
('Should I pay $1,000 for the iPhone X?', {'entities': [(28, 36, 'GADGET'), (28, 34, 'GADGET')]})
('The iPhone 8 reviews are here', {'entities': [(4, 10, 'GADGET')]})
('Your iPhone goes up to 11 today', {'entities': [(5, 11, 'GADGET')]})
('I need a new phone! Any tips?', {'entities': []})


TRAINING LOOP 

In [14]:
#create a blank 'en' model 
NLP = spacy.blank('en')

#create a new NER 
ner = NLP.create_pipe('ner')
nlp.add_pipe(ner)

#add label "GADGET" to pipe
ner.add_label("GADGET")

##### Building the training loop 

In [19]:
import random
nlp.begin_training()

#loop through training
for itn in range(10):
    #shuffle training data
    random.shuffle(TRAINING_DATA)
    losses = {}
    
    #batch examples together and iterate over them
    for batch in spacy.util.minibatch(TRAINING_DATA, size=2):
        
        texts = [text for text, entities in batch]
        annotations = [entities for text, entities in batch]
        
        #update model 
        nlp.update(texts, annotations, losses=losses)
        print(losses)
        



{'ner': 6.51512819543863e-08}
{'ner': 6.614709624369499e-08}
{'ner': 6.652261900226123e-08}
{'ner': 5.090853019817102e-11}
{'ner': 5.329953295477475e-11}
{'ner': 3.078630782480545e-10}
{'ner': 3.4044795407430947e-13}
{'ner': 3.642978338192603e-13}
{'ner': 8.052305506719143e-13}
{'ner': 3.598232994053063e-13}
{'ner': 3.726822629527728e-13}
{'ner': 4.631656761721895e-13}
{'ner': 2.8713117779108397e-13}
{'ner': 3.1756543163958325e-13}
{'ner': 3.2242860418382725e-13}
{'ner': 1.1053114014639007e-14}
{'ner': 1.5802349161864956e-13}
{'ner': 1.580777916356791e-13}
{'ner': 3.1525596792589103e-15}
{'ner': 8.240035998311857e-14}
{'ner': 8.243964220871929e-14}
{'ner': 4.8759706422508677e-14}
{'ner': 4.883008463186351e-14}
{'ner': 4.965230489658564e-14}
{'ner': 2.1978362822724574e-14}
{'ner': 2.265651490213919e-14}
{'ner': 2.3190005199734364e-14}
{'ner': 1.0213869019842861e-15}
{'ner': 1.0356378779968938e-14}
{'ner': 1.0402111820333674e-14}
