In [22]:
import spacy
import random
import pickle
from spacy.util import minibatch, compounding
import sys

In [None]:
# Load the pickled training examples. The `with` block guarantees the file
# handle is closed even if unpickling raises.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load 'train_data.pkl' if it comes from a trusted source.
with open('train_data.pkl', 'rb') as train_file:
    training_data = pickle.load(train_file)

In [5]:
# Create a blank English pipeline; train_model() adds the 'ner' component to it and trains it.
nlp = spacy.blank('en')

# Training routine for the NER component of the module-level `nlp` pipeline.

def train_model(train_data, n_iter=10, drop=0.5):
    """Train the ``ner`` component of the module-level ``nlp`` pipeline.

    The pipeline is modified in place: an ``ner`` component is created and
    appended if the pipeline has none, every label found in the annotations
    is registered on it, and ``nlp.update`` is then called once per example
    for ``n_iter`` passes with all other components disabled.

    Args:
        train_data: Iterable of ``(text, annotations)`` pairs. ``annotations``
            is a dict whose optional ``"entities"`` entry is an iterable of
            entity tuples carrying the label at index 2 (presumably
            ``(start, end, label)`` -- confirm against whatever produced the
            data). The caller's object is never reordered; shuffling is done
            on a private copy.
        n_iter: Number of passes over the examples. Defaults to 10.
        drop: Dropout value forwarded to ``nlp.update``. Defaults to 0.5.

    Returns:
        None. After each pass the accumulated ``losses`` dict is printed to
        stdout; if any example raised during ``nlp.update`` a one-line
        summary of the skipped examples is written to stderr.
    """
    # Reuse the existing NER component, or create one and append it last.
    # NOTE(review): create_pipe / begin_training / update(texts, annotations)
    # look like the spaCy v2 training API -- confirm the installed version.
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        ner = nlp.get_pipe('ner')

    # Private copy: shuffling below must not reorder the caller's list,
    # otherwise later index-based lookups on it silently change meaning.
    examples = list(train_data)

    # Register every entity label that occurs in the annotations.
    # `or []` tolerates examples without an "entities" entry.
    for _, annotations in examples:
        for ent in annotations.get("entities") or []:
            ner.add_label(ent[2])

    # Disable everything except NER so only that component is updated.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):
        # begin_training() is called unconditionally on every invocation, so a
        # second train_model() call presumably restarts from freshly
        # initialized weights -- confirm if incremental training is wanted.
        optimizer = nlp.begin_training()
        for _ in range(n_iter):
            random.shuffle(examples)
            losses = {}
            skipped = 0
            first_error = None

            for text, annotations in examples:
                try:
                    nlp.update(
                        [text],  # batch of texts
                        [annotations],  # batch of annotations
                        drop=drop,  # dropout - make it harder to memorise data
                        sgd=optimizer,
                        losses=losses)
                except Exception as exc:
                    # Stay best-effort (one bad example must not abort the
                    # run) but keep count so failures are visible.
                    skipped += 1
                    if first_error is None:
                        first_error = exc
            print("Losses", losses)
            if skipped:
                print(
                    f"Skipped {skipped} of {len(examples)} examples that "
                    f"failed in nlp.update; first error: {first_error!r}",
                    file=sys.stderr)


In [None]:
# Train the pipeline's NER component on the examples loaded from the pickle.
train_model(training_data)

In [7]:
# Save the trained pipeline held in `nlp` to disk under 'nlp_model_summ_spacy'.
nlp.to_disk('nlp_model_summ_spacy')

In [None]:
# Load the saved pipeline back from 'nlp_model_summ_spacy' into `nlp_model`.
nlp_model = spacy.load('nlp_model_summ_spacy')


In [17]:
# Display the raw text of the training example at index 2.
training_data[2][0]

'Saurabh Saurabh Bengaluru, Karnataka - Email me on Indeed: indeed.com/r/Saurabh- Saurabh/87e6b26903460061  Willing to relocate: Anywhere  WORK EXPERIENCE  Developer Support Engineer  Microsoft iGTSC -  Bangalore Urban, Karnataka -  August 2007 to Present  EDUCATION  Certificate of Achievement  Microsoft Virtual Academy  December 2016  SKILLS  C, C++, Data Structure, Java (1 year)  ADDITIONAL INFORMATION  ➢ Managerial Skills: Organizer and volunteer at many inter and intra college events, symposia etc. in college. ➢ Co-curricular: - 1. Selected by College (TISL) to train the first-year engineering students under the Finishing School Program (January 2017) 2. District level soccer and cricket player. 3. Honored at State Level Singing competition; Performed in college.  ➢ Leadership Skills: Effective leadership; associated with many student led organizations; Student leader for Oxygen, A movement for and by Students (2007 &amp; 2009)  DECLARATION  The abovementioned is true to the best o

In [18]:
# Run the reloaded pipeline on the index-2 example and print each predicted
# entity as "<upper-cased label, padded to 30 columns> - <entity text>".
doc = nlp_model(training_data[2][0])
for ent in doc.ents:
    print(f'{ent.label_.upper():{30}} - {ent.text}')

NAME                           - Saurabh Saurabh
LOCATION                       - Bengaluru
EMAIL ADDRESS                  - indeed.com/r/Saurabh- Saurabh/87e6b26903460061
DESIGNATION                    - Developer Support
DEGREE                         - Certificate of Achievement  Microsoft Virtual Academy  December 2016  
SKILLS                         - C, C++, Data Structure, Java (1 year)  ADDITIONAL INFORMATION  ➢ Managerial Skills: Organizer and volunteer at many inter and intra college events, symposia etc. in college. ➢ Co-curricular: - 1. Selected by College (TISL) to train the first-year engineering students under the Finishing School Program (January 2017) 2. District level soccer and cricket player. 3. Honored at State Level Singing competition; Performed in college.  ➢ Leadership Skills: Effective leadership; associated with many student led organizations; Student leader for Oxygen, A movement for and by Students (2007 &amp; 2009)  DECLARATION
SKILLS                     