## Load Packages

In [1]:
from __future__ import unicode_literals, print_function
#import plac
import random
from pathlib import Path
import spacy
from tqdm import tqdm 

In [2]:
import en_core_web_sm

In [3]:
nlp1 = spacy.load('en_core_web_sm')

## Train Data

Use the following link to annotate data.

https://tecoholic.github.io/ner-annotator/

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json
  
# Opening JSON file
f = open('/content/drive/MyDrive/NLP/NLPClassV1/cricket_ner')
  
# returns JSON object as 
# a dictionary
data = json.load(f)

In [None]:
data['annotations'][0:2]

[["Brathwaite to Shubman Gill, out Caught by Rashid Khan!! Rashid Khan has gobbled this up like a candy in the deep. What ridiculous ease. Full ball on leg stump from round the wicket, Brathwaite's round the wicket angle cramped Gill up meaning that there was no power behind the shot. He merely chipped it up. Rashid settles under it, around five yards in front of the boundary, and swallows it. Shubman Gill c Rashid Khan b Brathwaite 30(20) [4s-2 6s-1]",
  {'entities': [[136, 145, 'BALLTYPE'], [293, 300, 'SHOTTYPE']]}],
 ['Axar to Uthappa, FOUR, deft touch from Uthappa, Axar dropped one short and wide of off, Uthappa makes room, waits on it, uses the pace on the ball and dabs it late, beats Sandeep at short third man and rolls away to the fence',
  {'entities': [[65, 79, 'BALLTYPE']]}]]

In [None]:
TRAIN_DATA = data['annotations']

In [None]:
type(TRAIN_DATA)

list

## Define our variables

In [None]:
model = None
output_dir=Path("/content/ner/")
n_iter=100

## Load the model

In [None]:
if model is not None:
    nlp = spacy.load(model)  
    print("Loaded model '%s'" % model)
else:
    nlp = spacy.blank('en')  
    print("Created blank 'en' model")

Created blank 'en' model


## Set up the pipeline

In [None]:
if 'ner' not in nlp.pipe_names:
    ner = nlp.add_pipe('ner', last=True)
else:
    ner = nlp.get_pipe('ner')

## Train the Recognizer

In [None]:
for _, annotations in TRAIN_DATA:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])

In [None]:
from spacy.training import Example

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    
    # Init loss
    losses = None

    # Init and configure optimizer
    optimizer = nlp.create_optimizer()
    optimizer.learn_rate = 0.001  # Change to some lr you prefers
    batch_size = 32  # Choose batch size you prefers

    for itn in range(n_iter):
        print("Starting iterations " + str(itn))
        random.shuffle(TRAIN_DATA)
        losses = {}

        # Batch the examples and iterate over them
        for batch in spacy.util.minibatch(TRAIN_DATA, size=batch_size):
            # Create Example instance for each training example in mini batch
            examples = [Example.from_dict(nlp.make_doc(text), annotations) for text, annotations in batch]
            # Update model with mini batch
            losses = nlp.update(examples, drop=0.2, sgd=optimizer)

    print(losses) 

Starting iterations 0
Starting iterations 1
Starting iterations 2
Starting iterations 3
Starting iterations 4
Starting iterations 5
Starting iterations 6
Starting iterations 7
Starting iterations 8
Starting iterations 9
Starting iterations 10
Starting iterations 11
Starting iterations 12
Starting iterations 13
Starting iterations 14
Starting iterations 15
Starting iterations 16
Starting iterations 17
Starting iterations 18
Starting iterations 19
Starting iterations 20
Starting iterations 21
Starting iterations 22
Starting iterations 23
Starting iterations 24
Starting iterations 25
Starting iterations 26
Starting iterations 27
Starting iterations 28
Starting iterations 29
Starting iterations 30
Starting iterations 31
Starting iterations 32
Starting iterations 33
Starting iterations 34
Starting iterations 35
Starting iterations 36
Starting iterations 37
Starting iterations 38
Starting iterations 39
Starting iterations 40
Starting iterations 41
Starting iterations 42
Starting iterations 4

## Test the trained model

In [None]:
test_data = [
"Ashwin to Narine, FOUR, there's no stopping him. Walks down the wicket and chips the widish delivery over cover. Long-off sprints across to stop it but the ball wins the race",
"Southee to Rahul, SIX, hello says the batsman as he was greeted with a short ball. The batsman did not waste time to transfer his weight on the back foot and hammers it over deep backward square leg off the front foot. Terrific hit",
"Dwayne Bravo to Pooran, SIX, high full toss, and whacked with disdain. Pooran doesn't even bother looking up after flat-batting this into cow corner",
"Murugan Ashwin to Manish Pandey, FOUR, delicate hands from Pandey. Ashwin pulls his length and feeds him with width outside off, Pandey goes deep in his crease and opens his bat face at the last moment to place it in the gap past point. The timing is flawless and the sweeper in the deep had no chance",
"Axar to Dwayne Smith, SIX, just about clears the man at long-on. An almost tossed up ball. Dwayne Smith goes with the swing and muscles it over the fence",
"Navdeep Saini to Rahul, FOUR, 116.8kph and Rahul is waiting for it. Saini has to mix it up. This one had width as well, Rahul throws his bat at the right time to bisect backward point and short third man",
"McClenaghan to Karthik, SIX, he is a small man but when he hits them, he certainly packs a punch. Short of length delivery angled across off, Karthik goes deep inside the crease and pulls it over deep mid-wicket. Flat and all the way. 50-run stand and 150 up for GL",
"Rashid Khan to Pant, out Lbw!! Pant has been given out LBW, he reviews. No issues with the front foot. Clearly no bat there. Only factor that could favour Pant is the height. Hit in-line as well. Ball-tracker is out: 2 reds. Pitched - in-line. Impact - in-line. Wickets - umpire's call. Clipping the top of middle. What a fightback from SRH this has been. Skiddy leg-break from Rashid on a short of length, Pant went back for the pull and it didn't rise much as he expected. Misses and is rapped on the back thigh. Was given out on-field, he reviewed in hope but in vain. Pant lbw b Rashid Khan 18(19) [4s-1]",
"Nadeem to Gayle, out Caught by Zaheer!! Yes, the break does its job. The leg-spinner from Nadeem, spinning the other way and Gayle's committed slog-sweep has him holing out. The air on this one makes him lose his timing as well with the ball only managing the toe-end as he swings it down the throat of mid-wicket. Delhi needed this big time, pouching Gayle just when he was about to explode. Gayle c Zaheer b Nadeem 48(38) [4s-3 6s-3]",
"K Paul to Rohit, no ball, SIX, what was that? Seemed like it slipped out of Paul's hands. Delivers a waist-high full toss, Rohit gets into a great position and slams the pull towards deep square leg. Morris, the fielder was setting himself near the fence but it soars over his head for a six. It's called a no-ball and it also brings up the 50-run stand between Rohit and de Kock - their third consecutive half-century partnership",
"Badree to Uthappa, out Lbw!! Badree strikes and RCB are fighting back into the contest. This was the straighter one from the leg-break bowler, pitched on leg and slid through, Uthappa was looking to work it towards the on-side, ended up playing right across the line and missed, would have been going on and hitting a good chunk of middle and leg stump. The umpire takes his time and lifts the dreaded finger. Uthappa lbw b Badree 11(9) [4s-2]",
"Basil Thampi to Hardik Pandya, out Caught by Ishan Kishan!! Looks to go big on the on-side. Was it needed? He holes out at deep mid-wicket as Kishan gobbles that up. More twist in this gripping contest. That was full and the batsman tried to get undeneath it, but failed to scale substantial distance on it. Hardik Pandya c Ishan Kishan b Basil Thampi 4(5)",
"Krunal Pandya to Mandeep, 1 run, fired it in quick on off, Mandeep pushes it to the right of Rohit at extra cover, who swiftly moves across and releases the throw in one motion. Hits the target but Mandeep's makes it in by dragging his bat",
"Christian to Ishan Kishan, 2 runs, short of length delivery angled in on middle, 116kph, thwacked through square leg on the front foot. They amble back for two",
"McClenaghan to Cutting, FOUR, no fuss from Cutting! Arrives and gets a boundary, with utmost ease. A friendly full toss outside off, Cutting shows the full face of the bat, McClenaghan's pace takes care of the rest. Raced past mid-off",
"Shakib to Rayudu, FOUR, now he is playing with the field. Walked across his stumps to the flighted delivery, sat down and broomed it backward of square leg, struck it fine enough to beat the man in the deep",
"Rahul Chahar to Shreyas Iyer, out Bowled!! Quite a ripper from Chahar, pitching leg and hitting off. Along the way, it brought Iyer tentatively forward, then beat him with the dip, before spinning past his outside edge. Mumbai Indians are all over the Capitals. Shreyas Iyer b Rahul Chahar 3(6)",
"Jadeja to Pant, FOUR, now this is a back foot punch over covers. You have to see it to believe it. Jadeja is from over the wicket to the left hander here. Just a touch short, Pant is so early there to get back and punch it up and over. Graceful, if it's ever been any more brutal",
"Bhuvneshwar to A Russell, FOUR, it's a joke. It really is. It's a yorker, no less, but Russell's adept at squeezing it away from leg-stump with a drill into long-off",
"Hardik Pandya to A Russell, FOUR, cheeky and four, banged in short and wide as Russell was backing away - he waits on the ball and then ramps it over short third man, almost went all the way, bounced just in front of the ropes",
"S Kaul to Corey Anderson, SIX, after a barrage of mistimed swipes and pulls finally Corey Anderson gets one off the middle of the willow. And when he gets those off the meat, no ground is big enough. A length ball served with gentle pace. What else does he need? Swings and swings well to clear the man at long-on",
"Chris Morris to Bairstow, FOUR, he's carrying on from where he right off in Hyderabad. DC are helping him by offering pace too. Morris bangs this short and that's right down Bairstow's ally. Forehand pull pumped away to the deep mid-wicket fence",
"Woakes to Raina, 1 run, Raina and Woakes collide. Short and slow outside off, Raina cuts it on the bounce to backward point. Both were ball watching and Raina dashed into the bowler, Woakes is down on the ground and Raina makes his crease. The GL skipper goes back to check on Woakes and the Englishman is fine",
"Stokes to Warner, FOUR, another slower one @112.9km/h, Warner backs away into the on-side, gives himself some space, and slaps it over the in-field towards the sweeper cover boundary for four",
"Rabada to Mandeep, SIX, Mandeep finishes with a maximum. Rabada misses his length and bowls it right in the arc, Mandeep clears his front leg and smashes it straight back over. The boundaries straight are not big and it clears 'em with ease",
"Stokes to Parthiv Patel, FOUR, offers pace and width, Parthiv will feed on that all day, he cracks the back cut behind point and pace on the ball meant it raced away to the fence beating third man",
"Rashid Khan to Finch, out Lbw!! Rashid is leaving the Lions flummoxed. This was the wrong 'un, pitched just outside off and spun back into the right-hander, Finch didn't read it one bit, played for the straighter one, beaten on the inside edge and struck bang in front of middle. The height wasn't an issue, the ball would have been crashing into the leg-stump. Easy call for the umpire and Gujarat Lions slip into further trouble. Finch lbw b Rashid Khan 3(5)",
"Tom Curran to Watson, SIX, this is entertainment. Full length delivery around off, Watson keeps his head still and powers it over long-off for six more, the bowler is under pressure, straightaway",
"Plunkett to Watson, SIX, pick up off the pads. And the spectators pick it in the stands. Onto the hips as it angled in and the batsman just clipped the ball away with a lot of nonchalance to soar it over deep backward square. Back-to-back sixes for Watto",
"Boult to Nikhil Naik, out Caught by Hardik Pandya!! Sharp bouncer, a left-arm round angle that climbs up around the head, which Naik has no option but to try pulling off his front foot. Top-edged, tame, and deep mid-wicket. Nikhil Naik c Hardik Pandya b Boult 1(3)",
"Pat Cummins to Faulkner, FOUR, streaky, but GL will take it. It was a good over from Cummins. Full, fast and slanting into the batsman, Faulkner heaves across the line and the inside edge eludes a diving Sanju Samson",
"Arshdeep Singh to Abhishek Sharma, SIX, wow! That's some shot. Just a chip from Sharma and the ball has cleared the rope. It was the slower delivery and Sharma was deceived for a moment there, checks his loft and manages to time it beautifully, goes all the way straight down the ground",
"Chris Morris to Ashwin, out Bowled!! Cleaned up! It has all gone downward in the last 20 minutes or so. Morris picks up his third. A juicy full toss and Ashwin could have hit it anywhere, he backs away and is beaten for pace, the ball brushes the off-stump to light it up. Ashwin b Chris Morris 3(4)",
"Nadeem to de Villiers, FOUR, swept away for four. De Villiers has started off brilliantly. Nadeem lands it on a length for him and is dispatched",
"Binny to Watson, 1 run, edged and dropped! Rahul Tripathi the culprit on that occasion. Puts down a sitter. Was a full length delivery outside off, Watson goes for a drive and the away swing induces an outside edge. However, Tripathi - the lone slipper makes a mess out of it",
"Mohit Sharma to Dhawan, FOUR, Dhawan picks the length quickly. Stands tall and pulls it well in front of square. Gets his third boundary with that",
"Rashid Khan to Pooran, out Caught by T Natarajan!! He tried to save his team-mates, and in the end perishes. Didn't take singles off the previous two balls, sees the ball on the shorter side and goes after it with a cut shot. Isn't in control of it and offers a dolly to Natarajan at backward point. With that goes KXIP's hopes! Pooran c T Natarajan b Rashid Khan 77(37) [4s-5 6s-7]",
"Siraj to Parthiv Patel, SIX, that's a cracking cut shot from the little man. Siraj bowls a 135kph short ball, Parthiv is an excellent player of the cut shot, he deliberately cuts hard with an open face and hits over backward point, it's good enough to go all the way",
"Hardik Pandya to Nitish Rana, SIX, second ball and Rana puts it into the stand. Gentle full delivery outside off, Rana just extends his arms and lofts it handsomely over mid-off, the timing was such that it goes the distance",
"Rashid Khan to Moeen Ali, FOUR, from round the wicket, tossed up and Moeen Ali has smote it back over Rashid Khan's head. Flat hit and bisects long-off and long-on. Super shot. He is continuing his stroke-making",
"Aniket Choudhary to Chris Lynn, FOUR, drilled. Aniket pitches it up and is dispatched with disdain. Lynn does not bother to move his feet and hammers the drive past a hapless AB at mid-off",
"Lockie Ferguson to Rohit, FOUR, Ferguson is relieving some pressure off Rohit and MI. Length ball on his pads, Rohit gleefully puts it away behind square on the on-side. It went in the air - but well wide of Unadkat at short fine",
"Boult to Hardik Pandya, FOUR, full on the pads, and Hardik has done it! He whips it away past a diving Rajpoot at the fine leg boundary before running down and cutting a pose for the camera",
"Navdeep Saini to Miller, out Caught by de Villiers!! ABD dives and ABD snaffles it. Kohli with the fist pump. It was a length delivery on the stumps, Miller wanted to smash it over long-on. He hasn't timed it all evening and he failed to do so on this occassion also. ABD dives to his right emphatically and takes a beauty! Miller c de Villiers b Navdeep Saini 24(25) [4s-2]",
"Basil Thampi to Corey Anderson, FOUR, that should ease his nerves. Slightest of widths outside off and Anderson opens his bat face at the point of impact and punches it through square on the off-side",
"Stokes to Mayank Agarwal, out Caught by R Tripathi!! Dismal IPL for Mayank Agarwal continues. He will consider himself massively unlucky, a meter to the fielder's right or left and that was a four. It was a short of length ball, Mayank Agarwal gets in a good position, connects well, he does everything well but finds the fielder at deep square leg. This just get to show how imperative placement is in cricket. Mayank Agarwal c R Tripathi b Stokes 2(3)",
"McClenaghan to Samson, FOUR, that's why you can't not like him. McClenaghan strategy is pretty obvious: Bowl it slow and angle it across, because there's third man and backward point waiting to cut the runs off. And Samson here has found the tiniest of gaps between the fielders. Beautifully done, opening the bat face just enough to cleave the fielders",
"Lamichhane to Nikhil Naik, out Lbw!! Nikhil Naik's struggle comes to an end. Walks back after consuming 16 deliveries for his 7. Scored a boundary that too off an inside edge. Hard luck. Was the wrong 'un around middle and the batsman got beaten due to the googly. Struck in front of middle and going on to hit the leg-stump. Lamichanne makes an impact in his first game of the season. Nikhil Naik lbw b Lamichhane 7(16) [4s-1]",
"Chahal to Raina, FOUR, poor delivery from Chahal. Rank long hop and Raina had all the time in world to pick his spot, transfers his weight on the back foot and slots the pull past square leg, no chance for the man in the deep",
"Corey Anderson to Rohit, FOUR, clears his front leg. Goes conventional. Full meat, over the bolwer, and trickling further in. Deadly death batting this"]

In [None]:
for text in test_data:
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

Entities [('chips', 'SHOTTYPE')]
Entities [('short ball', 'BALLTYPE'), ('hammers', 'SHOTTYPE')]
Entities []
Entities [('length', 'BALLTYPE')]
Entities []
Entities [('had width', 'BALLTYPE')]
Entities [('pulls', 'SHOTTYPE')]
Entities [('fightback', 'BALLTYPE'), ('leg-break', 'BALLTYPE'), ('short of length', 'BALLTYPE')]
Entities []
Entities [('pull', 'SHOTTYPE'), ('half-century', 'SHOTTYPE')]
Entities [('lifts', 'SHOTTYPE')]
Entities []
Entities []
Entities [('short of length', 'BALLTYPE')]
Entities []
Entities []
Entities []
Entities [('punch', 'SHOTTYPE')]
Entities [('yorker', 'BALLTYPE')]
Entities [('short', 'BALLTYPE'), ('ramps', 'SHOTTYPE')]
Entities [('pulls', 'SHOTTYPE'), ('length', 'BALLTYPE')]
Entities [('short and that', 'BALLTYPE')]
Entities []
Entities [('slaps', 'SHOTTYPE')]
Entities [('length', 'BALLTYPE'), ('straight', 'SHOTTYPE'), ('straight are not', 'SHOTTYPE')]
Entities [('cut', 'SHOTTYPE')]
Entities []
Entities [('Full length', 'BALLTYPE'), ('powers', 'SHOTTYPE')]
En

## Save the model

In [None]:
if output_dir is not None:
    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)        

Saved model to /content/ner
