In [1]:
from spacy import displacy
import spacy
from spacy.matcher import Matcher
from spacy.language import Language
from spacy.training.example import Example
from spacy.tokens import DocBin
from spacy.pipeline.entityruler import EntityRuler

import pandas as pd
import itertools
import regex as re
import numpy as np
import json
import random

# Shared pipeline object for the whole notebook: the cells below add an
# entity ruler to it, update it on TRAIN_DATA, and finally save it to disk.
nlp = spacy.load('en_core_web_sm')

In [2]:
def get_name_indices(sentence: str, names: "str | list[str]"):
    """Find the character spans of every occurrence of each name in a sentence.

    Annotation helper: the printed/returned tuples have the same
    ``(start, end, "PERSON")`` shape as the "entities" lists in TRAIN_DATA.

    Args:
        sentence: Text to search.
        names: Names to look for. A single string is treated as one name
            (rather than being iterated character by character).

    Returns:
        A list of ``(start, end, "PERSON")`` tuples, grouped by name in the
        order the names were given, with ``end`` exclusive. The list is also
        printed so the spans can be copied when the result is not captured.
    """
    if isinstance(names, str):
        names = [names]

    ents = []
    for name in names:
        if not name:
            # An empty pattern matches at every position; there is nothing to label.
            continue
        # Names are literals, not patterns: without escaping, the dots in
        # "T.J. Ward" would match any character and "(" or "+" would break.
        literal = re.escape(name)
        ents.extend(
            (occ.start(), occ.end(), "PERSON")
            for occ in re.finditer(literal, sentence)
        )
    print(ents)
    return ents

# ----------------------------------------------------------

# Entity ruler inserted ahead of the pipeline's "ner" component.
ruler = nlp.add_pipe("entity_ruler", before="ner")

# String patterns for specific player names, all labelled PERSON.
name_patterns = [
    {"label": "PERSON", "pattern": player}
    for player in (
        "Ha Ha Clinton-Dix",
        "Robert Griffin III",
        "Brian De La Puente",
        "C.J. Ah You",
        "Tyrion Davis-Price",
        "Yetur Gross-Matos",
    )
]

# String patterns for penalty names, all labelled PENALTY.
penalty_patterns = [
    {"label": "PENALTY", "pattern": foul}
    for foul in (
        "Horse Collar Tackle",
        "Low Block",
        "Unnecessary Roughness",
        "False Start",
        "Offside on Free Kick",
        "Neutral Zone Infraction",
        "Defensive Pass Interference",
        "Offensive Pass Interference",
        "Defensive Holding",
        "Offensive Holding",
        "Defensive Offside",
        "Offensive Offside",
        "Illegal Block Above the Waist",
        "Illegal Use of Hands",
        "Illegal Formation",
        "Illegal Touch Pass",
        "Illegal Motion",
        "Ineligible Downfield Kick",
        "Ineligible Downfield Pass",
        "Delay of Game",
        "Unsportsmanlike Conduct",
        "Defensive 12 On-field",
        "Offensive 12 On-field",
        "Tripping",
    )
]

# Register both groups with the ruler in one call (names first, then penalties).
ruler.add_patterns([*name_patterns, *penalty_patterns])

In [3]:
# Hand-annotated training examples for the update loop in the next cell.
# Each item is a 2-tuple:
#   (play description text, {"entities": [(start, end, label), ...]})
# where start/end are character offsets into the text (end exclusive, the same
# convention get_name_indices prints) and every label used here is "PERSON".
# Spans within one example are not always listed in left-to-right order.
# NOTE(review): the "Timeout #N by <team>" examples label the team name
# (e.g. "San Francisco 49ers") as PERSON -- confirm that this is intended.
TRAIN_DATA = [
    (
        "Thomas Morstead kicks off 68 yards, returned by Randall Cobb for 27 yards (tackle by Leigh Torrence)", 
        {"entities": [(0, 15, "PERSON"), (48, 60, "PERSON"), (85, 99, "PERSON")]}
    ),
    (
        "Aaron Rodgers pass complete short right to Greg Jennings for 9 yards (tackle by Tracy Porter)", 
        {"entities": [(0, 13, 'PERSON'), (43, 56, 'PERSON'), (80, 92, 'PERSON')]}
    ),
    (
        "Mason Crosby kicks extra point good", 
        {"entities": [(0, 12, 'PERSON')]}
    ),
    (
        "Michael Turner left guard for no gain (tackle by Matt Toeaina and Israel Idonije)",
        {"entities": [(0, 14, 'PERSON'), (49, 61, 'PERSON'), (66, 80, 'PERSON')]}
    ),
    (
        "Tim Masthay punts 38 yards, muffed catch by Earl Thomas, recovered by Ha Ha Clinton-Dix and returned for no gain",
        {"entities": [(0, 11, 'PERSON'), (44, 55, 'PERSON'), (70, 87, 'PERSON')]}
    ),
    (
        "Aaron Rodgers sacked by Michael Bennett for 0 yards",
        {"entities": [(0, 13, 'PERSON'), (24, 39, 'PERSON')]}
    ),
    (
        "Russell Wilson pass complete deep left to Percy Harvin for 33 yards (tackle by Ha Ha Clinton-Dix)",
        {"entities": [(0, 14, 'PERSON'), (42, 54, 'PERSON'), (79, 96, 'PERSON')]}
    ),
    (
        "Teddy Bridgewater sacked by Ha Ha Clinton-Dix for -10 yards",
        {"entities": [(0, 17, 'PERSON'), (28, 45, 'PERSON')]}
    ),
    (
        "Teddy Bridgewater pass complete deep right to Kyle Rudolph for 33 yards (tackle by Ha Ha Clinton-Dix)",
        {"entities": [(0, 17, 'PERSON'), (46, 58, 'PERSON'), (83, 100, 'PERSON')]}
    ),
    (
        "Teddy Bridgewater pass complete short left to Zach Line for 7 yards (tackle by Ha Ha Clinton-Dix)",
        {"entities": [(0, 17, 'PERSON'), (46, 55, 'PERSON'), (79, 96, 'PERSON')]}
    ),
    (
        "Carson Wentz pass incomplete intended for Mike Strachan. Penalty on D.J. Reed: Defensive Pass Interference, 13 yards (accepted) (no play)",
        {"entities": [(0, 12, 'PERSON'), (42, 55, 'PERSON'), (68, 77, 'PERSON')]}
    ),
    (
        "Tank Bigsby right end for 1 yard (tackle by Terrel Bernard and Dane Jackson). Alex Okafor for -10. Trevor Lawrence JAC-49.",
        {"entities": [(0, 11, 'PERSON'), (44, 58, 'PERSON'), (63, 75, 'PERSON'), (78, 89, 'PERSON'), (99, 114, 'PERSON')]}
    ),
    (
        "Patrick Mahomes pass incomplete. Penalty on Creed Humphrey: Ineligible Downfield Pass, 5 yards (declined)",
        {"entities": [(0, 15, 'PERSON'), (44, 58, 'PERSON')]}
    ),
    (
        "Russell Wilson sacked by Ha Ha Clinton-Dix for 0 yards",
        {"entities": [(0, 14, 'PERSON'), (25, 42, 'PERSON')]}
    ),
    (
        "Jon Ryan punts 35 yards, fair catch by Ted Ginn Jr.",
        {"entities": [(0, 8, 'PERSON'), (39, 51, 'PERSON')]}
    ),
    (
        "Leon Washington left tackle for 7 yards (tackle by Patrick Willis). Penalty on Russell Okung: Offensive Holding (Offsetting), Penalty on Ahmad Brooks: Defensive Offside (Offsetting) (no play)",
        {"entities": [(0, 15, 'PERSON'), (51, 65, 'PERSON'), (79, 92, 'PERSON'), (137, 149, 'PERSON')]}
    ),
    (
        "Chad Henne pass complete deep left to Brandon Marshall for 25 yards (tackle by Ras-I Dowling)",
        {"entities": [(0, 10, 'PERSON'), (38, 54, 'PERSON'), (79, 92, 'PERSON')]}
    ),
    (
        "Thomas Morstead kicks off 73 yards, touchback. Penalty on Jo-Lonn Dunbar: Offside on Free Kick, 5 yards (no play)",
        {"entities": [(0, 15, 'PERSON'), (58, 72, 'PERSON')]}
    ),
    (
        "Adam Podlesh punts 35 yards, fair catch by Darren Sproles. Penalty on Sam Hurd: Fair Catch Interference, 15 yards",
        {"entities": [(0, 12, 'PERSON'), (43, 57, 'PERSON'), (70, 78, 'PERSON')]}
    ),
    (
        "Mat McBriar punts 54 yards, returned by Ted Ginn Jr. for 8 yards (tackle by Sean Lee)",
        {"entities": [(0, 11, 'PERSON'), (40, 52, 'PERSON'), (76, 84, 'PERSON')]}
    ),
    # NOTE(review): the span (53, 70) below covers "Kellen Winslow Jr" without
    # the period, whereas the two "Ted Ginn Jr." spans in this list include it.
    # The text reads "Jr.." here -- confirm the shorter span is intentional.
    (
        "Josh Freeman pass incomplete short left intended for Kellen Winslow Jr.. Penalty on Roman Harper: Defensive Pass Interference, 8 yards (no play)",
        {"entities": [(0, 12, 'PERSON'), (53, 70, 'PERSON'), (84, 96, 'PERSON')]}
    ),
    (
        "Melvin Gordon left guard for no gain (tackle by Mike Pennel)",
        {"entities": [(0, 13, 'PERSON'), (48, 59, 'PERSON')]}
    ),
    (
        "Timeout #3 by San Francisco 49ers",
        {"entities": [(14, 33, 'PERSON')]}
    ),
    (
        "Penalty on Tre'Davious White: Neutral Zone Infraction, 5 yards (no play)",
        {"entities": [(11, 28, 'PERSON')]}
    ),
    (
        "Le'Veon Bell right guard for 1 yard, touchdown",
        {"entities": [(0, 12, 'PERSON')]}
    ),
    (
        "Le'Veon Bell for 23 yards",
        {"entities": [(0, 12, 'PERSON')]}
    ),
    (
        "Penalty on T.J. Lang: False Start, 5 yards (no play)",
        {"entities": [(11, 20, 'PERSON')]}
    ),
    (
        "Penalty on Andrew Whitworth: False Start, 5 yards (no play)",
        {"entities": [(11, 27, 'PERSON')]}
    ),
    (
        "Alfred Morris up the middle for -3 yards (tackle by Eric Wright and Daniel Te'o-Nesheim)",
        {"entities": [(0, 13, 'PERSON'), (52, 63, 'PERSON'), (68, 87, 'PERSON')]}
    ),
    (
        "Brady Quinn pass complete short right to Jamaal Charles for -1 yards (tackle by Daniel Te'o-Nesheim and Quincy Black)",
        {"entities": [(0, 11, 'PERSON'), (41, 55, 'PERSON'), (80, 99, 'PERSON'), (104, 116, 'PERSON')]}
    ),
    (
        "Randy Bullock 24 yard field goal good",
        {"entities": [(0, 13, 'PERSON')]}
    ),
    (
        "Ryan Mallett pass incomplete deep right intended for Breshad Perriman is intercepted by KeiVarae Russell at CIN-26",
        {"entities": [(0, 12, 'PERSON'), (53, 69, 'PERSON'), (88, 104, 'PERSON')]}
    ),
    (
        "Cam Newton pass incomplete deep left intended for Steve Smith is intercepted by Charles Woodson at GNB-35 and returned for 1 yard (tackle by Steve Smith)",
        {"entities": [(0, 10, 'PERSON'), (50, 61, 'PERSON'), (80, 95, 'PERSON'), (141, 152, 'PERSON')]}
    ),
    (
        "Zane Gonzalez 23 yard field goal good",
        {"entities": [(0, 13, 'PERSON')]}
    ),
    (
        "Jay Feely 36 yard field goal no good",
        {"entities": [(0, 9, 'PERSON')]}
    ),
    (
        "Adam Vinatieri 37 yard field goal no good",
        {"entities": [(0, 14, 'PERSON')]}
    ),
    (
        "Matt Hasselbeck pass incomplete deep left intended for Kenny Britt is intercepted by Dwight Lowery at JAX-20 and returned for -4 yards (tackle by Kenny Britt)",
        {"entities": [(0, 15, 'PERSON'), (55, 66, 'PERSON'), (85, 98, 'PERSON'), (146, 157, 'PERSON')]}
    ),
    (
        "Matt Ryan pass incomplete intended for Russell Gage Replay Assistant challenged the pass completion ruling, and the original play was overturned. Matt Ryan pass complete short left to Russell Gage for 8 yards (tackle by Antoine Winfield)",
        {"entities": [(0, 9, 'PERSON'), (39, 51, 'PERSON'), (146, 155, 'PERSON'), (184, 196, 'PERSON'), (220, 236, 'PERSON')]}
    ),
    (
        "Jalen Hurts pass complete short right to Dallas Goedert for 9 yards, touchdown Replay Assistant challenged the pass completion ruling, and the play was upheld.",
        {"entities": [(0, 11, 'PERSON'), (41, 55, 'PERSON')]}
    ),
    (
        "Mac Jones pass complete short right to Rhamondre Stevenson for 9 yards. Rhamondre Stevenson fumbles (forced by Eric Rowe), recovered by Zach Sieler at NE-48 (tackle by Eric Rowe) Replay Assistant challenged the ruling, and the play was upheld.",
        {"entities": [(0, 9, 'PERSON'), (39, 58, 'PERSON'), (72, 91, 'PERSON'), (111, 120, 'PERSON'), (168, 177, 'PERSON'), (136, 147, 'PERSON')]}
    ),
    (
        "Timeout #1 by Green Bay Packers",
        {"entities": [(14, 31, 'PERSON')]}
    ),
    (
        "Timeout #1 by New Orleans Saints",
        {"entities": [(14, 32, 'PERSON')]}
    ),
    (
        "Timeout #3 by Chicago Bears",
        {"entities": [(14, 27, 'PERSON')]}
    ),
    (
        "Mason Rudolph pass incomplete intended for George Pickens BUF challenged the pass completion ruling, and the original play was overturned. Mason Rudolph pass complete short right to George Pickens for 8 yards. George Pickens fumbles (forced by Taron Johnson), recovered by Terrel Bernard at PIT-29",
        {"entities": [(0, 13, 'PERSON'), (139, 152, 'PERSON'), (43, 57, 'PERSON'), (182, 196, 'PERSON'), (210, 224, 'PERSON'), (244, 257, 'PERSON'), (273, 287, 'PERSON')]}
    ),
    (
        "Mason Rudolph pass complete deep left to Pat Freiermuth for 33 yards. Pat Freiermuth fumbles (forced by Christian Benford). Pat Freiermuth fumbles out of bounds BUF challenged the runner was in bounds ruling, and the play was upheld.",
        {"entities": [(0, 13, 'PERSON'), (41, 55, 'PERSON'), (70, 84, 'PERSON'), (124, 138, 'PERSON'), (104, 121, 'PERSON')]}
    ),
    (
        "Dak Prescott pass complete short middle to Jake Ferguson for 11 yards (tackle by Carrington Valentine) GNB challenged the pass completion ruling, and the play was upheld.",
        {"entities": [(0, 12, 'PERSON'), (43, 56, 'PERSON'), (81, 101, 'PERSON')]}
    ),
    (
        "Tua Tagovailoa for no gain. Tua Tagovailoa fumbles out of bounds MIA challenged the fumble ruling, and the original play was overturned. Tua Tagovailoa pass incomplete short right intended for De'Von Achane",
        {"entities": [(0, 14, 'PERSON'), (28, 42, 'PERSON'), (137, 151, 'PERSON'), (193, 206, 'PERSON')]}
    ),
    (
        "Teddy Bridgewater pass incomplete short left intended for Zach Zenner Penalty on NOR: Illegal Shift (Declined)",
        {"entities": [(0, 17, 'PERSON'), (58, 69, 'PERSON')]}
    ),
    (
        "Carson Wentz pass incomplete short right intended for John Hightower Penalty on NYG: Defensive Too Many Men on Field, 5 yards (no play)",
        {"entities": [(0, 12, 'PERSON'), (54, 68, 'PERSON')]}
    ),
    (
        "Josh Allen pass incomplete deep left intended for Kelvin Benjamin Penalty on BUF: Illegal Formation, 5 yards (no play)",
        {"entities": [(0, 10, 'PERSON'), (50, 65, 'PERSON')]}
    ),
    (
        "Olindo Mare kicks off 63 yards, returned by Randall Cobb for 18 yards (tackle by Jordan Senn). Randall Cobb fumbles (forced by Jordan Senn), recovered by Sean Considine at GNB-26 (tackle by John Kuhn)",
        {"entities": [(0, 11, 'PERSON'), (44, 56, 'PERSON'), (95, 107, 'PERSON'), (81, 92, 'PERSON'), (127, 138, 'PERSON'), (154, 168, 'PERSON'), (190, 199, 'PERSON')]}
    ),
    (
        "Russell Wilson pass incomplete deep middle intended for Doug Baldwin is intercepted by Adrian Wilson at ARI-12 and returned for -2 yards, lateral to Patrick Peterson for 18 yards (tackle by Breno Giacomini)",
        {"entities": [(0, 14, 'PERSON'), (56, 68, 'PERSON'), (87, 100, 'PERSON'), (149, 165, 'PERSON'), (190, 205, 'PERSON')]}
    ),
    (
        "Ryan Lindley pass complete short left to John Brown for 5 yards, lateral to Michael Floyd for no gain, lateral to Stepfan Taylor for 2 yards, lateral to John Carlson for no gain, lateral to Lyle Sendlein for -5 yards, lateral to Michael Floyd for -25 yards. Michael Floyd fumbles, recovered by John Brown at ARI-5",
        {"entities": [(0, 12, 'PERSON'), (41, 51, 'PERSON'), (294, 304, 'PERSON'), (76, 89, 'PERSON'), (229, 242, 'PERSON'), (258, 271, 'PERSON'), (114, 128, 'PERSON'), (153, 165, 'PERSON'), (190, 203, 'PERSON')]}
    ),
    (
        "JK Scott punts 49 yards downed by Amen Ogbongbemiga LAC challenged the fumble ruling, and the original play was overturned. JK Scott punts 50 yards, recovered by Amen Ogbongbemiga at DAL-20",
        {"entities": [(0, 8, 'PERSON'), (124, 132, 'PERSON'), (34, 51, 'PERSON'), (162, 179, 'PERSON')]}
    ),
    (
        "Kyler Murray for -5 yards. Kyler Murray fumbles, recovered by Kyler Murray at ARI-6 Kyler Murray pass incomplete. Penalty on Kyler Murray: Intentional Grounding, 11 yards (accepted)",
        {"entities": [(0, 12, 'PERSON'), (27, 39, 'PERSON'), (62, 74, 'PERSON'), (84, 96, 'PERSON'), (125, 137, 'PERSON')]}
    ),
    (
        "Aaron Rodgers pass complete short right to Jordy Nelson for 13 yards, touchdown. Penalty on Su'a Cravens: Roughing the Passer, 15 yards",
        {"entities": [(0, 13, 'PERSON'), (43, 55, 'PERSON'), (92, 104, 'PERSON')]}
    ),
    (
        "Daniel Jones right end for 16 yards (tackle by Jaquiski Tartt). Penalty on Darius Slayton: Offensive Holding (Offsetting) Penalty on SFO: Face Mask (15 Yards) (Offsetting) (no play)",
        {"entities": [(0, 12, 'PERSON'), (47, 61, 'PERSON'), (75, 89, 'PERSON')]}
    ),
    (
        "Jordan Love aborted snap, recovered by Jordan Love at GB-14 Jordan Love for no gain Penalty on GNB: Illegal Motion, 5 yards (declined)",
        {"entities": [(0, 11, 'PERSON'), (39, 50, 'PERSON'), (60, 71, 'PERSON')]}
    ),
    (
        "Mitchell Trubisky for -128 yards. Mitchell Trubisky fumbles, recovered by Mitchell Trubisky at GB-39 Mitchell Trubisky pass complete short middle to David Montgomery for 9 yards (tackle by Oren Burks)",
        {"entities": [(0, 17, 'PERSON'), (34, 51, 'PERSON'), (74, 91, 'PERSON'), (101, 118, 'PERSON'), (149, 165, 'PERSON'), (189, 199, 'PERSON')]}
    ),
    (
        "Philip Rivers pass complete deep left to Austin Ekeler for 30 yards (tackle by Ron Parker). Penalty on Ron Parker: Lowering the Head to Initiate Contact, 15 yards",
        {"entities": [(0, 13, 'PERSON'), (41, 54, 'PERSON'), (79, 89, 'PERSON'), (103, 113, 'PERSON')]}
    ),
    (
        "T.J. Yates for no gain. Chris Myers fumbles, recovered by Chris Myers at HOU-SF",
        {"entities": [(0, 10, 'PERSON'), (24, 35, 'PERSON'), (58, 69, 'PERSON')]}
    ),
    (
        "Aldrick Rosas for -128 yards. Aldrick Rosas fumbles, recovered by Aldrick Rosas at JAC-2 Aldrick Rosas punts 32 yards, returned by James Proche for 3 yards (tackle by Doug Middleton). Penalty on Andrew Wingard: Ineligible Downfield Kick, 5 yards",
        {"entities": [(0, 13, 'PERSON'), (30, 43, 'PERSON'), (66, 79, 'PERSON'), (89, 102, 'PERSON'), (131, 143, 'PERSON'), (167, 181, 'PERSON'), (195, 209, 'PERSON')]}
    ),
    (
        "Two Point Attempt: Dorian Thompson-Robinson rushes, conversion succeeds.",
        {"entities": [(19, 43, 'PERSON')]}
    ),
    (
        "Two Point Attempt: Chad Henne pass incomplete intended for Justin Blackmon, conversion fails",
        {"entities": [(19, 29, 'PERSON'), (59, 74, 'PERSON')]}
    ),
    (
        "Jamie Gillan for no gain. Jamie Gillan fumbles, recovered by Carter Coughlin at NYG-33. Penalty on Jamie Gillan: Illegal Kick/Kicking Loose Ball / Offense, 10 yards (accepted)",
        {"entities": [(0, 12, 'PERSON'), (26, 38, 'PERSON'), (99, 111, 'PERSON'), (61, 76, 'PERSON')]}
    ),
    (
        "Mac Jones pass short middle intended for Hunter Henry is intercepted by P.J. Williams at NO-45 and returned for 46 yards. P.J. Williams fumbles (forced by Brandon Bolden), recovered by Kaden Elliss at NE-1 Replay Assistant challenged the fumble ruling, and the original play was overturned. Mac Jones pass short middle intended for Hunter Henry is intercepted by P.J. Williams at NO-45 and returned for 46 yards (tackle by Brandon Bolden)",
        {"entities": [(0, 9, 'PERSON'), (291, 300, 'PERSON'), (41, 53, 'PERSON'), (332, 344, 'PERSON'), (72, 85, 'PERSON'), (122, 135, 'PERSON'), (363, 376, 'PERSON'), (155, 169, 'PERSON'), (423, 437, 'PERSON'), (185, 197, 'PERSON')]}
    ),
    (
        "Anthony Richardson sacked by Aaron Donald for no gain. Anthony Richardson fumbles (forced by Aaron Donald), recovered by Christian Rozeboom at LA-44. Penalty on Tre'Vius Tomlinson: Defensive Holding, 5 yards (accepted) (no play)",
        {"entities": [(0, 18, 'PERSON'), (55, 73, 'PERSON'), (29, 41, 'PERSON'), (93, 105, 'PERSON'), (121, 139, 'PERSON'), (161, 179, 'PERSON')]}
    ),
    (
        "Jon Ryan fumbles, recovered by Jon Ryan at SEA-13. Jon Ryan punts 45 yards, returned by Perrish Cox for 9 yards (tackle by Ricardo Lockette) Jon Ryan fumbles (forced by Ricardo Lockette), recovered by O'Brien Schofield at SFO-40.",
        {"entities": [(0, 8, 'PERSON'), (31, 39, 'PERSON'), (51, 59, 'PERSON'), (141, 149, 'PERSON'), (88, 99, 'PERSON'), (123, 139, 'PERSON'), (169, 185, 'PERSON'), (201, 218, 'PERSON')]}
    ),
    (
        "Anders Carlson 25 yard field goal good",
        {"entities": [(0, 14, 'PERSON')]}
    ),
    (
        "Anders Carlson 34 yard field goal good",
        {"entities": [(0, 14, 'PERSON')]}
    ),
    (
        "Ty Long 44 yard field goal good",
        {"entities": [(0, 7, 'PERSON')]}
    ),
    (
        "Connor Barth 42 yard field goal good",
        {"entities": [(0, 12, 'PERSON')]}
    ),
    (
        "Sam Bradford pass incomplete short left intended for Zach Ertz (defended by Chris Culliver). Penalty on Ricky Jean-Francois: Roughing the Passer, 15 yards (no play)",
        {"entities": [(0, 12, 'PERSON'), (53, 62, 'PERSON'), (76, 90, 'PERSON'), (104, 123, 'PERSON')]}
    ),
    (
        "Shane Lechler punts 54 yards, returned by Adoree' Jackson for 25 yards (tackle by Alfred Blue). Adoree' Jackson fumbles (forced by Alfred Blue), recovered by Jordan Todman at TEN-41",
        {"entities": [(0, 13, 'PERSON'), (42, 57, 'PERSON'), (96, 111, 'PERSON'), (82, 93, 'PERSON'), (131, 142, 'PERSON'), (158, 171, 'PERSON')]}
    ),
    (
        "Mark Sanchez for no gain. Mark Sanchez fumbles, recovered by Mark Sanchez at NYJ-NYJ",
        {"entities": [(0, 12, 'PERSON'), (26, 38, 'PERSON'), (61, 73, 'PERSON')]}
    ),
    (
        "Fred Jackson up the middle for 1 yard, touchdown. Penalty on OAK: Unsportsmanlike Conduct, 15 yards",
        {"entities": [(0, 12, 'PERSON')]}
    ),
    (
        "Matt Cassel pass complete short right to Leonard Pope for 1 yard, touchdown. Penalty on Leonard Pope: Unsportsmanlike Conduct, 15 yards",
        {"entities": [(0, 11, 'PERSON'), (41, 53, 'PERSON'), (88, 100, 'PERSON')]}
    )
]

# Print the (start, end, "PERSON") spans for the last example so they can be
# compared with (or pasted into) its "entities" list above.
get_name_indices(TRAIN_DATA[-1][0], ["Matt Cassel", "Leonard Pope"])

[(0, 11, 'PERSON'), (41, 53, 'PERSON'), (88, 100, 'PERSON')]


In [4]:
# Update the loaded pipeline on the hand-annotated examples in TRAIN_DATA.
N_EPOCHS = 10
DROPOUT = 0.5

# Shuffle a copy so TRAIN_DATA keeps its authored order and re-running this
# cell does not depend on how many times it has already been executed.
train_examples = list(TRAIN_DATA)

for epoch in range(N_EPOCHS):
    # Reshuffle at the start of every epoch; shuffling once before the loop
    # would present the examples in the same order in all epochs.
    random.shuffle(train_examples)

    # Accumulate per-component losses for this epoch instead of passing a
    # throwaway dict, so training progress is visible.
    losses = {}
    for text, annotations in train_examples:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], drop=DROPOUT, losses=losses)

    print(f"epoch {epoch + 1}/{N_EPOCHS} losses: {losses}")

In [5]:
# Visual spot-check: run the updated pipeline over a few play descriptions
# and render the entities it finds inline in the notebook.
sentences = [
    "Tyrion Davis-Price right tackle for 4 yards (tackle by Bryan Mone and Quandre Diggs)",
    "Michael Vick pass complete deep right to Jason Avant for 20 yards (tackle by Quintin Mikell). Penalty on C.J. Ah You: Roughing the Passer, 10 yards",
    "Taysom Hill right guard for 35 yards (tackle by Brian Poole). Penalty on Brian Poole: Horse Collar Tackle, 10 yards",
    "Bernard Scott right guard for -2 yards (tackle by Jabaal Sheard). Penalty on T.J. Ward: Unnecessary Roughness, 15 yards",
]

# map() is lazy, so each sentence is processed and rendered in turn.
for doc in map(nlp, sentences):
    displacy.render(doc, style="ent", jupyter=True)

# Disabled comparison against the unmodified base model:
# print("-----------------------------------------")
# # test with base model
# nlp_1 = spacy.load("en_core_web_sm")
# for sentence in sentences:
#     doc_1 = nlp_1(sentence)
#     displacy.render(doc_1, style='ent', jupyter=True)

In [6]:
# Compare the PERSON entities found by the pipeline against the expected names
# stored alongside each line in test_details.json, and report every mismatch.
regex_dir = "../myRegex/"

# Use a context manager so the file handle is closed once the JSON is loaded.
with open(regex_dir + "test_details.json", "r", encoding="utf-8") as details_fh:
    file = json.load(details_fh)

mismatch_count = 0
for obj in file:
    sentence = obj['line']
    # sorted() returns a new list, leaving the loaded record untouched.
    # NOTE(review): predicted names are de-duplicated below but the expected
    # names are not; if a record lists the same name twice it will always be
    # reported as a mismatch -- confirm the expected lists are unique.
    names = sorted(obj['names'])
    doc = nlp(sentence)
    f_names = sorted({ent.text for ent in doc.ents if ent.label_ == "PERSON"})
    if names != f_names:
        mismatch_count += 1
        print(sentence)
        print(f"Expected names: {names}")
        print(f"Found names: {f_names}")
        print()

# Always print a summary so an all-matching run is distinguishable from a
# cell that produced no output for some other reason.
print(f"{mismatch_count} of {len(file)} lines mismatched")

In [7]:
# Write the pipeline held in `nlp` to the relative directory models/names_model.
nlp.to_disk("models/names_model")