In [None]:
pip install flask spacy

In [3]:
import spacy
from spacy.training import Example
from spacy.util import minibatch
import random
import json


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\reza\AppData\Local\Programs\Python\Python311\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\reza\AppData\Local\Programs\Python\Python311\Lib\site-packages\traitlets\config\application.py", line 1043, in launch_instance
    app.start()
  File "c:\Users\reza\AppData\Local\Programs\Python\Python311\Lib\site-packages\ipykernel\kernelapp.py", lin

In [None]:
# Start from blank model: an English pipeline with no components yet.
nlp = spacy.blank("en")

# Add NER pipe; keep a handle to the component that add_pipe returns.
ner = nlp.add_pipe("ner")

In [None]:
# Load synthetic training data from JSON.
# Each item is consumed below as a dict with "text" and "entities" keys.
with open("Training_Data/train_data.json", "r", encoding="utf-8") as f:
    synthetic_data = json.load(f)


# Load real-world data (JSON Lines: one JSON object per line) and convert it
# to the same {"text", "entities"} shape by renaming the "label" field.
real_data = []
with open("Training_Data/real_data.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Blank lines (e.g. a trailing newline at the end of the file) are not
        # valid JSON and would make json.loads raise, so skip them.
        if not line.strip():
            continue
        entry = json.loads(line)
        real_data.append({
            "text": entry["text"],
            "entities": entry["label"]
        })

# Merge datasets
training_data = synthetic_data + real_data


# Build one spaCy Example per item: tokenize the raw text with the pipeline's
# tokenizer and attach the entity annotations as the reference.
examples = [
    Example.from_dict(nlp.make_doc(item["text"]), {"entities": item["entities"]})
    for item in training_data
]


print(f"Loaded {len(examples)} total training examples.")

Loaded 550 total training examples.


In [9]:
from spacy.training.example import Example
from spacy.util import minibatch
from spacy.util import compounding
import random

# Train model
# Collect every pipeline component except NER so only NER is updated.
# NOTE: the comparison must be against the string itself; `pipe not in ("ner")`
# is a substring test on "ner" (the parentheses do not make a tuple).
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# select_pipes is the spaCy v3 replacement for the deprecated disable_pipes;
# the listed components are restored when the block exits.
with nlp.select_pipes(disable=other_pipes):
    # Initialize the model weights (and labels) from the training examples.
    optimizer = nlp.initialize(get_examples=lambda: examples)

    n_iter = 20
    for epoch in range(n_iter):
        # Reshuffle in place each epoch so batches differ between epochs.
        random.shuffle(examples)
        losses = {}
        # Batch size compounds from 4 towards 32; the schedule restarts each epoch.
        batches = minibatch(examples, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            # Pass the optimizer explicitly instead of relying on the pipeline's
            # implicit default optimizer.
            nlp.update(batch, sgd=optimizer, losses=losses)
        print(f"Epoch {epoch+1} | Losses: {losses}")

Epoch 1 | Losses: {'ner': np.float32(1608.0052)}
Epoch 2 | Losses: {'ner': np.float32(108.6021)}
Epoch 3 | Losses: {'ner': np.float32(71.24158)}
Epoch 4 | Losses: {'ner': np.float32(40.642445)}
Epoch 5 | Losses: {'ner': np.float32(37.726444)}
Epoch 6 | Losses: {'ner': np.float32(17.487926)}
Epoch 7 | Losses: {'ner': np.float32(8.985805)}
Epoch 8 | Losses: {'ner': np.float32(9.877969)}
Epoch 9 | Losses: {'ner': np.float32(19.08025)}
Epoch 10 | Losses: {'ner': np.float32(10.53949)}
Epoch 11 | Losses: {'ner': np.float32(18.473715)}
Epoch 12 | Losses: {'ner': np.float32(16.872347)}
Epoch 13 | Losses: {'ner': np.float32(23.177254)}
Epoch 14 | Losses: {'ner': np.float32(22.565231)}
Epoch 15 | Losses: {'ner': np.float32(26.460371)}
Epoch 16 | Losses: {'ner': np.float32(31.286568)}
Epoch 17 | Losses: {'ner': np.float32(48.802986)}
Epoch 18 | Losses: {'ner': np.float32(17.153625)}
Epoch 19 | Losses: {'ner': np.float32(3.335366)}
Epoch 20 | Losses: {'ner': np.float32(7.4134073)}


In [10]:
import os

# Directory (relative to the notebook's working directory) the pipeline is written to.
output_dir = "./football_ner_model"
# Serialize the pipeline held in `nlp` to that directory.
nlp.to_disk(output_dir)
print(f"Model saved to {output_dir}")

Model saved to ./football_ner_model


In [11]:
import spacy

# Reload the pipeline from disk so the check below exercises the saved artifact.
nlp = spacy.load("./football_ner_model")

# Sample transfer-news sentences used as a quick smoke test.
test_texts = [
    "Jude Bellingham joins Liverpool from Borussia Dortmund for £60 million.",
    "Chelsea complete the signing of Mason Mount from Manchester United on a loan deal.",
    "Barcelona announce the signing of João Félix on a permanent transfer.",
    "Harrogate Town have signed Huddersfield Town defender Eko Solomon on loan until the end of the season.",
    "Bradford City have signed midfielder Tommy Leigh from MK Dons for an undisclosed fee on a two-and-a-half-year deal",
    "Aston Villa have announced the signing of forward Donyell Malen from Borussia Dortmund",
]

# For every sentence, print the text followed by one line per predicted entity.
for text in test_texts:
    report = [f"\nText: {text}"]
    report.extend(f" - {ent.text} ({ent.label_})" for ent in nlp(text).ents)
    print("\n".join(report))


Text: Jude Bellingham joins Liverpool from Borussia Dortmund for £60 million.
 - Jude Bellingham (PLAYER_NAME)
 - Liverpool (TO_CLUB)
 - Borussia Dortmund (FROM_CLUB)
 - £60 million (TRANSFER_FEE)

Text: Chelsea complete the signing of Mason Mount from Manchester United on a loan deal.
 - Chelsea (TO_CLUB)
 - Mason Mount (PLAYER_NAME)
 - Manchester United (FROM_CLUB)
 - loan (TRANSFER_TYPE)

Text: Barcelona announce the signing of João Félix on a permanent transfer.
 - Barcelona (TO_CLUB)
 - João Félix (PLAYER_NAME)
 - permanent transfer (TRANSFER_TYPE)

Text: Harrogate Town have signed Huddersfield Town defender Eko Solomon on loan until the end of the season.
 - Harrogate Town (TO_CLUB)
 - Huddersfield Town (FROM_CLUB)
 - Eko Solomon (PLAYER_NAME)
 - loan (TRANSFER_TYPE)

Text: Bradford City have signed midfielder Tommy Leigh from MK Dons for an undisclosed fee on a two-and-a-half-year deal
 - Bradford City (TO_CLUB)
 - Tommy Leigh (PLAYER_NAME)
 - MK Dons (FROM_CLUB)
 - undisclosed