In [30]:
import spacy
from spacy import displacy
from spacy.training import Example
from spacy.util import minibatch, compounding

# Name of the pretrained pipeline that the cells below fine-tune and run.
BASE_MODEL_NAME = "en_core_web_sm"

# Load the pipeline once; every later cell works on this `nlp` object.
nlp = spacy.load(BASE_MODEL_NAME)

In [83]:
# Fine-tune the NER component of the loaded pipeline on a handful of short,
# ticker-style organisation names, then save the result and load it back.

# Grab the Named Entity Recognizer (NER) component from the pipeline.
ner = nlp.get_pipe("ner")

# Make sure the component knows the label used in the annotations below.
# NOTE(review): "ORG" is presumably already in the pretrained label set, in
# which case this call changes nothing — confirm with `ner.labels`.
ner.add_label("ORG")

# Build the training set: one template sentence per name, where the name
# occupies characters [0, len(org)) and is annotated as ORG.
org_names = ["AM", "JPM", "C"]
TRAIN_DATA = []

for org in org_names:
    sentence = f"{org} is a well-known organization."
    entities = [(0, len(org), "ORG")]
    TRAIN_DATA.append((sentence, {"entities": entities}))

# Every pipeline component except NER; these are switched off while training.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# `select_pipes` is the spaCy v3 replacement for the deprecated `disable_pipes`;
# the listed components are restored when the `with` block exits.
with nlp.select_pipes(disable=other_pipes):
    # Optimizer for continuing training of the already-trained pipeline.
    optimizer = nlp.resume_training()
    for itn in range(20):  # Number of passes over TRAIN_DATA
        losses = {}
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            # Convert the whole minibatch to Example objects and apply ONE
            # update per batch; updating example-by-example would make the
            # minibatching above pointless.
            examples = [
                Example.from_dict(nlp.make_doc(text), annotations)
                for text, annotations in batch
            ]
            nlp.update(examples, sgd=optimizer, drop=0.5, losses=losses)
        # NOTE(review): the previously recorded run printed losses around
        # 1e-16 from the first iteration on, so these three template sentences
        # give the model almost nothing to learn from — consider more varied
        # training sentences.
        print(f"Iteration {itn}, Losses: {losses}")

# Save the updated model (all components are enabled again at this point).
nlp.to_disk("updated_model")

# Load the updated model back from disk.
nlp_updated = spacy.load("updated_model")


Iteration 0, Losses: {'ner': 7.201416943149946e-17}
Iteration 1, Losses: {'ner': 1.883535639066477e-16}
Iteration 2, Losses: {'ner': 6.228891279173549e-16}
Iteration 3, Losses: {'ner': 5.986911126168875e-17}
Iteration 4, Losses: {'ner': 4.247728001629515e-15}
Iteration 5, Losses: {'ner': 3.388569460964404e-17}
Iteration 6, Losses: {'ner': 1.896948372245187e-15}
Iteration 7, Losses: {'ner': 2.8166452309638777e-16}
Iteration 8, Losses: {'ner': 5.634325734729906e-16}
Iteration 9, Losses: {'ner': 4.571652416357903e-17}
Iteration 10, Losses: {'ner': 8.085625019538209e-18}
Iteration 11, Losses: {'ner': 7.4183314685895e-18}
Iteration 12, Losses: {'ner': 1.7320714217740552e-16}
Iteration 13, Losses: {'ner': 2.4380051991261585e-17}
Iteration 14, Losses: {'ner': 1.2673550220504221e-15}
Iteration 15, Losses: {'ner': 1.2316886362361417e-16}
Iteration 16, Losses: {'ner': 5.231645501821222e-16}
Iteration 17, Losses: {'ner': 2.795044644327152e-17}
Iteration 18, Losses: {'ner': 5.517258252640279e-18}


In [84]:
# Passage that the pipeline is run on further down: an encyclopedia-style
# description of UNCTAD, with a few extra sentences spliced in that mention the
# short names used for training ("JPM", "C", "AM") as well as names that were
# not in the training list ("JPMC", "ARDT").
# NOTE(review): "38 October 2021" and "(20102011)" look like date ranges that
# lost their dash somewhere upstream; the text is left exactly as found.
sample_text = '''UN Trade and Development (UNCTAD) is an intergovernmental organization within the United Nations Secretariat that promotes the interests of developing countries in world trade.[1] It was established in 1964 by the United Nations General Assembly (UNGA) as the United Nations Conference on Trade and Development but rebranded to its current name on the occasion of its 60th anniversary in 2024.[2] It reports to both the General Assembly and the United Nations Economic and Social Council (ECOSOC).[3] UNCTAD is composed of 195 member states and works with non-governmental organizations worldwide;[4] its permanent secretariat is at UNOG in Geneva, Switzerland.

The primary objective of UNCTAD is to formulate policies relating to all aspects of development, including trade, aid, transport, finance and technology. The newspaper praised about JPM. United nations and JPMC did not have a chance. It was created in response to concerns among developing countries that existing international institutions like GATT (since replaced by the World Trade Organization), the International Monetary Fund (IMF), and the World Bank were not properly organized to handle the particular problems of developing countries; UNCTAD would provide a forum where developing nations could discuss and address problems relating to their economic development.

One of UNCTAD's principal achievements was conceiving and implementing the Generalized System of Preferences (GSP), which promotes the export of manufactured goods from developing countries. In the 1970s and 1980s, UNCTAD was closely associated with the New International Economic Order (NIEO), a set of proposals that sought to reduce economic dependency and inequality between developing and developed countries.

UNCTAD conferences ordinarily take place every four years, with the first occurring in Geneva in 1964; fifteen subsequent meetings have taken place worldwide, with the most recent held in Bridgetown, Barbados, from 38 October 2021 (albeit virtually, due to the COVID-19 pandemic). 

On a discussion with C and ARDT, it was decided to take a step back. 

UNCTAD has 400 staff members and a biannual (20102011) regular budget of US$138 million in core expenditures and US$72 million in extra-budgetary technical assistance funds. The most trending price of AM is its lowest index value. It is a member of the United Nations Sustainable Development Group, a consortium of UN entities that work to promote sustainable socioeconomic development.[5] '''

In [85]:
#sample_text = '''The most trending price of AM is $600'''

In [86]:
# Run the in-memory pipeline (`nlp`, the object that was fine-tuned above) over
# the sample passage; `doc.ents` then holds the predicted entity spans.
doc = nlp(sample_text)

In [87]:

# Test the updated model
# test_text = sample_text
# doc = nlp_updated(test_text)
# for ent in doc.ents:
#     print(ent.text, ent.label_)

In [88]:
# Keep only the predicted spans whose label is ORG; every other entity type
# found in the document is dropped.
org_entities = list(filter(lambda ent: ent.label_ == "ORG", doc.ents))

In [89]:
# Turn each ORG span into a plain dict holding its character offsets and label.
# The result is a list of dicts (not a spaCy Doc), in the same order as
# `org_entities`.
doc_ents = [
    dict(start=ent.start_char, end=ent.end_char, label=ent.label_)
    for ent in org_entities
]

In [90]:
# Draw the entity visualisation. With manual=True displaCy is handed a plain
# dict (full text, ORG-only offsets, no title) instead of a Doc.
displacy.render(
    dict(text=doc.text, ents=doc_ents, title=None),
    style="ent",
    manual=True,
)