In [9]:

import spacy
import pandas as pd

In [10]:
# Name of the spaCy pipeline package to load, kept separate so it is easy to swap
model_name = "en_core_web_sm"

# `nlp` is the callable pipeline object used by later cells to process text
nlp = spacy.load(model_name)

In [11]:

# Unstructured input: free text that the entity-recognition step will process
text = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California.
Apple was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976.
Paggas Technologies is Bulgarian fintech company that specializes in payment solutions. It was founded in 2017 by Trayan Manolov in Bulgaria.
Daniel Kolev works at Paggas Technologies and likes hiking.
"""

# Structured knowledge base, written one record per row as
# (entity name, entity type, description) so each entry reads as a unit.
kb_rows = [
    ("Apple Inc.", "Organization", "American multinational technology company"),
    ("Steve Jobs", "Person", "Co-founder of Apple Inc."),
    ("Steve Wozniak", "Person", "Co-founder of Apple Inc."),
    ("Ronald Wayne", "Person", "Co-founder of Apple Inc."),
    ("Trayan Manolov", "Person", "Founder of Paggas Technologies LTD"),
    ("Daniel Kolev", "Person", "Software engineer at Paggas Technologies"),
]

# Pivot the rows into a column-oriented dict of equal-length lists
entity_col, type_col, description_col = (list(column) for column in zip(*kb_rows))
data = {
    "Entity": entity_col,
    "Type": type_col,
    "Description": description_col,
}



In [12]:
# Turn the column dict into a DataFrame: one row per known entity
knowledge_base = pd.DataFrame(data)

# Show the table under a heading (heading and frame on separate lines)
print("Knowledge Base:", knowledge_base, sep="\n")

Knowledge Base:
           Entity          Type                                Description
0      Apple Inc.  Organization  American multinational technology company
1      Steve Jobs        Person                   Co-founder of Apple Inc.
2   Steve Wozniak        Person                   Co-founder of Apple Inc.
3    Ronald Wayne        Person                   Co-founder of Apple Inc.
4  Trayan Manolov        Person         Founder of Paggas Technologies LTD
5    Daniel Kolev        Person   Software engineer at Paggas Technologies


In [13]:
# Part 2 - Perform Named Entity Recognition (NER)

# Run the loaded spaCy pipeline over the sample text
doc = nlp(text)

# Collect a (surface text, label) pair for every entity span spaCy reports
entities = []
for span in doc.ents:
    entities.append((span.text, span.label_))

# Print one (text, label) tuple per line under a heading
print("\nExtracted Entities:")
for mention, label in entities:
    print((mention, label))


Extracted Entities:
('Apple Inc.', 'ORG')
('American', 'NORP')
('Cupertino', 'GPE')
('California', 'GPE')
('Apple', 'ORG')
('Steve Jobs', 'PERSON')
('Steve Wozniak', 'PERSON')
('Ronald Wayne', 'PERSON')
('April 1976', 'DATE')
('Paggas Technologies', 'ORG')
('Bulgarian', 'NORP')
('2017', 'DATE')
('Trayan Manolov', 'ORG')
('Bulgaria', 'GPE')
('Daniel Kolev', 'PERSON')
('Paggas Technologies', 'ORG')


In [14]:
# Part 3 - Link Entities to Structured Knowledge Base

# Function to link entities to the knowledge base
def link_entities(entities, knowledge_base):
    """Attach knowledge-base descriptions to extracted entities.

    Each extracted entity is looked up in the ``"Entity"`` column of
    ``knowledge_base``. An exact string match is tried first; if that fails,
    the entity is compared again ignoring letter case and differences in
    whitespace (leading/trailing/repeated spaces, embedded newlines), so a
    mention that was wrapped across lines or capitalised differently still
    links. Entities with no match are dropped.

    Args:
        entities: Iterable of ``(text, label)`` pairs, e.g. built from
            ``doc.ents``.
        knowledge_base: DataFrame with at least ``"Entity"`` and
            ``"Description"`` columns.

    Returns:
        A list of dicts with keys ``"Entity"`` (the entity text exactly as
        extracted), ``"Label"`` (the extracted label) and ``"Description"``
        (taken from the first matching knowledge-base row), in the order the
        entities were given.

    Raises:
        KeyError: If ``knowledge_base`` lacks an ``"Entity"`` or
            ``"Description"`` column.
    """

    def _normalize(name):
        # Collapse whitespace runs and fold case; leave non-strings untouched
        if isinstance(name, str):
            return " ".join(name.split()).casefold()
        return name

    # Build both lookups once instead of re-filtering the DataFrame for every
    # entity. setdefault keeps the FIRST row for a repeated name, which is the
    # row the previous `.values[0]` lookup returned.
    exact_lookup = {}
    normalized_lookup = {}
    for kb_name, kb_description in zip(
        knowledge_base["Entity"], knowledge_base["Description"]
    ):
        exact_lookup.setdefault(kb_name, kb_description)
        normalized_lookup.setdefault(_normalize(kb_name), kb_description)

    linked_data = []
    for entity, label in entities:
        if entity in exact_lookup:
            # Exact hit: identical result to a strict equality match
            description = exact_lookup[entity]
        else:
            # Fallback: tolerate case / whitespace differences
            normalized = _normalize(entity)
            if normalized not in normalized_lookup:
                continue
            description = normalized_lookup[normalized]
        linked_data.append(
            {
                "Entity": entity,
                "Label": label,
                "Description": description,
            }
        )
    return linked_data


In [15]:
# Match the extracted entities against the knowledge base
linked_entities = link_entities(
    entities=entities,
    knowledge_base=knowledge_base,
)

# Print each linked record (a dict) on its own line under a heading
print("\nLinked Entities:")
for record in linked_entities:
    print(record)


Linked Entities:
{'Entity': 'Apple Inc.', 'Label': 'ORG', 'Description': 'American multinational technology company'}
{'Entity': 'Steve Jobs', 'Label': 'PERSON', 'Description': 'Co-founder of Apple Inc.'}
{'Entity': 'Steve Wozniak', 'Label': 'PERSON', 'Description': 'Co-founder of Apple Inc.'}
{'Entity': 'Ronald Wayne', 'Label': 'PERSON', 'Description': 'Co-founder of Apple Inc.'}
{'Entity': 'Trayan Manolov', 'Label': 'ORG', 'Description': 'Founder of Paggas Technologies LTD'}
{'Entity': 'Daniel Kolev', 'Label': 'PERSON', 'Description': 'Software engineer at Paggas Technologies'}


In [16]:
# Part 4 - Print the Aligned Data

# Render every linked record as a single human-readable line
print("\nAligned Data:")
for record in linked_entities:
    # Pull the three fields out first so the format string stays short
    name = record["Entity"]
    label = record["Label"]
    description = record["Description"]
    print(f"Entity: {name}, Label: {label}, Description: {description}")


Aligned Data:
Entity: Apple Inc., Label: ORG, Description: American multinational technology company
Entity: Steve Jobs, Label: PERSON, Description: Co-founder of Apple Inc.
Entity: Steve Wozniak, Label: PERSON, Description: Co-founder of Apple Inc.
Entity: Ronald Wayne, Label: PERSON, Description: Co-founder of Apple Inc.
Entity: Trayan Manolov, Label: ORG, Description: Founder of Paggas Technologies LTD
Entity: Daniel Kolev, Label: PERSON, Description: Software engineer at Paggas Technologies
