In [15]:
import spacy
from spacy import displacy
import nltk
from nltk.tokenize import sent_tokenize

In [16]:
# Load the `en_core_web_trf` pipeline (spaCy's transformer-based English model).
# The model package must already be installed; this cell does not download it.
# The resulting `nlp` object is passed to the NER and relation-extraction calls below.
nlp = spacy.load("en_core_web_trf")


In [17]:
def read_txt_file(file_path, encoding="utf-8"):
    """Read a whole text file into a single string.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding
            (which is what ``open`` falls back to when no encoding is given).

    Returns:
        The complete file contents as one ``str``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()

# Relative path: resolved against the notebook's current working directory.
file_path = "transcript.txt"
# Entire transcript as one string; it is split into sentences in the next cell.
text = read_txt_file(file_path)


In [18]:
# Split the transcript into a list of sentence strings with NLTK. Both the NER
# pass and the relation-extraction pass below iterate over this list.
# NOTE(review): sent_tokenize relies on NLTK's Punkt tokenizer data, and no
# nltk.download(...) call is visible in this notebook — confirm it is installed.
sentences = sent_tokenize(text)

In [19]:
def named_entity_recognition(sentences, nlp, render=True):
    """Collect the named entities spaCy finds in each sentence.

    Args:
        sentences: Iterable of sentence strings to analyse.
        nlp: Loaded spaCy pipeline. It must provide ``pipe`` and produce docs
            that expose ``ents``.
        render: When True (the default, matching the original behaviour), each
            processed sentence is displayed with displaCy's entity visualizer
            in the notebook. Pass False to collect entities without any
            display output.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples in the order the
        entities occur; repeated mentions are kept as separate entries.
    """
    entities = []
    # nlp.pipe lets spaCy batch the sentences instead of running the whole
    # pipeline once per call; docs are yielded in the same order as the input.
    for doc in nlp.pipe(sentences):
        if render:
            displacy.render(doc, style="ent", jupyter=True)
        entities.extend((ent.text, ent.label_) for ent in doc.ents)
    return entities

# Run NER over every sentence. `entities` is a flat list of (text, label)
# tuples; the call also renders each sentence with displaCy as it goes.
entities = named_entity_recognition(sentences, nlp)


In [20]:
# Print every extracted entity as "text (LABEL)", one per line. Nothing is
# de-duplicated, so repeated mentions appear once per occurrence.
print("Named Entities:")
for ent in entities:
    # ent is a (text, label) tuple produced by named_entity_recognition.
    print(f"{ent[0]} ({ent[1]})")


Named Entities:
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
AArch64 (PRODUCT)
64-bit (QUANTITY)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
GNU Assembler (PRODUCT)
ARM (ORG)
Compiler 6 (PRODUCT)
LLVM (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
31 (CARDINAL)
ARM64 (PRODUCT)
64-bit (QUANTITY)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
ARM64 (PRODUCT)
the GNU Compiler Collection (ORG)
ARM (ORG)
AArch64 (PRODUCT)
Linux (PRODUCT)
macOS (PRODUCT)
ARM Compiler 6 (PRODUCT)
ARM Ltd. (ORG)
ARM (ORG)
ARMv8 (PRODUCT)
AArch64 (PRODUCT)
ARM (ORG)
Windows (PRODUCT)
Linux (PRODUCT)
macOS (PRODUCT)
ARM64 (PRODUCT)
LLVM (ORG)
ARM (ORG)
AArch64 (PRODUCT)
Windows (PRODUCT)
Linux (PRODUCT)
macOS (PRODUCT)
Keil (ORG)
MDK-ARM (PRODUCT)
ARM Ltd. (ORG)
ARM (PRODUCT)
AArch64 (PRODUCT)
ARM (ORG)
Cortex-A (PRODUCT)
Windows (PRODUCT)
IAR Embedded Workbench (PRODUCT)
ARM (PRODUCT)
AArch64 (PRODUCT)
ARM (ORG)
Cortex-A (PRODUCT)
Windows (PRODU

In [21]:
# Explain each distinct entity label once. The labels are sorted so the listing
# comes out in the same order on every run; iterating a plain set of strings
# can change order between interpreter sessions because of hash randomization.
print("Types Explanation:")
types = sorted({ent[1] for ent in entities})
for typ in types:
    # spacy.explain returns a short human-readable description of the label.
    print(f"{typ}: {spacy.explain(typ)}")

Types Explanation:
ORDINAL: "first", "second", etc.
PRODUCT: Objects, vehicles, foods, etc. (not services)
LAW: Named documents made into laws.
CARDINAL: Numerals that do not fall under another type
LOC: Non-GPE locations, mountain ranges, bodies of water
PERSON: People, including fictional
FAC: Buildings, airports, highways, bridges, etc.
ORG: Companies, agencies, institutions, etc.
QUANTITY: Measurements, as of weight or distance
WORK_OF_ART: Titles of books, songs, etc.


In [22]:
def extract_relations(doc):
    """Pull simple (left, connector, right) triples out of a parsed doc.

    Two dependency patterns are recognised:

    * A token labelled ``attr`` or ``dobj`` whose head has an ``nsubj`` among
      its left children yields ``(subject, head, token)``.
    * A token labelled ``prep`` that has a ``pobj`` among its right children
      yields ``(prep.head, prep, pobj)``.

    Only the first matching subject / object is used in either case.

    Args:
        doc: Iterable of tokens exposing ``dep_``, ``head``, ``lefts`` and
            ``rights``.

    Returns:
        A list of 3-tuples of token objects, in token order.
    """
    triples = []
    for tok in doc:
        label = tok.dep_

        if label == "attr" or label == "dobj":
            governor = tok.head
            first_subject = next(
                (left for left in governor.lefts if left.dep_ == "nsubj"), None
            )
            if first_subject is not None:
                triples.append((first_subject, governor, tok))
            continue

        if label == "prep":
            first_object = next(
                (right for right in tok.rights if right.dep_ == "pobj"), None
            )
            if first_object is not None:
                triples.append((tok.head, tok, first_object))

    return triples

def relation_extraction(sentences, nlp):
    """Extract relation triples from every sentence.

    Args:
        sentences: Iterable of sentence strings to parse.
        nlp: Loaded spaCy pipeline; it must provide ``pipe``.

    Returns:
        A flat list of the 3-tuples produced by ``extract_relations`` for each
        parsed sentence, in sentence order.
    """
    relations = []
    # nlp.pipe lets spaCy batch the sentences rather than running the full
    # pipeline once per call; docs come back in the same order as the input.
    for doc in nlp.pipe(sentences):
        relations.extend(extract_relations(doc))
    return relations

# Parse every sentence again (separately from the NER pass above) and gather
# the relation triples from all of them into one flat list.
relations = relation_extraction(sentences, nlp)


In [23]:
# Print each relation triple as "left - connector - right" using the tokens'
# surface text, one triple per line.
print("Relations:")
for rel in relations:
    # rel is a 3-tuple of token objects; .text is each token's text.
    print(f"{rel[0].text} - {rel[1].text} - {rel[2].text}")

Relations:
Interview - With - ChatGPT
you - tell - me
assembly - for - ARM64
known - as - AArch64
ARM64 - is - architecture
used - in - smartphones
Writing - for - ARM64
code - for - ARM64
available - for - ARM64
assemblers - including - Assembler
that - suits - needs
set - includes - range
range - of - instructions
that - perform - operations
transfer - between - registers
You - find - documentation
documentation - for - ISA
ARM64 - has - registers
X0 - through - X30
store - during - execution
ARM64 - uses - model
byte - of - memory
stored - in - memory
transfer - between - registers
you - learned - that
Assemble - into - file
combine - with - libraries
assembly - be - tool
working - with - hardware
consequences - of - changes
make - to - program
you - tell - more
more - about - assemblers
available - for - ARM64
details - about - assemblers
available - for - ARM64
This - is - assembler
assembler - for - ARM64
part - of - Collection
It - supports - sets
support - for - macros
is - on 