In [1]:
import spacy
from scispacy.abbreviation import AbbreviationDetector
import scispacy
from spacy import displacy
from pprint import pprint

In [2]:
nlp = spacy.load("en_core_sci_lg")##need to install appropriate model to load

In [3]:
abbreviation_pipe = AbbreviationDetector(nlp)
nlp.add_pipe(abbreviation_pipe)

doc = nlp("Spinal and bulbar muscular atrophy (SBMA) is an \
           inherited motor neuron disease caused by the expansion \
           of a polyglutamine tract within the androgen receptor (AR). \
           SBMA can be caused by this easily.")

print("Abbreviation", "\t", "Definition")
for abrv in doc._.abbreviations:
	print(f"{abrv} \t ({abrv.start}, {abrv.end}) {abrv._.long_form}")

Abbreviation 	 Definition
SBMA 	 (33, 34) Spinal and bulbar muscular atrophy
SBMA 	 (6, 7) Spinal and bulbar muscular atrophy
AR 	 (29, 30) androgen receptor


In [4]:
doc = nlp("Spinal and bulbar muscular atrophy (SBMA) is an \
           inherited motor neuron disease caused by the expansion \
           of a polyglutamine tract within the androgen receptor (AR). \
           SBMA can be caused by this easily.")

In [5]:
c = doc.ents[1]

In [6]:
list(doc.sents)

[Spinal and bulbar muscular atrophy (SBMA) is an            inherited motor neuron disease caused by the expansion            of a polyglutamine tract within the androgen receptor (AR).            ,
 SBMA can be caused by this easily.]

In [7]:
#displacy.render(next(doc.sents), style='dep', jupyter=True)

In [8]:
displacy.render(doc, style="ent")#use serve if first call to displacy/render

In [9]:
def display_entites(model, document):
    nlp = spacy.load(model)
    nlp.add_pipe(abbreviation_pipe)
    doc = nlp(document)
    displacy_img = displacy.render(doc, jupyter=True, style='ent')
    entity_and_label = pprint(set([(X.text, X.label_) for X in doc.ents]))
    abbreviation = set([(f'{abrv}: {abrv._.long_form}') for abrv in doc._.abbreviations])
    
    return displacy_img, entity_and_label, abbreviation

In [10]:
doc = "Spinal and bulbar muscular atrophy (SBMA) is an \
           inherited motor neuron disease caused by the expansion \
           of a polyglutamine tract within the androgen receptor (AR). \
           SBMA can be caused by this easily."
display_entites('en_core_sci_lg', doc)

{('AR', 'ENTITY'),
 ('SBMA', 'ENTITY'),
 ('Spinal', 'ENTITY'),
 ('androgen receptor', 'ENTITY'),
 ('bulbar muscular atrophy', 'ENTITY'),
 ('expansion', 'ENTITY'),
 ('inherited', 'ENTITY'),
 ('motor neuron disease', 'ENTITY'),
 ('polyglutamine tract', 'ENTITY')}


(None,
 None,
 {'AR: androgen receptor', 'SBMA: Spinal and bulbar muscular atrophy'})

In [11]:
doc = """Transformation of chronic lymphocytic leukaemia (CLL) to diffuse large B-cell lymphoma (DLCBL) type 
Richter's syndrome (RS) carries a dismal prognosis. Standard-of-care chemoimmunotherapy for de novo RS is 
inadequate with median survival of less than one year. Patients are frequently elderly or have co-morbidities 
limiting dose-intense chemotherapy. Treatment of relapsed/refractory (R/R) RS and RS emerging after CLL-directed 
therapy represent urgent unmet clinical needs. Agents targeting Bruton's tyrosine kinase (BTK) deliver improved 
outcomes for patients with high-risk CLL and expand effective treatments to frailer patients. Acalabrutinib is an 
oral, second-generation BTK inhibitor with a favourable toxicity profile and demonstrated activity in CLL and 
B-cell lymphomas. Combination of acalabrutinib with standard-of-care CHOP-R chemoimmunotherapy offers a sound 
rationale to test in a prospective trial for de novo RS. The prospective multicentre STELLAR study is designed in 
two elements, consisting of a randomised study to evaluate the safety and activity of CHOP-R chemoimmunotherapy in 
combination with acalabrutinib in newly diagnosed RS and single-arm studies of novel agents for other RS patient 
cohorts. Eligible patients with newly diagnosed DLBCL-type RS are randomised between six cycles of CHOP-R therapy 
and six cycles CHOP-R plus acalabrutinib, followed by acalabrutinib maintenance. The primary endpoint of the 
randomised component is progression free survival (PFS). Cohort 1 enrols RS patients with progressive disease 
following chemoimmunotherapy for acalabrutinib monotherapy. Patients with RS diagnosed while on ibrutinib may 
enrol in Cohort 2, a single-arm study of CHOP-R plus acalabrutinib. The primary endpoint for the single-arm 
studies is overall response rate (ORR). Secondary endpoints for all cohorts are overall survival (OS), quality of
life and proportion of patients proceeding to stem cell transplantation. The study will be accompanied by 
exploratory analysis of the mutational landscape of RS and the relationship between dynamic changes in sequential 
circulating tumour DNA samples and clinical outcomes. The STELLAR randomised trial evaluates the role of CHOP-R 
plus acalabrutinib in newly diagnosed RS patients. The single-arm platform studies enable the incorporation of 
promising novel therapies into the protocol. The STELLAR study has potential to identify novel biomarkers of 
treatment response in this high-risk malignancy. EudraCT: 2017-004401-40 , registered on the 31-Oct-2017."""

display_entites('en_ner_bc5cdr_md', doc)

{('Acalabrutinib', 'CHEMICAL'),
 ('B-cell lymphoma', 'DISEASE'),
 ('BTK', 'CHEMICAL'),
 ('CHOP-R', 'CHEMICAL'),
 ('CLL', 'DISEASE'),
 ('DLBCL-type RS', 'DISEASE'),
 ('ORR', 'CHEMICAL'),
 ('R/R', 'CHEMICAL'),
 ('RS', 'DISEASE'),
 ("Richter's syndrome", 'DISEASE'),
 ('STELLAR', 'CHEMICAL'),
 ('acalabrutinib', 'CHEMICAL'),
 ('chronic lymphocytic leukaemia', 'DISEASE'),
 ('ibrutinib', 'CHEMICAL'),
 ('malignancy', 'DISEASE'),
 ('toxicity', 'DISEASE'),
 ('tumour', 'DISEASE'),
 ('tyrosine', 'CHEMICAL')}


(None,
 None,
 {"BTK: Bruton's tyrosine kinase",
  'CLL: chronic lymphocytic leukaemia',
  'ORR: overall response rate',
  'OS: overall survival',
  'PFS: progression free survival',
  'R/R: relapsed/refractory',
  "RS: Richter's syndrome"})

'the, boy to men'

'( BTK )'