In [1]:
import spacy
from spacy import displacy
from collections import Counter
from pprint import pprint
from core_modules.topic_extraction.nlp_utils import NLPUtils

In [2]:
class NamedEntityRecognition:
    """Thin wrapper around an NLP pipeline used for named-entity recognition.

    The pipeline is stored on ``self.nlp`` and is simply called on the input
    text; entity extraction is left to the caller (e.g. via ``.ents`` on the
    returned object when a spaCy pipeline is used).
    """

    def __init__(self, nlp_model=None):
        """Store the pipeline to use.

        Args:
            nlp_model: A ready-made pipeline callable. When ``None``, the
                spaCy ``en_core_web_md`` model is loaded instead.
        """
        self.nlp = spacy.load("en_core_web_md") if nlp_model is None else nlp_model

    def named_entity_recognition_process(self, doc_text):
        """Run the stored pipeline on ``doc_text`` and return its result unchanged.

        Args:
            doc_text: The text to process.

        Returns:
            Whatever ``self.nlp(doc_text)`` returns.
        """
        return self.nlp(doc_text)

In [3]:
# No nlp_model is passed, so the class falls back to loading "en_core_web_md".
named_entity_recognition = NamedEntityRecognition()
text = """As COVID-19 spreads, public health officials are telling people to stay home if they feel sick. But in jails and prisons, that's not an option. Robert Greifinger is a physician who spent 25 years working on health care issues inside the nation's prisons and jails, and he says the "social distancing" advice we're all hearing right now isn't so simple behind bars. "There are crowding issues, ventilation issues, security issues where people have to be checked and monitored fairly frequently," Greifinger says. "So it's really hard to do." If jails and prisons are complacent about the coronavirus, he says they run the risk of becoming "incubators" for the disease. "Since jail and prison staff and prisoners tend to be younger, one thinks initially that it's not going to be a big problem," he says. "But remember that staffs work shifts, they come in and out of the facility, and they may be bringing that infection home to people who have compromised immune systems." For weeks, Greifinger and other corrections health experts have been urging prison administrators to plan for the coronavirus. But when NPR first started asking jails and prisons about preparations, most pointed to existing plans for other infectious diseases, such as tuberculosis. In Arizona, a spokesperson for the Department of Corrections said employees and inmates were being encouraged to disinfect shared services. But a correctional officer told KJZZ's Jimmy Jenkins nothing new was being done, and said unsanitary conditions threatened the health care of staff and inmates. NPR isn't identifying the officer because speaking to a reporter could cost him his job. The officer told Jenkins, "A lot of units have issues just getting soap and paper towels stocked in the restrooms." Jenkins says he's also been unable to get details on coronavirus preparation from the Maricopa County Sheriff's Office, which houses nearly 7,000 people on a daily basis. But others are taking clear steps to prepare. 
Near Seattle, the South Correctional Entity, or "SCORE," houses nearly 600 inmates. It's doing extra screening of new inmates, and it's not allowing visitors into the lobby, where family members usually communicate with inmates via video conferencing. The same video-conferencing service is available to families who want to call from home, and SCORE executive director Devon Schrum has asked the company that runs it to offer that service to families for free, during the pandemic. Inside the jail, she says they're doing what they can to keep the virus at bay. "Four times a day we're wiping down the entire jail including the cells, and we're wiping down before and after inmates come in," she says. They're also orienting inmates on good hand hygiene, and she's considering giving them supervised access to hand sanitizer — it's usually contraband in jails, because of the alcohol content. For inmates who eventually do test positive, the jail has identified holding areas with with separate air in-takes for their ventilation. "We have 24/7 medical coverage, so at this point we're prepared to treat in place. And to minimize the risk to the rest of the population," Schrum says. But jails and prisons can expect a subset of their COVID-19 cases to be serious, and many of those patients will be sent to local hospitals, potentially adding to the anticipated surge of demand for critical care. Jose Saldana believes some of the most vulnerable prisoners should be sent home now, before they get sick. He's a former prisoner who now directs an organization called "Release Aging People in Prison Campaign." "It's probably going to be deadly for some of the elderly people that I left behind," Saldana says. He'd like parole and clemency boards to consider which older, sick prisoners could be let out, because he believes they'd stand a better chance of surviving the pandemic at home. "Let's look at it realistically, for what it is. 
To keep such men in prison, to die — knowing they're going to die! — is just pure revenge. It's not justice," Saldana says. There may be other reasons to let some of them go. Marc Stern is a physician who used to be the top medical officer for the Washington State Department of Corrections, and he says jails and prisons should be planning for the possibility of not having enough staff. "We don't want to be caught behind the ball," he says. "We don't want to find one day that a number of officers have called in sick, and we're having trouble managing the institution." He's recommending corrections administrators communicate now with courts or parole systems about which inmates might be safely released early, in case things get to that point."""

In [4]:
# Project text-preprocessing helper; presumably lang='en' selects English
# resources -- TODO confirm against NLPUtils.
nlp_utils = NLPUtils(lang='en')

In [5]:
parsed_text = nlp_utils.parse_text(text)

In [6]:
parsed_text = " ".join(word for word in parsed_text)

In [7]:
# Show the joined, preprocessed text that will be compared against the raw text.
parsed_text

'covid-19 spread public health official tell people stay home feel sick jail prison option Robert Greifinger physician spend 25 year work health care issue inside nation prison jail say social distancing advice hear right simple bar crowding issue ventilation issue security issue people check monitor fairly frequently Greifinger say hard jail prison complacent coronavirus say run risk incubator disease jail prison staff prisoner tend young think initially go big problem say remember staff work shift come facility bring infection home people compromise immune system week Greifinger correction health expert urge prison administrator plan coronavirus NPR start ask jail prison preparation point existing plan infectious disease tuberculosis Arizona spokesperson Department Corrections say employee inmate encourage disinfect shared service correctional officer tell KJZZ Jimmy Jenkins new say unsanitary condition threaten health care staff inmate NPR identify officer speak reporter cost job of

In [14]:
# Run the pipeline on the original, unprocessed article text.
# NOTE(review): the execution counts of this and the following cells
# (14, 13, 9, 15) are out of order -- verify with Restart Kernel -> Run All.
doc = named_entity_recognition.named_entity_recognition_process(text)

In [13]:
# Run the same pipeline on the preprocessed text so both results can be compared.
doc_parsed = named_entity_recognition.named_entity_recognition_process(parsed_text)

In [9]:
# Visualise the entities found in the raw text (displaCy "ent" style).
displacy.render(doc, style="ent")

In [15]:
# Visualise the entities found in the preprocessed text (displaCy "ent" style).
displacy.render(doc_parsed, style="ent")

In [29]:
# Distinct entity strings from the raw text, lowercased so that casing
# differences do not count as different entities.
set_doc = {ent.text.lower() for ent in doc.ents}

# List every entity occurrence together with its label.
for ent in doc.ents:
    print(ent.text, ent.label_)

Robert Greifinger PERSON
25 years DATE
Greifinger PERSON
weeks DATE
Greifinger PERSON
NPR ORG
Arizona GPE
the Department of Corrections ORG
KJZZ ORG
Jimmy Jenkins PERSON
NPR ORG
Jenkins PERSON
Jenkins PERSON
the Maricopa County Sheriff's Office ORG
nearly 7,000 CARDINAL
daily DATE
Seattle GPE
the South Correctional Entity ORG
nearly 600 CARDINAL
SCORE ORG
Devon Schrum PERSON
Four CARDINAL
Schrum PERSON
Jose Saldana PERSON
Release Aging People in Prison Campaign WORK_OF_ART
Saldana PERSON
Saldana PERSON
Marc Stern PERSON
the Washington State Department of Corrections ORG
one day DATE


In [30]:
# Distinct entity strings from the preprocessed text, lowercased so that
# casing differences do not count as different entities.
set_doc_parsed = {ent.text.lower() for ent in doc_parsed.ents}

# List every entity occurrence together with its label.
for ent in doc_parsed.ents:
    print(ent.text, ent.label_)

Robert Greifinger PERSON
25 year DATE
Greifinger PERSON
week DATE
NPR ORG
Arizona GPE
Department Corrections ORG
KJZZ Jimmy Jenkins ORG
NPR ORG
Jenkins PERSON
Jenkins PERSON
Maricopa County GPE
nearly 7,000 CARDINAL
daily DATE
Seattle South Correctional Entity ORG
nearly 600 CARDINAL
Devon Schrum PERSON
Schrum PERSON
Jose Saldana PERSON
Saldana PERSON
Saldana PERSON
Marc Stern PERSON
Washington State Department Corrections ORG


In [31]:
# Number of distinct (lowercased) entity strings found in the preprocessed text.
len(set_doc_parsed)

20

In [32]:
# Number of distinct (lowercased) entity strings found in the raw text.
len(set_doc)

26

In [33]:
# Entity strings found in the raw text but not in the preprocessed text.
# This is a one-directional difference based on exact string match, so an
# entity whose wording changed during preprocessing also shows up here.
set_doc - set_doc_parsed

{'25 years',
 'four',
 'jimmy jenkins',
 'kjzz',
 'one day',
 'release aging people in prison campaign',
 'score',
 'seattle',
 'the department of corrections',
 "the maricopa county sheriff's office",
 'the south correctional entity',
 'the washington state department of corrections',
 'weeks'}