In [103]:
import spacy
from spacy.tokens import Doc
import re

In [104]:
# Sample passage used to exercise the pipeline; adjacent literals are
# concatenated by the parser into one string.
input_string = (
    'Diversity in workforce is important. '
    'Continental is aware that legal and cultural requirements can vary in a global market. '
    'We expect all of our suppliers to be guided by fairness, honesty and responsibility '
    'in all aspects of their business. '
    'Our supplier code establishes important standards that match the Continental corporate values. '
    'Every supplier must comply strictly with these standards. '
    'We use them to define requirements for good working conditions, '
    'then check compliance with these requirements through our on-site audits.'
)
# Alternative input:
# input_string = 'At Bosch, we value diversity. The company intends to increase workforce diversity by 40%'

# Load the coreference pipeline and run it over the passage.
coref_model = spacy.load('en_coreference_web_trf')
doc = coref_model(input_string)

def resolve_references(doc):
    """Rewrite the document text with coreferent mentions replaced by their antecedent.

    Clusters are taken from every ``doc.spans`` entry whose key starts with
    ``'coref_cluster'``. Within a cluster, the first mention is treated as the
    antecedent and every later mention is replaced by the antecedent's text,
    inserted verbatim (no case or possessive adjustment).

    Args:
        doc: A parsed document exposing ``spans`` (mapping of key -> sequence
            of mention spans) and iterable tokens with ``idx``, ``text`` and
            ``whitespace_``.

    Returns:
        str: The reconstructed text with later mentions substituted.
    """
    # Maps a token's character offset to the text emitted in its place.
    replacements = {}
    clusters = [group for key, group in doc.spans.items() if key.startswith('coref_cluster')]

    for cluster in clusters:
        mentions = list(cluster)
        if len(mentions) < 2:
            # Nothing to substitute for an empty or single-mention cluster.
            continue
        antecedent_text = mentions[0].text
        for mention in mentions[1:]:
            tokens = list(mention)
            if not tokens:
                continue
            # The replacement must be followed by whatever followed the whole
            # mention, i.e. the trailing whitespace of its LAST token. Using
            # the first token's whitespace leaves a stray space before
            # punctuation for multi-token mentions ("these standards." ->
            # "... values .").
            replacements[tokens[0].idx] = antecedent_text + tokens[-1].whitespace_
            # Remaining tokens of the mention are swallowed by the replacement.
            for token in tokens[1:]:
                replacements[token.idx] = ''

    return ''.join(
        replacements[token.idx] if token.idx in replacements else token.text + token.whitespace_
        for token in doc
    )

# Substitute coreferent mentions in the parsed document and show the result.
resolved_string = resolve_references(doc)
print(resolved_string)

Diversity in workforce is important. Continental is aware that legal and cultural requirements can vary in a global market. Continental expect all of Continental suppliers to be guided by fairness, honesty and responsibility in all aspects of all of our suppliers business. Continental supplier code establishes important standards that match the Continental corporate values. Every supplier must comply strictly with important standards that match the Continental corporate values . Continental use important standards that match the Continental corporate values to define requirements for good working conditions, then check compliance with requirements for good working conditions through Continental on-site audits.


In [105]:
# Load the NER pipeline and seed the working string with the
# coreference-resolved text; show_ents() declares `ner_op_string` as global.
nlp = spacy.load('en_core_web_trf')
ner_op_string = resolved_string

def show_ents(doc):
    """Mask every ORG entity of ``doc`` with a same-length run of ``'O'``.

    The mask keeps the string length unchanged, so the character offsets of
    the remaining entities stay valid while the text is being rewritten.

    Args:
        doc: A parsed document exposing ``text`` and ``ents``; each entity
            provides ``label_``, ``start_char`` and ``end_char``.

    Returns:
        str: The document text with ORG entities masked. The same value is
        also stored in the module-level ``ner_op_string``.

    Side effects:
        Rebinds the global ``ner_op_string``; prints a notice when the
        document has no entities.
    """
    global ner_op_string
    # Entity offsets index into the document's own text. Starting from
    # doc.text (rather than whatever the global currently holds) keeps the
    # function idempotent and avoids slicing a stale or already-rewritten
    # string with offsets that no longer match it.
    masked = doc.text
    if doc.ents:
        for ent in doc.ents:
            if ent.label_ == 'ORG':
                mask = 'O' * (ent.end_char - ent.start_char)
                masked = masked[:ent.start_char] + mask + masked[ent.end_char:]
    else:
        print('No named entities found.')
    ner_op_string = masked
    return ner_op_string
    
# Run NER on the coreference-resolved text, mask the ORG entities, then
# collapse every run of two or more mask characters into the tag 'ORG'.
# NOTE(review): the pattern also matches any pre-existing run of capital O's
# in the text and does not match a one-character mask.
doc = nlp(resolved_string)
ner_op_string = re.sub(r'O{2,}', 'ORG', show_ents(doc))
print(ner_op_string)

Diversity in workforce is important. ORG is aware that legal and cultural requirements can vary in a global market. ORG expect all of ORG suppliers to be guided by fairness, honesty and responsibility in all aspects of all of our suppliers business. ORG supplier code establishes important standards that match the ORG corporate values. Every supplier must comply strictly with important standards that match the ORG corporate values . ORG use important standards that match the ORG corporate values to define requirements for good working conditions, then check compliance with requirements for good working conditions through ORG on-site audits.


In [106]:
def get_org_sentences(ner_op_string):
    """Return only the sentences of ``ner_op_string`` that contain ``'ORG'``.

    Sentences are delimited by the literal ``'. '``. Each kept sentence
    retains its own terminating period, so the result ends with a period even
    when the final sentence of the input is filtered out.

    Args:
        ner_op_string (str): Text in which organisations were replaced by the
            tag ``'ORG'``.

    Returns:
        str: The matching sentences joined by a single space, or ``''`` when
        no sentence contains the tag.
    """
    pieces = ner_op_string.split('. ')
    # split() strips the '. ' delimiter; every piece except the last one was
    # followed by it, so give those pieces their period back before filtering.
    sentences = [piece + '.' for piece in pieces[:-1]] + pieces[-1:]
    return ' '.join(sentence for sentence in sentences if 'ORG' in sentence)

# Show only the sentences that still contain an 'ORG' tag.
print(get_org_sentences(ner_op_string))

ORG is aware that legal and cultural requirements can vary in a global market. ORG expect all of ORG suppliers to be guided by fairness, honesty and responsibility in all aspects of all of our suppliers business. ORG supplier code establishes important standards that match the ORG corporate values. Every supplier must comply strictly with important standards that match the ORG corporate values . ORG use important standards that match the ORG corporate values to define requirements for good working conditions, then check compliance with requirements for good working conditions through ORG on-site audits.
