In [1]:
from transformers import pipeline
import json

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the NER model
ner_pipeline = pipeline("ner", model="./local_model", aggregation_strategy="simple")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [13]:
def test_is_human_name(name):
    """
    Checks if a given name is recognized as a human name by the DistilBERT NER model.

    @Param name: str, the name to check

    Returns:
    bool: True if classified as a person, False otherwise.
    """
    result = ner_pipeline(name)

    # Check if any recognized entity is labeled as a PERSON
    return any(ent['entity_group'] == 'PER' for ent in result)

# Example: Checking a list of names
names = ["Becky Browne", "Bonnie Flowers", "Reverends Jim Miller", "NEW YEAR", "Cincinnati New Year’s Day"]
results = {name: test_is_human_name(name) for name in names}

print(results)  # Output: {'Becky Browne': True, 'Bonnie Flowers': True, 'Reverends Jim Miller': True, 'NEW YEAR': False, 'Cincinnati New Year’s Day': False}


{'Becky Browne': True, 'Bonnie Flowers': True, 'Reverends Jim Miller': True, 'NEW YEAR': False, 'Cincinnati New Year’s Day': False}


In [8]:

def is_human_name(name):
    """
    Checks if a given name is recognized as a human name by the DistilBERT NER model.

    @Param name: str, the name to check

    Returns:
    bool: True if classified as a person, False otherwise.
    """
    result = ner_pipeline(name)
    return any(ent['entity_group'] == 'PER' for ent in result)


In [16]:
# Input and output file names
input_file = "peaceLogs/peace_cleanedNames_log.txt"
output_file = "peaceLogs/peace_cleaned_human_names.txt"

# Step 1: Collect all names into a list
with open(input_file, "r") as f:
    names = [line.strip() for line in f if line.strip()]

names

['Billy Williams',
 'Billy Williams',
 'Alpha Black',
 'Alpha Black',
 'Corene White',
 'Corene White',
 'Stella Pratt',
 'Stella Pratt',
 'Aubrey Tyree',
 'Aubrey Tyree',
 'Aubrey Tyree',
 'L. E. Hannah',
 'L. E. Hannah',
 'L. E. Hannah',
 'Eron White',
 'Eron White',
 'Eron White',
 'B. B. Mattox',
 'B. B. Mattox',
 'B. B. Mattox',
 'Reedie Powell',
 'Reedie Powell',
 'Reedie Powell',
 'Billy Williams',
 'Mae Conwill',
 'Mae Conwill',
 'Mae Conwill',
 'Carmon Brothers',
 'Carmon Brothers',
 'Carmon Brothers',
 'Lola Roberson',
 'Lola Roberson',
 'John Hocutt',
 'John Hocutt',
 'Elmer Conwill',
 'Billy Williams',
 'Margaret Keeton',
 'Bradley Allen',
 'Margaret Keeton',
 'Bradley Allen',
 'Margaret Keeton',
 'Bradley Allen',
 'Josie Hyde',
 'Josie Hyde',
 'Josie Hyde',
 'Ester Brown',
 'Ester Brown',
 'John Hyde',
 'John Hyde',
 'John Hyde',
 'Ada Godsey',
 'Ada Godsey',
 'Ada Godsey',
 'Elmer Conwill',
 'Elmer Conwill',
 'Amanda Denson',
 'Amanda Denson',
 'Charley McCoy',
 'Charley 

In [None]:
# cleaned_human_names = {name: is_human_name(name) for name in names}
batch_size = 32
results = ner_pipeline(names, batch_size=batch_size)

cleaned_human_names = {}
for name, result in zip(names, results):
    cleaned_human_names[name] = any(ent['entity_group'] == 'PER' for ent in result)

print(cleaned_human_names)

In [None]:
with open(output_file, "w") as out_f:
    for name, is_human in cleaned_human_names.items():
        out_f.write(f"{name}: {is_human}\n")