In [1]:
from transformers import pipeline
import json

# Load the NER model
ner_pipeline = pipeline("ner", model="./local_model", aggregation_strategy="simple")

  from .autonotebook import tqdm as notebook_tqdm
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [3]:
def test_is_human_name(name):
    """
    Checks if a given name is recognized as a human name by the DistilBERT NER model
    and returns a True/False result along with a confidence score.

    @Param name: str, the name to check

    Returns:
    tuple: (bool, float)
        - True if classified as a person, False otherwise.
        - Confidence score (between 0 and 1) of the entity being a human name, or 0 if not detected.
    """
    result = ner_pipeline(name)

    # Find the entity recognized as a PERSON with the highest confidence score
    person_score = max([ent['score'] for ent in result if ent['entity_group'] == 'PER'], default=0)
    
    # Return True/False with the score
    return person_score > 0, person_score

In [None]:

# names = ["Will Black", "Bonnie Flowers", "Reverends Jim Miller", "NEW YEAR", "Cincinnati New Year’s Day"]
names=["Eugene Hult Center"]
results = {name: (test_is_human_name(name)[0], test_is_human_name(name)[1]) for name in names}

# Printing the results in the desired format
for name, (is_human_name, score) in results.items():
    print(f'"{name}": {is_human_name}, {score}')

"Eugene Hult Center": False, 0


In [5]:
import numpy as np

spacy_non_name_words = np.array([
    "Aileen Woodley", "Calum Woods", "The Harper", "Frits DeBohr", "Rain Haynes",
    "Aoife O’Grady", "During Saturday", "The Registration", "The Saturday", "Claudine Townsend",
    "P.M.", "In Roh Hye", "Delores Canaday", "At John Fawcett’s", "MB Gowans",
    "Aileen Vance", "Iain McLean", "All Dublin", "Destiny Woods", "Trent Frick",
    "Floy Driskill", "Depths We Cry", "Will Ahrenhole", "The Beasley Family", "Tommye Mauldin",
    "Piers Cawley", "delRe", "Emily Cargile", "Life’s Journey’", "The Roots",
    "All Day", "Brother Stavros Winner", "Clarice Kjeralff", "But The Blood", "Art Hargrove",
    "Co-Chairman Rodney Ivey", "The Creel", "Pam Dunning", "While Syble", "At Doddridge Chapel",
    "DeMars", "Celtic World’", "WECP", "Annalise Perone", "Sage Chaser-Dempsey",
    "névé", "Emily Venable", "Clearcy Ruttledge", "Elise Meyer Bothling", "In The",
    "The Kerr Family", "The Lacy Family-Reba Windom", "McGraw", "Eimear O’ Donovan",
    "On Saturday", "The Oliver", "Tottie Harken", "The Hat Parade--Betty Shepherd", "Wing Mui",
    "The Eddins", "Tussaint Burnette", "Died For Me", "Dolly Hudgins", "Even Venditti",
    "Dutchified German Spirituals", "Teacher--Dan Brittain", "Clarence Cates", "The Union",
    "Brent Devore", "The Green", "The Eva Striebeck", "The Silverton Grange All-Day", "Mercy Teichert",
    "Blakely Stewart", "Serenity Manning", "Robyn Bundy", "The London", "Rebecca Over",
    "Anders Fahey", "Tigre Lusardi", "Daire O’Sullivan", "Layla Slaughter", "Case O’Dell",
    "Will Black", "Ai Chin", "McCool", "In Cagle’s", "The Tuesday Night Singers",
    "Tori Gundlach", "Bascum Mattox", "Elise DelMas", "The Youth Boys", "Chip Ward",
    "The Ladykillers", "The Arranger", "Emily O’Dell", "Dynamics Chapter V. The", "The Supertonic",
    "Tell Me Of The Angels", "Won’t Turn Back", "Chair-person Sarah Davie", "The Chairladies",
    "Founders Ted", "Web Sink", "The Rudiments", "Beautiful Land", "Leaders Steve Cackley",
    "The Green Family", "Our Savior", "Table Grace", "Myrtice Meeks", "Whilst Sheila Girling Macadam",
    "Masti-Denise Maynard", "But The Blood Of Jesus", "Art Peddle", "My Soul", "Pocket Harmonist",
    "Leaders Jamie Yeats", "Sage Brownlee", "The Alexander", "The Jamulus Singers", "The Landing",
    "The Friday", "Chapter XI", "All Scottish", "Dolores Holbrook", "Trent Peachy",
    "The Kerr", "Greggory Stockert", "McLeod", "The Trumpet", "Thich Nu Chon Duc",
    "Will Price", "My Friends", "The Pontotoc Winter", "Tanner Frazier", "Co-Chairman Susan Miller-Coulter",
    "The Levites", "Co-Chairman David Rust", "Will Means", "NEW YEAR", "Life’s Journey",
    "The Bristol", "Johannes Saver", "The Celebration Lesson", "Scots-Irish", "Price Walden",
    "The Beech Tree", "Co-Chairman Ian Smiley", "Elise Eskew Sparks", "The Avery Family", "Elise Englert",
    "Rain Morical", "Teacher-Rachel Hall Rachel Hall", "Sula O’Duffy", "Leaders Katherine Glatter",
    "Vernice Calvert", "The Founders’", "Leaders Don Clark", "Will All Go Home", "Four Mile Community",
    "Ruker McMullen", "Roll Call", "Ima Rowland", "Chery Stroud", "Hester Edwards",
    "The Geneva All-Day", "Johannes Kunst", "Elise Cavicchi", "Molly-Claire Gillett", "Elise delMas",
    "Connor O’Toole", "The Montréal", "Era Howton", "Lukasz Zóltek", "By And By",
    "The Olney Hymns", "Boys Youth", "Boundless God", "Molly Mixon", "Eamonn O’Neill",
    "Sunny Young", "The Sunday Session", "Price L. Worrell", "The Wakefield Singers", "Reverends Jim Miller",
    "Have Had Singing", "His Blessings", "Shone Around", "Secretaries--Fiona Nugent", "The Beasley",
    "The Shenandoah Harmony", "Loves Me", "Pro-tem Liz Owen", "The Chicago", "Daire O’ Sullivan",
    "Pam Sims", "Allsion Ivey", "The Locating", "Piers Blewitt", "Rosalind Woods",
    "Keillor More", "But God’s", "Sungwoo Yoon", "Dolly Sarley", "The Danby",
    "The Rogers Children", "Leaders Nick Hall", "Edmund Golloday", "The Sydneysiders Caitlin Jay", "The Dallas",
    "Co-chairmen Benedicta Hardy", "Nate Mathews", "The Founders’ Lesson", "The Dayton Harmonist", "The Harris Family",
    "Dolly Bush", "Eliza See", "At Rosslyn Chapel", "Aileen Cheng", "Chloë Spreadborough",
    "Wener Ullah", "Thérèse Power", "Edmund Galladay", "The Gilmore Family", "At Home",
    "The Rogers", "River Skrenes", "Teacher-Dan Brittain Dan", "The Cork Singers", "Dolores Canerday",
    "Teacher--Sam Sommers", "Annefloor Van Landaghen", "The Great Roll Call", "Edmund Golladay", "Will Stallings",
    "Chresten DNC", "Elise Fairless", "Day That Will Be", "The Munich", "Arnfried Mähler",
    "The Rock", "Iain Paxon", "Audress Gurley", "The Coweta", "All Frank",
    "Isis Arslan", "The Wallowa Valley All-Day", "The IV", "Founders Ted Johnson", "Will Townsend",
    "This Levitical", "McFate", "Chip Westbrook", "Explorers’ Group", "The Sheppards",
    "Sean O’Donnell", "One They Are Falling Asleep", "Rosalind Oldham", "The New Year’s Day", "Gilly Campbell",
    "Chip Wise", "Teacher-Judy Caudle Judy Caudle", "DeLong", "Who Must Be Heard", "Every Guest",
    "The Man With", "Emily Veniable", "Celestial Watering", "See Me Now", "The Sherbrooke",
    "The Texas", "The Brady", "Rye Skelton", "Car Bear", "Tanner Pfeiffer",
    "Elise Eskew", "The Lacy", "At Old Temple Kirk", "Loved One Home", "Twenty Year Club",
    "Clarence McCool", "Will Dove", "Henning Baltruschat", "Made With Hands", "The Butterfly’",
    "Will D.", "Heather Ikeler", "Alpha Black", "E. Vernice", "Light At The River",
    "The Morton All-Day", "The Cullman", "Of Thee", "On Sunday", "Frances D’Andrea",
    "Will Spendlove", "The Monday", "Willy Logan", "Rosalind Wood", "Gaylon Barrow",
    "Dolores Wilks", "The Sunday", "Co-Chairman David Ivey", "Molly-Claire Gillet", "The Introductory Lesson",
    "Molly Black", "Norm Howe", "The Founder’s Lesson", "Heather Purdy", "Performing Arts",
    "Parting Hand", "Devout Life", "The Ivey Family", "Conor O’Hanlon", "Will Peebles",
    "Co-Chairman Terry Wootten", "Emily Sigmon", "Eimear O’Donovan", "Case O’dell", "May Witt",
    "The Rock Ranch", "Spiritual Wisdom", "The Lonnie Rogers", "Alonzo Edwards", "Sinead O’Mahoney",
    "PVADS", "Per-Olof Swing"
])

print(len(spacy_non_name_words))


328


In [6]:
results_array = np.array([test_is_human_name(name) for name in spacy_non_name_words])

# Convert results to a dictionary
results = dict(zip(spacy_non_name_words, results_array))


In [7]:
print(results)

{'Aileen Woodley': array([1.        , 0.99910355]), 'Calum Woods': array([1.       , 0.4885526]), 'The Harper': array([1.        , 0.69456798]), 'Frits DeBohr': array([0., 0.]), 'Rain Haynes': array([1.        , 0.98460019]), 'Aoife O’Grady': array([1.        , 0.99410796]), 'During Saturday': array([0., 0.]), 'The Registration': array([0., 0.]), 'The Saturday': array([0., 0.]), 'Claudine Townsend': array([1.       , 0.9978587]), 'P.M.': array([1.        , 0.76086211]), 'In Roh Hye': array([0., 0.]), 'Delores Canaday': array([0., 0.]), 'At John Fawcett’s': array([1.       , 0.9329164]), 'MB Gowans': array([1.        , 0.75013041]), 'Aileen Vance': array([1.        , 0.99840444]), 'Iain McLean': array([1.        , 0.99864066]), 'All Dublin': array([0., 0.]), 'Destiny Woods': array([1.        , 0.70230103]), 'Trent Frick': array([1.        , 0.98071903]), 'Floy Driskill': array([1.        , 0.69245595]), 'Depths We Cry': array([0., 0.]), 'Will Ahrenhole': array([1.        , 0.99749076]),

In [8]:
with open("peaceLogs/peace_huggingface_Spacy_nonNames.txt", "w") as file:
    for name, values in results.items():
        status = "True" if values[0] == 1.0 else "False"
        file.write(f'"{name}": {status}, {values[1]:.6f}\n')