In [1]:
# Uses a try/except block to check whether the ChatGPT response is a valid dictionary.
# Falls back to spaCy NER when the ChatGPT response is not in dictionary format.
# Also falls back to spaCy when ChatGPT returns an empty list for every entity type,
# e.g. 'PERSON': [], 'ORG': [], 'GPE': [], 'EVENTS': [], 'DATE': [], 'LAW': []
# (this case was encountered once).


from llama_index.llms import OpenAI as LlamaOpenAI
import spacy
import ast

# Loaded spaCy pipelines keyed by model name, so the model is loaded once per
# process instead of on every call to perform_spacy_ner.
_SPACY_PIPELINES = {}

# spaCy entity labels that are dropped from the results.
_EXCLUDED_SPACY_LABELS = frozenset(
    {"TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL"}
)


def _get_spacy_pipeline(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for *model_name*, loading it on first use."""
    if model_name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return _SPACY_PIPELINES[model_name]


def perform_spacy_ner(input_string):
    """
    Extract named entities from a text with the spaCy "en_core_web_sm" model.

    Args:
        input_string: The text to run spaCy NER on.

    Returns:
        List of (label, text) tuples, one per entity spaCy found, in the
        order spaCy reports them. Entities whose label is one of TIME,
        PERCENT, MONEY, QUANTITY, ORDINAL or CARDINAL are left out.
    """
    doc = _get_spacy_pipeline()(input_string)
    return [
        (ent.label_, ent.text)
        for ent in doc.ents
        if ent.label_ not in _EXCLUDED_SPACY_LABELS
    ]


def response_parse(data: dict, input_text=None) -> list:
    """
    Flatten the entity dictionary from the OpenAI response into pairs.

    Args:
        data: Dictionary mapping an entity label (e.g. "PERSON") to the
            list of words found for that label. A value of None or an
            empty list means nothing was found for that label; a bare
            string value is treated as a single entity.
        input_text: Optional original text the dictionary was extracted
            from. It is only used when `data` contains no entities at all,
            in which case spaCy NER is run on it as a fallback.

    Returns:
        List of (label, word) tuples in dictionary order. If `data` holds
        no entities, the spaCy result for `input_text` when it is given,
        otherwise an empty list.
    """
    pairs = []
    for label, words in data.items():
        if not words:
            # None or empty: the model found nothing for this label.
            continue
        if isinstance(words, str):
            # Avoid iterating a lone string character by character.
            words = [words]
        pairs.extend((label, word) for word in words)

    # The dictionary itself is not text, so the spaCy fallback can only run
    # when the caller supplied the source text.
    if not pairs and input_text:
        return perform_spacy_ner(input_text)

    return pairs


def run_llama_ner_task(input_text: str, labels, model: str = 'gpt-3.5'):
    """
    Ask an OpenAI model (through LlamaIndex) to extract named entities.

    Args:
        input_text: Text to analyse; it is embedded directly in the prompt.
        labels: Iterable of entity label strings; they are comma-joined into
            the prompt. Note that the entity definitions and the output
            format in the prompt are written out for PERSON, ORG, GPE,
            EVENTS, DATE and LAW regardless of what `labels` contains.
        model: Model name passed to the LlamaIndex OpenAI wrapper.
            NOTE(review): 'gpt-3.5' does not look like a current OpenAI
            model id ('gpt-3.5-turbo' is) -- confirm the default works.

    Returns:
        The object returned by `complete()`; the script below reads the
        model output from its `.text` attribute.
    """
    # temperature=0 is requested for the completion.
    client = LlamaOpenAI(temperature=0, model=model)

    prompt = f"""You are an expert in Natural Language Processing. Your task is to identify common Named Entities (NER) in a given text: {input_text}.
        The possible common Named Entities (NER) types are exclusively: ({", ".join(labels)}).
        --------
        Follow the Entity defination as below:
        1. PERSON: Short name or full name of a person from any geographic regions.
        2. ORG: Organizations, Companies, Agencies or Institutions.
        3. GPE: Geopolitical entities like name of countries, continents, cities, states, district.
        4. EVENTS: Scientific milestones, historical events, future events.
        5. DATE: Any format of dates. Dates can also be in natural language.
        6. LAW : Mention of any article, treaty, law, agenda, roadmap.
        --------
        Output Format:
        {{"PERSON": [list of entities present], "ORG": [list of entities present], "GPE": [list of entities present], "EVENTS": [list of entities present], "DATE": [list of entities present], "LAW": [list of entities present]}}
        If no entities are presented in any categories keep it an empty list.
        Your output response MUST only contain a Dictionary object as specified in output format. If it is not in the format strip the string until you encounter a dictionary in the format specified.\n
        --------
        EXAMPLE:
        Text: 'To this end, internet restrictions should be considered not merely as technical measures affecting the logical or physical layers of the networks, but also as a tool used to impair the means of expression of individuals (see below, Avoid internet fragmentation). Therefore, internet shutdowns and content blocking may constitute violations of the principles contained in several international human rights instruments, in particular article 19 (right to freedom of expression, the scope of which is clarified by General Comment 34 of the Human Rights Committee) and article 21 (right of assembly) of the International Covenant on Civil and Political Rights (ICCPR). Regional conventions for the protection of human rights generally include similar provisions.
        This perspective is supported by the practice of several organs and international institutions, notably resolutions adopted by the Human Rights Council on 'The promotion, protection and fulfillment of human rights on the Internet' (A/HRC/32/L.20, June 27th, 2016). The Council condemns measures that seek to ' intentionally prevent or disrupt access to or dissemination of information online in violation of international human rights law, and calls upon all States to refrain from and cease such measures ', relying on a substantial number of secondary legislation not specifically related to the issue of Internet access. Several UN Special Rapporteurs have recognized the fundamental nature of the right of access to the Internet through the prism of freedom of expression or the right to peaceful assembly. However, this right of access to the internet should not be construed as a duty of States to provide access without any charge.'
        {{"PERSON": [], "ORG": ["Human Rights Committee", "International Covenant on Civil and Political Rights (ICCPR)", "Human Rights Council", "UN", "Special Rapporteurs"], "GPE": ["States"],  "EVENTS": [], "DATE": ["June 27th, 2016"], "LAW": ["article 19 (right to freedom of expression", "article 21 (right of assembly)", "A/HRC/32/L.20", "international human rights law"] }}
        
        """
    return client.complete(prompt)


if __name__ == "__main__":
    # Demo run: extract entities from a sample paragraph with the LLM and
    # fall back to spaCy whenever the LLM answer is unusable.

    from dotenv import load_dotenv, find_dotenv

    # needs an openai api key
    load_dotenv(find_dotenv())

    input_text = """
        
    Under The Common Agenda, the UN Secretary General called for a Global Digital Compact to be agreed at the Summit of the Future (September 2024) to “outline 
    shared principles for an open, free and secure digital future for all.” Ensuring that 
    women’s rights organizations in all their diversity feed into this process is critical to 
    shape a vision of digital cooperation and governance based on human rights and 
    feminist principles.  In his report to UNGA77, on the Intensification of efforts to eliminate all forms of violence against women and girls, the Secretary General proposed a high level consultation on violence against women and girls in digital 
    contexts to inform the Global Digital Compact.
        """

    LABELS = [
        "PERSON",
        "ORG",
        "GPE",
        "EVENTS",
        "DATE",
        "LAW",
    ]

    NER_openai_llama = run_llama_ner_task(input_text, LABELS, "gpt-4-1106-preview")

    # Parse the model output as a Python literal. ast.literal_eval only
    # evaluates literals, so it is safe to use on the model's text.
    try:
        extracted_dict = ast.literal_eval(NER_openai_llama.text)
        if not isinstance(extracted_dict, dict):
            raise ValueError("Could not extract a valid dictionary using eval.")
    except (SyntaxError, ValueError, TypeError) as e:
        print(f"Error with eval: {e}")
        extracted_dict = None

    # Use the LLM result only when it is a dictionary with at least one
    # entity; otherwise run spaCy on the original text instead.
    if extracted_dict and any(extracted_dict.values()):
        result = response_parse(extracted_dict)
    else:
        result = perform_spacy_ner(input_text)

    print(NER_openai_llama)

    print(result)

{"PERSON": ["UN Secretary General"], "ORG": ["UNGA77"], "GPE": [], "EVENTS": ["The Common Agenda", "Summit of the Future", "Global Digital Compact"], "DATE": ["September 2024"], "LAW": []}
[('PERSON', 'UN Secretary General'), ('ORG', 'UNGA77'), ('EVENTS', 'The Common Agenda'), ('EVENTS', 'Summit of the Future'), ('EVENTS', 'Global Digital Compact'), ('DATE', 'September 2024')]
