# Wikification

In [1]:
import json
import openai
import os
import sys

from dotenv import load_dotenv
from pprint import pprint

In [2]:
# Load the Azure OpenAI connection settings from the local "azure.env" file.
load_dotenv("azure.env")

# os.getenv() returns None for an unset variable, which would otherwise only surface
# later as an opaque authentication/URL error on the first API call. Warn right away.
_required_settings = ("OPENAI_API_KEY", "OPENAI_API_BASE", "OPENAI_API_VERSION")
_missing_settings = [name for name in _required_settings if not os.getenv(name)]
if _missing_settings:
    print(
        "WARNING: missing environment variable(s): "
        + ", ".join(_missing_settings)
        + ' (expected in "azure.env"); OpenAI requests are likely to fail until they are set.',
        file=sys.stderr,
    )

# Configure the openai module-level client for Azure.
openai.api_type = "azure"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

In [3]:
# Passed to openai.Completion.create as `engine` by wikification(). With api_type "azure"
# this is presumably the name of the Azure deployment rather than a raw model id — TODO confirm.
model = "text-davinci-003"

In [4]:
def _build_wikification_prompt(text):
    """Return the completion prompt asking the model to wikify ``text``."""
    # Trailing spaces are written explicitly so the prompt stays stable even if an
    # editor strips end-of-line whitespace.
    return (
        "You will read a text, and you will detect named entities contained in it \n"
        "--- \n"
        "TEXT "
        + text
        + "  \n"
        "---\n"
        "Now you will connect the named entities to a corresponding Wikipedia Page. \n"
        "If there are several wikipedia pages corresponding, select wikipedia pages that have a logic connection\n"
        "\n"
        "Finally you will return the result in a json format like the one below:\n"
        "\n"
        "{\n"
        '    "links" : [\n'
        "      { \n"
        '        "wikipedia_page":"HERE PUT THE WIKIPEDIA PAGE URL",\n'
        '        "wikipedia_page_title":"HERE PUT THE WIKIPEDIA PAGE TITLE",\n'
        '        "text":"HERE PUT THE DETECTED TEXT"\n'
        "      },\n"
        "       ... ]\n"
        "}\n"
        "\n"
        "Answer:\n"
    )


def _parse_wikification_response(raw_text):
    """Decode the JSON document contained in the model completion ``raw_text``."""
    cleaned = raw_text.strip("\n").replace("\n", "")
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as first_error:
        # The model sometimes wraps the JSON in extra prose; retry on the outermost
        # {...} span before giving up.
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if 0 <= start < end:
            candidate = cleaned[start : end + 1]
            if candidate != cleaned:
                try:
                    return json.loads(candidate)
                except json.JSONDecodeError:
                    pass
        # Same exception type as before, but with the raw completion attached so a
        # truncated or malformed answer can be diagnosed.
        raise json.JSONDecodeError(
            "Model completion is not valid JSON (it may have been cut off by max_tokens); "
            f"raw completion: {raw_text!r}; parser said: {first_error.msg}",
            first_error.doc,
            first_error.pos,
        ) from first_error


def wikification(text, model, max_tokens=500, temperature=0.8):
    """
    Detect named entities in a text and link them to Wikipedia pages.

    Sends a single completion request whose prompt asks for a JSON document of the
    form ``{"links": [{"wikipedia_page": ..., "wikipedia_page_title": ..., "text": ...}]}``
    and returns the decoded JSON.

    Args:
        text: Text to analyse; it is concatenated into the prompt as-is.
        model: Value passed as ``engine`` to ``openai.Completion.create``.
        max_tokens: Upper bound on the completion length. A value that is too small
            truncates the JSON and makes parsing fail.
        temperature: Sampling temperature forwarded to the API. The default (0.8)
            matches the previous hard-coded value; pass 0 for more repeatable output.

    Returns:
        The decoded JSON value produced by the model (normally a dict with a
        ``"links"`` list).

    Raises:
        json.JSONDecodeError: If no valid JSON can be recovered from the completion.
            The message includes the raw completion text.
    """
    result = openai.Completion.create(
        prompt=_build_wikification_prompt(text),
        temperature=temperature,
        max_tokens=max_tokens,
        engine=model,
    )

    return _parse_wikification_response(result["choices"][0]["text"])

In [5]:
def print_wikification(results):
    """
    Print the wikification results

    Args:
        results: Mapping with a ``"links"`` list, as returned by ``wikification``.
            Each link is a dict that may carry ``"text"``, ``"wikipedia_page"`` and
            ``"wikipedia_page_title"``; a missing field is shown as ``N/A``.

    Raises:
        KeyError: If ``results`` has no ``"links"`` entry.
    """
    # ANSI SGR sequence: bold, with green as the effective foreground colour.
    print("\033[1;31;32m")
    try:
        for link in results["links"]:
            # The links come from model output, so individual fields may be absent.
            print(f"Text: {link.get('text', 'N/A')}")
            print(f"Wikipedia Page: {link.get('wikipedia_page', 'N/A')}")
            print(f"Wikipedia Page Title: {link.get('wikipedia_page_title', 'N/A')}")
            print()
    finally:
        # Restore the default style, even on error, so the colour does not leak
        # into whatever is printed next.
        print("\033[0m", end="")

In [6]:
# First sample: a short product description, kept as a single line of text.
text1 = (
    "The OpenAI API can be applied to virtually any task that requires understanding or "
    "generating natural language and code. The OpenAI API can also be used to generate and edit "
    "images or convert speech into text. We offer a range of models with different capabilities and "
    "price points, as well as the ability to fine-tune custom models."
)

In [7]:
# Wikify the first sample. wikification() samples with temperature 0.8, so the
# detected links can differ from one run to the next.
res1 = wikification(text1, model=model)

pprint(res1)

{'links': [{'text': 'OpenAI API',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/OpenAI',
            'wikipedia_page_title': 'OpenAI'},
           {'text': 'natural language',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Natural_language_processing',
            'wikipedia_page_title': 'Natural language processing'},
           {'text': 'generate and edit images',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Computer_graphics',
            'wikipedia_page_title': 'Computer graphics'},
           {'text': 'convert speech into text',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Speech_recognition',
            'wikipedia_page_title': 'Speech recognition'}]}


In [8]:
# Same result as a listing: one Text / Wikipedia Page / Wikipedia Page Title group per link.
print_wikification(res1)

[1;31;32m
Text: OpenAI API
Wikipedia Page: https://en.wikipedia.org/wiki/OpenAI
Wikipedia Page Title: OpenAI

Text: natural language
Wikipedia Page: https://en.wikipedia.org/wiki/Natural_language_processing
Wikipedia Page Title: Natural language processing

Text: generate and edit images
Wikipedia Page: https://en.wikipedia.org/wiki/Computer_graphics
Wikipedia Page Title: Computer graphics

Text: convert speech into text
Wikipedia Page: https://en.wikipedia.org/wiki/Speech_recognition
Wikipedia Page Title: Speech recognition



In [9]:
# Second sample: a single short sentence.
text2 = "Microsoft plans event to share AI announcements for OneDrive"

In [10]:
# Wikify the second sample with the default max_tokens (500) and inspect the parsed JSON.
res2 = wikification(text2, model=model)
pprint(res2)

{'links': [{'text': 'Microsoft',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Microsoft',
            'wikipedia_page_title': 'Microsoft'},
           {'text': 'AI',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/AI',
            'wikipedia_page_title': 'AI'},
           {'text': 'OneDrive',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/OneDrive',
            'wikipedia_page_title': 'OneDrive'}]}


In [11]:
# Listing view of the links found in the second sample.
print_wikification(res2)

[1;31;32m
Text: Microsoft
Wikipedia Page: https://en.wikipedia.org/wiki/Microsoft
Wikipedia Page Title: Microsoft

Text: AI
Wikipedia Page: https://en.wikipedia.org/wiki/AI
Wikipedia Page Title: AI

Text: OneDrive
Wikipedia Page: https://en.wikipedia.org/wiki/OneDrive
Wikipedia Page Title: OneDrive



In [12]:
# Third sample: a longer, three-paragraph text (paragraphs separated by "\n").
text3 = (
    "Wikification is a fundamental task in natural language processing that involves linking "
    "named entities in a given text to their corresponding entries in a knowledge base, such as Wikipedia. "
    "In this notebook, we will explore how to leverage OpenAI's powerful language models to perform wikification "
    "using the LLM model.\n"
    "Entity linking and annotation are crucial for a wide range of applications, including information retrieval, "
    "question answering systems, text summarization, and more. By linking entities to knowledge bases, "
    "we can enrich the understanding of text and facilitate further analysis.\n"
    "Throughout this notebook, we will guide you through the process of setting up the environment, "
    "installing the necessary libraries, and implementing the steps involved in performing wikification "
    "with OpenAI's LLM model."
)

In [13]:
# The third sample is much longer, so raise the completion budget above the 500-token
# default; the JSON answer must fit entirely within max_tokens to be parseable.
res3 = wikification(text3, model=model, max_tokens=1000)
pprint(res3)

{'links': [{'text': 'Wikification',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Wikification',
            'wikipedia_page_title': 'Wikification'},
           {'text': 'natural language processing',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Natural_language_processing',
            'wikipedia_page_title': 'Natural language processing'},
           {'text': 'OpenAI',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/OpenAI',
            'wikipedia_page_title': 'OpenAI'},
           {'text': 'LLM model',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Language_model',
            'wikipedia_page_title': 'Language model'},
           {'text': 'entity linking',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Entity_linking',
            'wikipedia_page_title': 'Entity linking'},
           {'text': 'information retrieval',
            'wikipedia_page': 'https://en.wikipedia.org/wiki/Information_retrieval',
           

In [14]:
# Listing view of the links found in the third sample.
print_wikification(res3)

[1;31;32m
Text: Wikification
Wikipedia Page: https://en.wikipedia.org/wiki/Wikification
Wikipedia Page Title: Wikification

Text: natural language processing
Wikipedia Page: https://en.wikipedia.org/wiki/Natural_language_processing
Wikipedia Page Title: Natural language processing

Text: OpenAI
Wikipedia Page: https://en.wikipedia.org/wiki/OpenAI
Wikipedia Page Title: OpenAI

Text: LLM model
Wikipedia Page: https://en.wikipedia.org/wiki/Language_model
Wikipedia Page Title: Language model

Text: entity linking
Wikipedia Page: https://en.wikipedia.org/wiki/Entity_linking
Wikipedia Page Title: Entity linking

Text: information retrieval
Wikipedia Page: https://en.wikipedia.org/wiki/Information_retrieval
Wikipedia Page Title: Information retrieval

Text: question answering systems
Wikipedia Page: https://en.wikipedia.org/wiki/Question_answering
Wikipedia Page Title: Question answering

Text: text summarization
Wikipedia Page: https://en.wikipedia.org/wiki/Text_summarization
Wikipedia Page