#### Prompt Classification

In [1]:
# Importing necessary libraries
import torch  # For tensor computations
from transformers import AutoTokenizer, AutoModelForSequenceClassification  # To load the tokenizer and model for prompt classification

# Hugging Face Hub repository that provides both the tokenizer and the classifier weights
PROMPT_CLASSIFIER_REPO = "rishika0704/promptClassifcation"

# Tokenizer that turns a raw prompt string into model-ready tensors
tokenizer = AutoTokenizer.from_pretrained(PROMPT_CLASSIFIER_REPO)
# Sequence-classification model that scores a prompt against each class
model = AutoModelForSequenceClassification.from_pretrained(PROMPT_CLASSIFIER_REPO)

# Human-readable labels, looked up by the predicted class index.
# NOTE(review): assumes this order matches the model's output indices — confirm against the model config.
class_labels = [
    "description",
    "explanation",
    "definition",
    "comparison",
]

# Function to classify the input prompt
def classify_prompt(prompt):
    """
    Classify a single prompt into one of the entries of `class_labels`.

    Args:
        prompt (str): The prompt text to classify.

    Returns:
        str: The entry of `class_labels` at the index of the highest logit.
    """
    # Encode the prompt as PyTorch tensors; padding/truncation are enabled on the tokenizer call
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    # Inference only, so no gradients are tracked
    with torch.no_grad():
        logits = model(**encoded).logits

    # .item() requires a single-element result, i.e. exactly one prompt per call
    best_index = logits.argmax(dim=1).item()
    return class_labels[best_index]

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Try the classifier on a prompt typed in by the user and print the predicted label.
# NOTE(review): input() waits for keyboard input, so this cell blocks a non-interactive "Run All".
prompt = input("Enter a prompt : ")
prompt_classification = classify_prompt(prompt)
print(f"The classification of the prompt is '{prompt_classification}'.")

The classification of the prompt is 'definition'


#### Extracting keywords from the prompt

In [33]:
from rake_nltk import Rake
from nltk.corpus import stopwords

# Build the RAKE extractor on top of NLTK's English stop-word list
english_stopwords = stopwords.words('english')
rake = Rake(stopwords=english_stopwords)

# Instruction-style words that should not end up in the extracted keywords
filter_words = [
    "explain",
    "describe",
    "define",
    "definition",
    "difference",
]

def extract_keywords(prompt):
    """
    Extract topic keywords from a prompt using RAKE, with instruction words removed.

    Each ranked phrase returned by RAKE is cleaned word by word: any word that
    contains one of `filter_words` (case-insensitive substring match, so
    "explained" or "definitions" are removed too) is dropped, and the remaining
    words are kept as the keyword. Dropping only the offending words, rather
    than the whole phrase, keeps the topic of prompts such as
    "Define machine learning", where RAKE yields the instruction word and the
    topic as a single phrase.

    Args:
        prompt (str): The user prompt to extract keywords from.

    Returns:
        list: Cleaned keywords in RAKE's ranking order, without duplicates.
              Phrases made up only of filter words are omitted.
    """
    # Step 1: Let RAKE analyse the prompt
    rake.extract_keywords_from_text(prompt)

    # Step 2: Get the ranked keywords/phrases extracted by RAKE
    ranked_phrases = rake.get_ranked_phrases()

    # Step 3: Strip instruction words from each phrase instead of discarding the phrase
    cleaned_keywords = []
    for phrase in ranked_phrases:
        kept_words = [
            word for word in phrase.split()
            if not any(fw in word.lower() for fw in filter_words)
        ]
        if not kept_words:
            # The phrase consisted only of instruction words (e.g. "explain")
            continue

        cleaned = ' '.join(kept_words)
        # Stripping can make two phrases identical; keep the first (highest ranked) one
        if cleaned not in cleaned_keywords:
            cleaned_keywords.append(cleaned)

    # Step 4: Return the cleaned list of keywords
    return cleaned_keywords


In [35]:
# Demo: run keyword extraction on a sample prompt and print the result.
# NOTE(review): "aritificial" in the sample prompt looks like a typo for "artificial" — confirm whether it is intentional.
prompt = "Explain me the difference between natural language processing and aritificial intelligence"
keywords = extract_keywords(prompt)
print(f"Keywords from the prompt : {keywords}")

Keywords from the prompt : ['natural language processing', 'aritificial intelligence']


#### Web scraping for the relevant words

In [64]:
import wikipediaapi
from bs4 import BeautifulSoup
import requests

def get_wikipedia_url(keyword):
    """
    Look up a keyword as an English Wikipedia page and return that page's URL.

    Args:
        keyword (str): The page title to look up on Wikipedia.

    Returns:
        str: The page's full URL, or None when Wikipedia reports that the page does not exist.
    """
    # A fresh API client is created on every call
    # ("MyProject" is presumably the client's user agent — confirm against the wikipediaapi version in use)
    wiki_client = wikipediaapi.Wikipedia("MyProject", 'en')
    wiki_page = wiki_client.page(keyword)

    # Only hand back a URL for pages that actually exist
    return wiki_page.fullurl if wiki_page.exists() else None


def scrape_wikipedia_terms(keyword, timeout=10):
    """
    Scrape the section headings (h2 tags) from the Wikipedia page of a keyword.

    Args:
        keyword (str): The keyword to look up on Wikipedia.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        list: Heading texts from the page, with surrounding whitespace removed
              and boilerplate sections (e.g. 'References', 'See also') left out.
              Empty when the page does not exist or the request does not
              return a successful status.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout).
    """
    # Get the Wikipedia URL for the given keyword
    url = get_wikipedia_url(keyword)

    # If no valid URL is found, return an empty list
    if url is None:
        return []

    # requests has no default timeout; without one a stalled connection hangs the notebook
    response = requests.get(url, timeout=timeout)

    # Do not mine headings out of an error page
    if not response.ok:
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Sections that carry no topical information
    excluded_headings = {'Overview', 'Contents', 'See also', 'References', 'External links', 'Further reading'}

    headings = []
    for heading_tag in soup.find_all('h2'):
        # Strip whitespace so the exclusion check is not defeated by stray newlines/spaces
        heading = heading_tag.get_text().strip()
        if heading and heading not in excluded_headings:
            headings.append(heading)

    return headings


def get_keywords_for_all(keywords):
    """
    Scrape Wikipedia headings for every keyword and merge them into one list.

    Args:
        keywords (list): A list of keywords to scrape.

    Returns:
        list: The unique headings scraped for all keywords, in first-seen
              order, so repeated runs over the same pages give the same list.
    """
    all_keywords = []  # Headings collected across every keyword

    # Iterate over each keyword and scrape terms
    for keyword in keywords:
        all_keywords.extend(scrape_wikipedia_terms(keyword))

    # dict.fromkeys de-duplicates while keeping insertion order; a plain set()
    # would return the strings in an order that changes between kernel restarts
    return list(dict.fromkeys(all_keywords))

In [65]:
# Demo: extract keywords from a sample prompt, then collect the Wikipedia
# section headings for each extracted keyword.
prompt = "Explain me the difference between natural language processing and artificial intelligence"
keywords = extract_keywords(prompt)
keywordsFromNet = get_keywords_for_all(keywords)
print(f"All the keywords from the internet : {keywordsFromNet}")


#### Refining the prompt manually

In [71]:
import google.generativeai as genai
import requests 

def generate_refined_prompt(prompt, classification, keywords, related_terms):
    """
    Generate a refined prompt based on the classification, keywords, and related terms.

    Args:
        prompt (str): The initial user prompt. Returned unchanged when no
            refined prompt can be built.
        classification (str): The classification of the prompt
            ("description", "explanation", "definition" or "comparison").
        keywords (list): List of extracted keywords.
        related_terms (list): List of related terms from Wikipedia; may be
            empty or None.

    Returns:
        str: A refined prompt, or the original `prompt` when `keywords` is
             empty or `classification` is not one of the known classes.
    """
    # Join all keywords into a string for use in the prompt
    all_keywords = ', '.join(keywords)

    # Without keywords every template would read as a broken sentence
    if not all_keywords:
        return prompt

    # One template per classification, without related terms ...
    plain_templates = {
        "description": "Can you describe in detail {keywords}?",
        "explanation": "Can you explain how {keywords} work?",
        "definition": "Please provide clear definitions of {keywords}.",
        "comparison": "Can you compare {keywords}?",
    }
    # ... and with related terms appended
    related_templates = {
        "description": "Can you describe in detail {keywords} and their related terms: {related}?",
        "explanation": "Can you explain how {keywords} work and their relationships to {related}?",
        "definition": "Please provide clear definitions of {keywords}, including {related}.",
        "comparison": "Can you compare {keywords} with {related}?",
    }

    templates = related_templates if related_terms else plain_templates
    template = templates.get(classification)

    # Unknown classification: fall back to the user's prompt instead of returning None
    if template is None:
        return prompt

    related = ', '.join(related_terms) if related_terms else ''
    return template.format(keywords=all_keywords, related=related)


In [75]:
# Classify the prompt that is actually being refined. Reusing the label left over
# from the earlier interactive cell would apply the classification of whatever
# was typed there, not of the current `prompt`.
prompt_classification = classify_prompt(prompt)
manual_prompt = generate_refined_prompt(prompt, prompt_classification, keywords, keywordsFromNet)
print(f"Manual Prompt : \n{manual_prompt}")

Manual Prompt : 
Please provide clear definitions of natural language processing, artificial intelligence, including Applications, In fiction, Explanatory notes, Common NLP tasks, Future, Ethics, Philosophy, Goals, General tendencies and (possible) future directions, Approaches: Symbolic, statistical, neural networks, History, Techniques.


#### Refining the prompt with Gemini

In [73]:
# Configure the Google Generative AI client with an API key read from the environment.
# API keys must never be committed in a notebook: export GOOGLE_API_KEY before running.
# Any key that was previously hardcoded in this cell should be treated as leaked and revoked.
import os

_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")
genai.configure(api_key=_gemini_api_key)

def send_to_gemini(initial_prompt, extracted_keywords):
    """
    Send the initial prompt and extracted keywords to the Gemini API for refinement.

    Args:
        initial_prompt (str): The initial user prompt.
        extracted_keywords (list): Keywords to steer the refinement; interpolated
            into the request text as-is.

    Returns:
        str: The refined prompt received from the Gemini API, or an error
             message starting with "Error occurred while contacting the Gemini API."
             when the call fails or the response carries no text.
    """
    try:
        # Named gemini_model so it is not confused with the module-level classifier `model`
        gemini_model = genai.GenerativeModel('gemini-pro')
        response = gemini_model.generate_content(f"""You are a prompt engineer; your task is to refine and enhance the initial prompt so that the output of the refined prompt should be better while staying true to the intent of the initial prompt. Use the important keywords to refine the initial prompt. 
                                            initial_prompt = {initial_prompt}
                                            extracted_keywords = {extracted_keywords}
                                            refined_prompt =""")
        return response.text
    except Exception as e:
        # The SDK is not expected to raise requests exceptions, so catching only
        # requests.exceptions.RequestException would leave this error path unreachable;
        # accessing response.text can also fail when the response has no text part.
        # The cause is included so the failure is not silent.
        return f"Error occurred while contacting the Gemini API. ({type(e).__name__}: {e})"

In [74]:
# Ask Gemini to refine the current prompt and print the result.
# NOTE(review): the second argument is the list of Wikipedia headings (keywordsFromNet),
# not the keywords extracted from the prompt — confirm this is the intended input.
gemini_prompt = send_to_gemini(prompt, keywordsFromNet)
print(f"Gemini Prompt : \n{gemini_prompt}")

Gemini Prompt : 
Provide a detailed explanation of the key differences between natural language processing (NLP) and artificial intelligence (AI), highlighting their applications, common tasks, and future directions.
