In [1]:
import spacy
from transformers import pipeline, BertTokenizer, BertForQuestionAnswering

nlp = spacy.load("en_core_web_sm")
nlp


  from .autonotebook import tqdm as notebook_tqdm


<spacy.lang.en.English at 0x71cdb1fe3fd0>

In [2]:
text_generation_pipeline = pipeline("text-generation", model="gpt2")
text_generation_pipeline

<transformers.pipelines.text_generation.TextGenerationPipeline at 0x71cdb08cf340>

In [5]:
# Helper functions
def identify_main_verb(query):
    doc = nlp(query)
    for token in doc:
        if token.pos_ == "VERB":
            return token.text
    return None

def identify_subject(query):
    doc = nlp(query)
    for token in doc:
        if token.dep_ == "nsubj":
            return token.text
    return None

def match_subject_verb(subject, verb):
    # Simplified subject-verb agreement logic
    if subject and verb:
        if subject.endswith('s'):
            if not verb.endswith('s'):
                return verb + 's'
        else:
            if verb.endswith('s'):
                return verb[:-1]
    return verb

def extract_keywords(query):
    doc = nlp(query)
    keywords = [chunk.text for chunk in doc.noun_chunks]
    return keywords

def match_with_database(keywords):
    # Placeholder function to simulate keyword matching
    # In a real application, this would involve database queries
    return keywords

def generate_context(correct_verb_form, keywords):
    return f"Perform the action '{correct_verb_form}' on {', '.join(keywords)}."

def LLM_model_generate_response(context):
    # Generate response using Hugging Face text generation pipeline
    generated = text_generation_pipeline(context, max_length=50)
    return generated[0]["generated_text"]

def interpret_verb(query):
    # Use natural language processing (NLP) to identify the main verb
    verb = identify_main_verb(query)
    return verb

def subject_verb_agreement(subject, verb):
    # Ensure correct subject-verb agreement
    correct_verb_form = match_subject_verb(subject, verb)
    return correct_verb_form

def discover_keywords(query):
    # Extract key terms from the query
    keywords = extract_keywords(query)
    # Match keywords with database terminologies
    matched_keywords = match_with_database(keywords)
    return matched_keywords

def formulate_response(query):
    verb = interpret_verb(query)
    subject = identify_subject(query)
    correct_verb_form = subject_verb_agreement(subject, verb)
    keywords = discover_keywords(query)
    context = generate_context(correct_verb_form, keywords)
   
    response = LLM_model_generate_response(context)
    return response

In [6]:
user_query = "How do I create a new document?"
response = formulate_response(user_query)
response

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"Perform the action 'create' on I, a new document.\n\n# I'm about to create a document with these two columns (in the format 'example.html'). write_content('Hello world ', document.write"