In [None]:
# Complete interaction through the GUI

import llm_class
import model_inference
import prompts

from constants import PHI_MODEL_NAME, EMBEDDING_MODEL_NAME
from datetime import datetime
from embedding import Embedder
from model_eval import ModelEval
from scraper import Scraper
from searching import PageSearch
from webpage import Webpage

import gradio as gr
import pandas as pd

# Build the language-model wrapper and wrap it in the inference helper.
# The callbacks defined later in this cell (select_webpage, query_webpage)
# reach the helper through the notebook-level name `inference`.
model = llm_class.LanguageModel()
inference = model_inference.ModelInferencing(model)


# Function to update the interface components
def select_webpage(webpage_url, page_num):
    """Scrape pages, attach key-phrase embeddings, and switch the UI to search mode.

    Pages are fetched with ``Scraper.scrape_from_link(webpage_url, page_num)``.
    For every page the key phrases are extracted through the notebook-level
    ``inference`` helper, embedded, and stored on the page with
    ``set_embedding``. A page whose processing raises is reported on stdout
    and left out of the result (best-effort: one bad page must not abort the
    whole scrape).

    Args:
        webpage_url: Value of the "Webpage url" textbox.
        page_num: Value of the "Number of webpages to extract" slider.

    Returns:
        A 6-tuple. The first five entries are ``gr.update(...)`` objects that
        line up with the ``outputs`` list wired to the scrape button
        (``page_num``, ``button``, ``text``, ``text2``, ``button2``): hide the
        slider and the scrape button, make the URL box non-interactive, and
        reveal the search box and search button. The sixth entry is a
        ``PageSearch`` built from the successfully processed pages.
    """
    pages = Scraper.scrape_from_link(webpage_url, page_num)
    new_pages = []
    # One embedder is shared by all pages instead of being rebuilt per page.
    # It is created lazily inside the try block so that a constructor failure
    # is still handled like any other per-page error.
    embedder = None

    for page in pages:
        print(page.link)
        try:
            text = page.get_text()
            phrases = inference.get_multiple_phrases(text)
            if embedder is None:
                embedder = Embedder()
            emb_phrases = embedder.get_embedding(phrases)
            page.set_embedding(emb_phrases)
            print(phrases)
        except Exception as e:
            print(f"Page skipped due to error: {e}")
            continue
        new_pages.append(page)

    page_collection = PageSearch(new_pages)

    return (
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(interactive=False),
        gr.update(visible=True),
        gr.update(visible=True),
        page_collection
    )

def query_webpage(query, page_collection):
    """Search the scraped pages and tabulate the hits.

    Args:
        query: Search text entered by the user.
        page_collection: Object exposing ``search_pages(query, inference)``.
            Each returned item is indexed with ``'page'`` (an object that has
            a ``link`` attribute) and ``'relevance'``.

    Returns:
        pandas.DataFrame with the columns ``links`` and ``scores``, listing
        the hits in the reverse of the order ``search_pages`` returned them.
    """
    hits = page_collection.search_pages(query, inference)
    hits.reverse()  # in place: the list handed back by search_pages is flipped

    table = {
        'links': [hit['page'].link for hit in hits],
        'scores': [hit['relevance'] for hit in hits],
    }
    return pd.DataFrame.from_dict(table)

# Initial setup of the Gradio interface
with gr.Blocks() as demo:
    # Scrape-stage inputs: created visible, so they are what the user sees first.
    text = gr.Textbox(label="Webpage url", interactive=True, visible=True)
    page_num = gr.Slider(
        label="Number of webpages to extract",
        value=1,
        interactive=True,
        minimum=1,
        maximum=100,
        step=1
    )
    button = gr.Button("Scrape the website", visible=True)

    # Search-stage controls: created hidden; the updates returned by
    # select_webpage make them visible once scraping has finished.
    text2 = gr.Textbox(label="Search box", interactive=True, visible=False)
    button2 = gr.Button("Search pages", visible=False)
    # Results table with a fixed 20 x 2 grid. Unlike the search controls it is
    # not created with visible=False.
    result_df = gr.Dataframe(
        row_count=(20, "fixed"),
        col_count=(2, "fixed"),
        label="Most relevant pages",
        headers=['links', 'scores']
    )
    
    # State slot that carries the PageSearch returned by select_webpage into
    # query_webpage; it starts out as None.
    page_collection = gr.State(None)

    # The `outputs` order must stay in step with the tuple returned by
    # select_webpage: slider, scrape button, URL box, search box, search
    # button, then the state slot.
    button.click(
        fn=select_webpage,
        inputs=[text, page_num],
        outputs=[page_num, button, text, text2, button2, page_collection]
    )
    
    # query_webpage receives the search text and the stored collection; the
    # DataFrame it returns is displayed in result_df.
    button2.click(
        fn=query_webpage,
        inputs=[text2, page_collection],
        outputs=[result_df]
    )

# NOTE(review): per the Gradio docs, share=True requests a publicly reachable
# link; confirm that exposing the scraper outside the local machine is intended.
demo.launch(share=True)

In [None]:
# Init

import keyphrase_dataset
import llm_class
import model_inference
import output_parser
import phrase_extraction_evaluation
import prompts

from datetime import datetime
from constants import PHI_MODEL_NAME, EMBEDDING_MODEL_NAME
from model_eval import ModelEval

import gradio as gr

# Objects shared by the prompt-testing and debug cells below: the language
# model wrapper, the keyphrase dataset, and the inference helper built on the
# model. Running this cell rebinds `model` and `inference` if the GUI cell
# above has already been executed.
model = llm_class.LanguageModel()
keyphrase_set = keyphrase_dataset.KeyphraseDataset()
inference = model_inference.ModelInferencing(model)


In [None]:


def gradio_func(text_prompt: str, samples: int, experiment_name: str) -> str:
    """
    Evaluate a candidate key-phrase prompt on a batch of dataset samples.

    The prompt is installed by overwriting ``prompts.multiple_keyphrases_prompt``
    (a module-level attribute, so the change outlives this call), after which
    ``ModelEval.multiple_samples`` is run on samples drawn from
    ``keyphrase_set``.

    Args:
        text_prompt (str): Prompt text to use for key-phrase generation.
        samples (int): How many samples to request from the dataset.
        experiment_name (str): Passed to the evaluator as ``save_file``.
    Returns:
        str: One line with the cosine, matchings, redundancy and groundness
        values taken from the evaluator's result, separated by " | ".
    """
    prompts.multiple_keyphrases_prompt = text_prompt

    batch = keyphrase_set.get_samples(samples)
    metrics = ModelEval.multiple_samples(
        inference,
        batch,
        save_file=experiment_name,
        print_counts=True,
    )

    report_parts = [
        f"Avg cosine: {metrics['cosine']}",
        f"Avg labels matched: {metrics['matchings']}",
        f"Avg redundancy: {metrics['redundancy']}",
        f"Avg groundness: {metrics['groundness']}",
    ]
    return " | ".join(report_parts)


# Create the Gradio interface for prompt testing
# The three inputs are listed in the same order as gradio_func's parameters:
# text_prompt, samples, experiment_name.
interface = gr.Interface(
    fn=gradio_func,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your text prompt here...", label="Text Prompt"),
        gr.Slider(minimum=1, maximum=500, step=1, label="Number of tests"),
        gr.Textbox(lines=2, placeholder="Enter your experiment name here...", label="Experiment name"),
    ],
    outputs="text",
    title="Prompt Testing Interface",
)

# Launch the interface.
# NOTE(review): per the Gradio docs, share=True requests a publicly reachable
# link; confirm that this is intended for a prompt-testing tool.
interface.launch(share=True)

In [None]:
# Debug

# Pick the entry at index 1 of the training set for manual inspection.
# Requires `keyphrase_set` from the "Init" cell.
sample=keyphrase_set.train_set[1]

#print(model.prompt_text)

# Show the raw text and its reference label, separated by blank lines.
print(sample['text'])
print("\n")
print(sample['label'])
print("\n")
# NOTE(review): `get_metrics` is neither defined nor imported in any cell
# visible here, so this line raises NameError on a fresh kernel. Confirm where
# it is supposed to come from.
print(get_metrics(sample['text'],sample['label']))

In [11]:
# Debug

# Replace the model's prompt with a hand-written key-phrase extraction prompt.
# Requires `model` from the "Init" cell.
# NOTE(review): the three `&` characters look like placeholders that the model
# wrapper fills in (example page, example response, actual page text); confirm
# against LanguageModel.set_prompt before editing the template.
model.set_prompt("""You are provided with the text extracted from a webpage, delimited by < for start and > for end. Your task is to extract the key phrases from the text that best characterize the webpage. You should extract at most 10 such phrases, but may extract less. Ensure the key phrases are relevant and provide a good summary of the content. Present the key phrases in JSON format, with each key phrase being an item in a list. Do not output anything but json of extracted keyphrases.

Example webpage to extract from: <&>

Your response should look like this:&

The text from the webpage: <&>""")

In [None]:
def visualize(text: str) -> str:
    """
    Render the key phrases of a text as a bulleted, plain-text list.

    The phrases come from ``inference.get_multiple_phrases``; each one is put
    on its own line, prefixed with "- ", below a fixed heading.

    Args:
        text (str): Text to extract key phrases from.
    Returns:
        str: The heading followed by one bullet line per phrase.
    """
    phrases = inference.get_multiple_phrases(text)
    bullet_lines = "\n- ".join(phrases)
    return "Key phrases in the text are:\n- " + bullet_lines


# Create the Gradio interface for key phrase extraction
# Single-textbox front-end for `visualize`: the text typed into the box is
# passed to the function and the returned string is shown as plain text.
# This rebinds the notebook-level name `interface` used by other cells.
interface = gr.Interface(
    fn=visualize,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your text prompt here...", label="Text to extract the keywords from"),
    ],
    outputs="text",
    title="Key Phrase Extractor",
)

# Launch the interface.
# NOTE(review): per the Gradio docs, share=True requests a publicly reachable
# link; confirm that this is intended.
interface.launch(share=True)

In [None]:
import gradio as gr

def visualize2(text: str) -> str:
    """
    Render the search phrases of a text as a bulleted, plain-text list.

    The phrases come from ``inference.get_search_phrases``; each one is put on
    its own line, prefixed with "- ", below a fixed heading.

    Args:
        text (str): Text to derive search phrases from.
    Returns:
        str: The heading followed by one bullet line per phrase.
    """
    phrases = inference.get_search_phrases(text)
    bullet_lines = "\n- ".join(phrases)
    return "Search phrases in the text are:\n- " + bullet_lines


# Create the Gradio interface for search phrase extraction
# Single-textbox front-end for `visualize2` (search-phrase extraction).
# This rebinds the notebook-level name `interface` used by other cells.
# NOTE(review): the title and the textbox label are identical to those of the
# key-phrase interface; they look copy-pasted. Confirm whether the title
# should mention search phrases instead.
interface = gr.Interface(
    fn=visualize2,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your text prompt here...", label="Text to extract the keywords from"),
    ],
    outputs="text",
    title="Key Phrase Extractor",
)

# Launch the interface.
# NOTE(review): per the Gradio docs, share=True requests a publicly reachable
# link; confirm that this is intended.
interface.launch(share=True)