In [1]:
# Extract the model bundle into ./news_stat_ui. -o overwrites existing files
# without prompting, so re-running this cell never blocks on unzip's
# interactive "replace?" question.
!unzip -o /content/Model_Test.zip

Archive:  /content/Model_Test.zip
   creating: news_stat_ui/
   creating: news_stat_ui/fake/
  inflating: news_stat_ui/fake/best.ckpt  
  inflating: news_stat_ui/fake/template.txt  
 extracting: news_stat_ui/fake/verbalizer.txt  
  inflating: news_stat_ui/main.py    
   creating: news_stat_ui/sentim/
  inflating: news_stat_ui/sentim/best.ckpt  
  inflating: news_stat_ui/sentim/template.txt  
  inflating: news_stat_ui/sentim/verbalizer.txt  
   creating: news_stat_ui/topic_class/
  inflating: news_stat_ui/topic_class/best.ckpt  
  inflating: news_stat_ui/topic_class/template.txt  
  inflating: news_stat_ui/topic_class/verbalizer.txt  
   creating: news_stat_ui/topic_gen/
  inflating: news_stat_ui/topic_gen/best.ckpt  
  inflating: news_stat_ui/topic_gen/template.txt  
  inflating: news_stat_ui/topic_gen/verbalizer.txt  


In [2]:
# Runtime dependencies. %pip (rather than !pip) installs into the environment
# of the running kernel. dill and gradio are pinned to the versions recorded
# in this cell's output from the last run.
%pip install dill==0.3.9
%pip install gradio==5.9.1
%pip install openprompt
%pip install sentence-transformers
%pip install transformers==4.19.0

Collecting dill
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading dill-0.3.9-py3-none-any.whl (119 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/119.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.4/119.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.9
Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading Mark

In [18]:
#!/usr/bin/env python3

import dill
import gradio as gr
import requests
import torch

from bs4 import BeautifulSoup
from openprompt import PromptForClassification, PromptForGeneration, PromptDataLoader
from openprompt.data_utils import InputExample
from openprompt.plms import load_plm
from openprompt.prompts  import MixedTemplate, SoftTemplate, ManualVerbalizer
from os import path

def main():
    """Build the News Analysis Gradio UI, wire up its two buttons, and launch it.

    Layout, top to bottom: a URL box with a "Start Reading" button, the
    scraped-text box with a "Start Analyzing" button, and three read-only
    result boxes (sentiment, fake news, topic classification).
    """
    # Shared settings for the read-only result boxes.
    result_box_options = dict(lines=2, interactive=False)

    with gr.Blocks() as demo:
        gr.Markdown("## News Analysis Tool")

        # Step 1: the article URL.
        with gr.Row(), gr.Column(scale=1):
            url_box = gr.Textbox(placeholder="Paste a news article URL here", label="News URL", lines=1)
            read_button = gr.Button("Start Reading")

        # Step 2: the scraped text, which is also the input of the analysis.
        with gr.Row(), gr.Column(scale=1):
            scraped_box = gr.Textbox(label="Scraped Text", lines=5)
            analyze_button = gr.Button("Start Analyzing")

        # Step 3: one result box per classifier. The generated-topic output
        # ("Topics(Generation)") is currently disabled.
        with gr.Row(), gr.Column(scale=1):
            sentiment_box = gr.Textbox(label="Sentiment Scores", **result_box_options)
            fake_news_box = gr.Textbox(label="Fake News Scores", **result_box_options)
            topic_class_box = gr.Textbox(label="Topics (Classification)", **result_box_options)

        read_button.click(fn=scrape_text_from_url, inputs=[url_box], outputs=[scraped_box])

        # The outputs must stay in the order run_analyze_onclick returns them.
        analyze_button.click(
            fn=run_analyze_onclick,
            inputs=[scraped_box],
            outputs=[sentiment_box, fake_news_box, topic_class_box],
        )

    # Use demo.launch(debug=True) instead to surface errors inside Colab.
    demo.launch()


def run_analyze_onclick(text: str):
    """Click handler for the analyze button: run every enabled classifier on ``text``.

    Returns a 3-tuple of display strings in the order
    (sentiment, fake news, topic classification). The generation-based topic
    model (``class_gen``) is currently not part of the pipeline.
    """
    analyzers = (analyze_sentiment, analyze_realness, class_topic)
    return tuple(analyze(text) for analyze in analyzers)

def scrape_text_from_url(url, timeout=10):
    """Fetch ``url`` and return the text of its ``<p>`` elements joined by spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The paragraph text of the page. On any failure (invalid URL, network
        error, timeout, HTTP error status, ...) the error message is returned
        as a string instead of raising, so the caller always gets text to
        display.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures so that the body of an error
        # page is not passed on as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return ' '.join(p.get_text() for p in soup.find_all('p'))
    except Exception as e:
        # Deliberately best-effort: report the problem as text.
        return str(e)

def analyze_sentiment(text: str) -> str:
    """Classify the sentiment of ``text`` and report the most likely label.

    Loads the prompt model stored under ``./news_stat_ui/sentim`` (with strict
    checkpoint loading), runs a single forward pass and applies a softmax
    over the class dimension.

    Returns:
        ``"<label>: <probability>%"`` for the highest-probability class, where
        the labels are the lines of ``./news_stat_ui/sentim/verbalizer.txt``
        and the probability is a percentage rounded to two decimals.
    """
    prompt_model, tokenizer, wrapper_class, template = load_prompt_model('./news_stat_ui/sentim','Classification',True)
    inputs = custom_tokenizer_wrapper(text, tokenizer, wrapper_class, template)

    prompt_model.eval()
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = prompt_model(inputs)
    results = torch.softmax(logits, dim=1).squeeze().tolist()

    with open('./news_stat_ui/sentim/verbalizer.txt', "r") as f:
        class_labels = f.read().splitlines()

    # Line i of the verbalizer file is used as the name of class index i.
    max_index = results.index(max(results))
    top_label = class_labels[max_index]
    top_probability = round(results[max_index] * 100, 2)

    return f"{top_label}: {top_probability}%"

def analyze_realness(text: str) -> str:
    """Classify ``text`` with the fake-news model and report the most likely label.

    Loads the prompt model stored under ``./news_stat_ui/fake`` (with strict
    checkpoint loading), runs a single forward pass and applies a softmax
    over the class dimension.

    Returns:
        ``"<label>: <probability>%"`` for the highest-probability class, where
        the labels are the lines of ``./news_stat_ui/fake/verbalizer.txt``
        and the probability is a percentage rounded to two decimals.
    """
    prompt_model, tokenizer, wrapper_class, template = load_prompt_model('./news_stat_ui/fake','Classification',True)
    inputs = custom_tokenizer_wrapper(text, tokenizer, wrapper_class, template)

    prompt_model.eval()
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = prompt_model(inputs)
    results = torch.softmax(logits, dim=1).squeeze().tolist()

    with open('./news_stat_ui/fake/verbalizer.txt', "r") as f:
        class_labels = f.read().splitlines()

    # Line i of the verbalizer file is used as the name of class index i.
    max_index = results.index(max(results))
    top_label = class_labels[max_index]
    top_probability = round(results[max_index] * 100, 2)

    return f"{top_label}: {top_probability}%"

def class_gen(text: str) -> str:
    """Generate a topic string for ``text`` with the ``topic_gen`` prompt model.

    Relies on ``load_prompt_model`` returning a generation model for task
    ``'Generation'``; the checkpoint is loaded non-strictly.

    Returns:
        The first generated sentence, or an ``"Error: ..."`` message when the
        model produced no usable output.
    """
    prompt_model, tokenizer, wrapper_class, template = load_prompt_model('./news_stat_ui/topic_gen', 'Generation', False)

    inputs = custom_tokenizer_wrapper(text, tokenizer, wrapper_class, template)
    prompt_model.eval()

    with torch.no_grad():
        generated = prompt_model.generate(inputs, max_length=50, num_return_sequences=1)

    # OpenPrompt's PromptForGeneration.generate returns an
    # (output_sequences, generated_sentences) pair, not a bare list, so a
    # list-only check would never match. Prefer the already post-processed
    # sentences and fall back to decoding the raw token ids.
    if isinstance(generated, tuple) and len(generated) == 2:
        token_sequences, sentences = generated
        if sentences:
            return sentences[0]
        generated = token_sequences

    # Fallback: a list of token-id sequences; decode the first one.
    if isinstance(generated, list) and len(generated) > 0:
        first_sequence = generated[0]
        if isinstance(first_sequence, list):
            return tokenizer.decode(first_sequence, skip_special_tokens=True)
        return "Error: Invalid token format"

    return "Error: No output generated"


def class_topic(text: str) -> str:
    """Classify the topic of ``text`` and report the most likely label.

    Loads the prompt model stored under ``./news_stat_ui/topic_class`` (with
    non-strict checkpoint loading), runs a single forward pass and applies a
    softmax over the class dimension.

    Returns:
        ``"<topic>: <probability>%"`` for the highest-probability class, where
        the topics are the lines of
        ``./news_stat_ui/topic_class/verbalizer.txt`` and the probability is a
        percentage rounded to two decimals.
    """
    prompt_model, tokenizer, wrapper_class, template = load_prompt_model('./news_stat_ui/topic_class','Classification',False)

    inputs = custom_tokenizer_wrapper(text, tokenizer, wrapper_class, template)

    prompt_model.eval()
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = prompt_model(inputs)
    probabilities = torch.softmax(logits, dim=1).squeeze().tolist()

    with open('./news_stat_ui/topic_class/verbalizer.txt', "r") as f:
        class_labels = f.read().splitlines()

    # Line i of the verbalizer file is used as the name of class index i.
    max_index = probabilities.index(max(probabilities))
    top_topic = class_labels[max_index]
    top_probability = round(probabilities[max_index] * 100, 2)

    return f"{top_topic}: {top_probability}%"

def custom_tokenizer_wrapper(txt: str, tokenizer, tokenizer_wrapper_class, template, decoder_max_length=128, batch_size=1):
    """Turn one raw string into a model-ready batch.

    ``txt`` is wrapped in a single ``InputExample`` (as ``text_a``), passed
    through a ``PromptDataLoader`` built from the given tokenizer, wrapper
    class and template, and the first batch the loader yields is returned.
    """
    loader_options = dict(
        dataset=[InputExample(text_a=txt)],
        tokenizer=tokenizer,
        tokenizer_wrapper_class=tokenizer_wrapper_class,
        template=template,
        decoder_max_length=decoder_max_length,
        batch_size=batch_size,
    )
    batches = list(PromptDataLoader(**loader_options))

    # Only one example was supplied, so only the first batch is of interest.
    return batches[0]

def load_prompt_model(dir_path ,task ,mode ,template_type='mixed' ,model_name='t5', model_path='t5-small'):
    """Build an OpenPrompt model from the files in ``dir_path`` and load its checkpoint.

    ``dir_path`` is expected to contain ``template.txt``, ``best.ckpt`` and,
    for classification, ``verbalizer.txt``. Every call loads the pretrained
    model and the checkpoint again; nothing is cached.

    Args:
        dir_path: Directory holding the template, verbalizer and checkpoint.
        task: ``'Classification'`` or ``'Generation'``.
        mode: Passed to ``load_state_dict`` as its ``strict`` flag.
        template_type: ``'mixed'`` (MixedTemplate) or ``'soft'`` (SoftTemplate).
        model_name: Forwarded to ``load_plm``.
        model_path: Forwarded to ``load_plm``.

    Returns:
        The tuple ``(prompt_model, tokenizer, wrapper_class, template)``.

    Raises:
        ValueError: If ``task`` is not a supported task name.
        KeyError: If ``template_type`` is not a known template type.
    """
    # Reject unknown tasks before any expensive loading; otherwise execution
    # would reach load_state_dict with no prompt_model defined.
    if task not in ('Classification', 'Generation'):
        raise ValueError(
            f"Unsupported task {task!r}; expected 'Classification' or 'Generation'"
        )

    plm, tokenizer, _model_config, wrapper_class = load_plm(model_name, model_path)

    template_constructors = {
        'mixed' : MixedTemplate,
        'soft' : SoftTemplate,
    }

    template = template_constructors[template_type](
        model=plm,
        tokenizer=tokenizer,
    ).from_file(path.join(dir_path, 'template.txt'), choice=0)

    if task == 'Classification':
        verbalizer_path = path.join(dir_path, 'verbalizer.txt')

        # Class names: every whitespace-separated token of the verbalizer file.
        with open(verbalizer_path, "r") as f:
            class_labels = ' '.join(f.readlines()).split()

        verbalizer = ManualVerbalizer(
            tokenizer = tokenizer,
            classes = class_labels,
        ).from_file(verbalizer_path, choice=0)

        prompt_model = PromptForClassification(
            template = template,
            plm = plm,
            verbalizer = verbalizer,
        )
    else:  # task == 'Generation'
        prompt_model = PromptForGeneration(
            template = template,
            plm = plm,
        )

    # SECURITY NOTE: with weights_only=False and dill as the pickle module,
    # loading the checkpoint can execute arbitrary code embedded in the file.
    # Only load checkpoints from a trusted source.
    checkpoint = torch.load(path.join(dir_path, 'best.ckpt'), pickle_module=dill, map_location= "cpu", weights_only=False)
    prompt_model.load_state_dict(checkpoint['state_dict'], strict = mode)

    return prompt_model, tokenizer, wrapper_class, template

# Build and launch the UI only when this code runs as the main program
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://eece5ae2a5f507b3b4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
