In [2]:
import os
import requests
from bardapi import BardCookies
from sydney import SydneyClient
from dotenv import load_dotenv
import markdown
import re
import random
import string
import nltk

nltk.download('punkt')

load_dotenv()

[nltk_data] Downloading package punkt to /Users/miguel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
models = [
    ('summarize', 'facebook/bart-large-cnn'),
    ('summarize', 'google/pegasus-large'),
    ('summarize', 'marianna13/flan-t5-base-summarization'),
    ('toxic', 's-nlp/roberta_toxicity_classifier'),
    ('toxic', 'citizenlab/distilbert-base-multilingual-cased-toxicity', 'inputs'),
    ('toxic', 'martin-ha/toxic-comment-model'),
    ('spam', 'rafacost/bert_base_pt_en_cased_email_spam'),
    ('spam', 'h-e-l-l-o/email-spam-classification-merged'),
    ('spam', 'dima806/email-spam-detection-roberta'),
    ('translate', 't5-base'),
    ('translate', 'allenai/wmt16-en-de-12-1'),
    ('translate', 'facebook/wmt19-en-de'),
    ('fillmask', 'bert-base-uncased', '[MASK]'),
    ('fillmask', 'vinai/bertweet-base', '<mask>'),
    ('fillmask', 'roberta-base', '<mask>'),
]

In [4]:
def request_to_model(model, input):

    BASE_URL = "https://api-inference.huggingface.co/models/"
    headers = {"Authorization": f"Bearer {os.getenv('HUGGING_FACE_API_KEY')}"}
    new_input = input
    parameters = {}

    if model[0] == 'fillmask':
        new_input = input.replace('<mask>', model[2])
    elif model[0] == 'translate':
        parameters = {"src_lang": "en_XX", "tgt_lang": "tgt_XX"}

    query = {"inputs": new_input, "wait_for_model": True, "parameters":parameters}

    try:
        response = requests.post(BASE_URL + model[1], json=query, headers=headers)
    except Exception as exception:
        print(exception)
    
    return response.json()

In [9]:
res = request_to_model(models[10], "The tower is 324 metres (1,063 ft) tall") # "The goal of life is <mask>."
print(res)

{'error': 'Model allenai/wmt16-en-de-12-1 is currently loading', 'estimated_time': 20.0}


In [5]:
def convert_to_plain_text(input_text):
    input_text = re.sub(r'\[\^\d+\^\]', '', input_text)
    plain_text = markdown.markdown(input_text, output_format='html')
    plain_text = re.sub(r'<[^>]*>\s*', '', plain_text)
    return plain_text

In [6]:
async def request_to_bing(question, type="q&a"):

    prompt = ""

    if (type == "q&a"):
        prompt = "Answer me the following question in plain text without using quotes: "
    elif (type == "change_order"):
        prompt = "Change the order of the next sentence: "
    elif (type == "replace_word_synonyms"):
        prompt = "Use synonyms to change the words in the sentence, without changing the meaning. Give me back just one sentence: "
    elif (type == "replace_word_antonyms"):    
        prompt = 'Use antonyms to change the meaning of the sentence. Give me back just one sentence between "": '
    elif (type == "replace_sentences"):    
        prompt = "Replace one of the sentences with another sentence that has nothing to do with the context. Return me as a result what the text would look like after the transformation. Give me back the result between {}: "
    async with SydneyClient() as sydney:
        response = await sydney.ask(prompt + question, citations=False)
        response = convert_to_plain_text(response)
        return response

In [9]:
response = await request_to_bing("When was Bing Chat released?")
print(response)

Bing Chat was released on February 8th, 2023.


In [16]:
response = await request_to_bing("How many people live in Berlin? I live in Spain", "change_order")
print(response)

To change the order of the sentence "How many people live in Berlin? I live in Spain", you can simply swap the positions of the two clauses to get "I live in Spain. How many people live in Berlin?".


In [7]:
def request_to_bard(question):

    cookie_dict = {
        "__Secure-1PSID": os.getenv('SECURE_1PSID'),
        "__Secure-1PSIDTS": os.getenv('SECURE_1PSIDTS'),
        "__Secure-1PSIDCC": os.getenv('SECURE_1PSIDCC'),
    }

    bard = BardCookies(cookie_dict=cookie_dict)
    response = bard.get_answer(question)['content']
    return response

In [None]:
print(request_to_bard("When was Bard released?"))

### Funciones auxiliares

### 1. Perturbaciones

#### 1.1. Perturbación a nivel de caracteres

##### 1.1.1. Borrado de caracteres

In [11]:
def delete_characters(input, level):
    if level < 1 or level > 10:
        return "Level must be between 1 and 10."

    characters = list(input)
    indexes_to_delete = [i for i in range(len(characters)) if characters[i] in string.ascii_letters]
    num_characters_to_delete = int(len(indexes_to_delete) * level / 20)
    indexes_to_delete = random.sample(indexes_to_delete, num_characters_to_delete)

    for i in sorted(indexes_to_delete, reverse=True):
        del characters[i]

    return ''.join(characters)

In [12]:
# Example of use
input_text = "This is an example string."
level = 3
res = delete_characters(input_text, level)
print(res)

Thi  an example string.


##### 1.1.2. Reemplazo de palabras por sinónimos

In [13]:
async def replace_words_with_synonyms(input):
    response = await request_to_bing(input, "replace_word_synonyms")
    response = response.split(':')[1].strip().replace('"', '').replace("'", '')
    return response

In [14]:
# Example of use
input_text = "This is an example string."
res = await replace_words_with_synonyms(input_text)
print(res)

The following is a sample text.


##### 1.1.3. Reemplazo de palabras por antónimos

In [26]:
async def replace_words_with_antonyms(input):
    response = await request_to_bing(input, "replace_word_antonyms")
    response = response.split(':')[1].strip().replace('"', '').replace("'", '')
    return response

In [29]:
# Example of use
input_text = "This exercise is very easy."
res = await replace_words_with_antonyms(input_text)
print(res)

This exercise is extremely difficult.


#### 1.2. Perturbación a nivel de oraciones

##### 1.2.1. Reemplazo de oraciones

In [18]:
async def replace_sentences(input):
    response = await request_to_bing(input, "replace_sentences")
    response = response.split('{')[1].strip().replace('}', '')
    return response

In [19]:
# Example of use
input_text = "This exercise is very easy. I will do it in 5 minutes. I am very happy."
res = await replace_sentences(input_text)
print(res)

The cat is sleeping on the couch. This exercise is very easy. I am very happy.


##### 1.2.2. Eliminar oración

In [15]:
def delete_sentences(input_text, level):
    if level < 1 or level > 10:
        return "El nivel debe estar entre 1 y 10."

    sentences = nltk.sent_tokenize(input_text)
    num_sentences_to_delete = int(len(sentences) * level / 20)

    if num_sentences_to_delete >= len(sentences):
        return "The level is too high and the text would be empty. Try a lower level."

    indexes_to_delete = random.sample(range(len(sentences)), num_sentences_to_delete)
    sentences_to_keep = [sentences[i] for i in range(len(sentences)) if i not in indexes_to_delete]
    
    res = ' '.join(sentences_to_keep)

    return res

In [17]:
# Example of use
input_text = "Here's a sentence that has the same meaning as 'This is an example string' but uses different words. This is another sentence. And here is one more sentence for demonstration purposes. This is a new sentence. Another example sentence."
level = 5
res = delete_sentences(input_text, level)
print(res)

Here's a sentence that has the same meaning as 'This is an example string' but uses different words. And here is one more sentence for demonstration purposes. This is a new sentence. Another example sentence.
