In [4]:
import logging
import os
import re
from typing import List

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceTextGenInference


In [23]:
# Settings unpacked as keyword arguments into HuggingFaceTextGenInference
# inside execute_chain.
mixtral_config = dict(
    # Address of the text-generation inference server.
    inference_server_url="http://10.10.78.11:8081/",
    # Only 5 new tokens are requested per completion, so replies are cut short.
    max_new_tokens=5,
    # Sampling parameters passed through to the server unchanged.
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
)

In [14]:
def execute_chain(simplified_text: str, prompt_text: str) -> str:
    """Run one prompt through the Mixtral endpoint and return its reply.

    A new ``HuggingFaceTextGenInference`` client is built from
    ``mixtral_config`` on every call. ``prompt_text`` is wrapped in a
    ``PromptTemplate`` whose only input variable is ``text``; that
    placeholder is filled with ``simplified_text`` when the chain runs.

    Args:
        simplified_text: Text substituted for the ``{text}`` placeholder of
            the prompt.
        prompt_text: Prompt template string; it must contain a ``{text}``
            placeholder.

    Returns:
        The value stored under the ``"text"`` key of the chain result, with
        leading and trailing whitespace stripped. No further parsing is done,
        so the caller receives whatever the model generated.

    Raises:
        Nothing is caught here: any error raised by the LangChain objects or
        by the request to the inference server propagates to the caller.
    """
    llm = HuggingFaceTextGenInference(**mixtral_config)

    prompt_template = PromptTemplate(
        template=prompt_text,
        input_variables=['text'],
    )
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Calling the chain object directly (Chain.__call__) is deprecated;
    # invoke() accepts the same input mapping and returns the same result dict.
    llm_response = chain.invoke({'text': simplified_text})

    return llm_response["text"].strip()

In [33]:
# Sample 1: every sentence is affirmative (no negations).
# NOTE(review): "Animales" looks like a typo for "Animals"; it is kept as-is
# because this string is the exact input sent to the model.
simplified_text = "\n".join((
    "The Savanna is a type of ecosystem.",
    "The Savanna is found in tropical regions.",
    "There is grass and some trees in the Savanna.",
    "Savannas typically have warm temperatures.",
    "Savannas have both wet and dry seasons.",
    "Lots of animals live in the Savannas.",
    "Animales like zebras and lions live in the Savanna.",
))

# Sample 2: every sentence contains a negation.
simplified_text_2 = "\n".join((
    "The Savanna is not a type of ecosystem.",
    "The Savanna is not found in tropical regions.",
    "There is no grass and no trees in the Savanna.",
))

# Sample 3: only the first of the three sentences contains a negation.
simplified_text_3 = "\n".join((
    "The Savanna is not a type of ecosystem.",
    "The Savanna is found in tropical regions.",
    "There is grass and trees in the Savanna.",
))

Evaluation requirements

1. Each sentence expresses only one idea.
2. The subject of each sentence must be explicit. Do not use pronouns as subjects.
3. Use short sentences and simple vocabulary. Use simple and direct language.
4. Do not use negations.
5. Do not use too many numbers or mathematical denominations. If there is no choice but to insert a number, always use digits.
6. Do not use confusing metaphors or complex comparisons.
7. Be clear, concise and direct.
8. Do not use technical terms, abbreviations and initials.
9. Text should be clear and coherent.
10. Avoid unnecessary ideas, words, sentences or phrases.


In [42]:
# Few-shot scoring prompt for the single requirement "Do not use negations".
# The whole prompt is wrapped in [INST] ... [/INST] tags and shows three scored
# example texts (10, 0 and 5 points) before the text to be scored.
# "{text}" is the only placeholder; execute_chain fills it with the text under
# evaluation. Because this is a regular (non-raw) string, every "\n" inside the
# example texts is a real line break in the prompt sent to the model.
prompt_text= """[INST] Assign a score (0-10 points) to the translation of the following text from English to Simple English.
    One requirement of the Simple English is to ''Do not use negations''.
    Below, there are some examples of Simple English text and the output will be 10 if it meets the requirement of ''Do not use negations'' in every sentence, and will be 0 if it does not meet it in any sentence. 
    Text 1: "The capybara is a big rodent.\nThe capybara lives in South America.\nThe capybara is the biggest rodent alive.\n It lives in South America.\nCapybaras are part of the Hydrochoerus family.\n The capybara lives in savannas and rainforests.\nThe capybara lives near places with water.\n" 
    The assigned score to Text 1 is (0-10): 10 points
    Text 2: "The capybara is not a big rodent.\nThe capybara does not live in South America.\nThe capybara is not the biggest rodent alive.\n It does not live in South America.\nCapybaras are not part of the Hydrochoerus family.\n The capybara does not live in savannas and rainforests.\nThe capybara does not live near places with water.\n" 
    The assigned score to Text 2 is (0-10): 0 points
    Text 3: "The capybara is a big rodent.\nThe capybara does live in South America.\nThe capybara is the biggest rodent alive.\n It does not live in South America.\nCapybaras are not part of the Hydrochoerus family.\n The capybara does not live in savannas and rainforests.\n" 
    The assigned score to Text 3 is (0-10): 5 points
    Text 4: "{text}"
    The assigned score to Text 4 is (0-10): [/INST] 
"""

In [43]:
# Score each sample text with the negation prompt and print the raw model
# reply next to the sample's zero-based position.
sample_texts = [simplified_text, simplified_text_2, simplified_text_3]
for i, text in enumerate(sample_texts):
    resp = execute_chain(simplified_text=text, prompt_text=prompt_text)
    print(f'SCORE TEXT {i}: {resp}')

SCORE TEXT 0: 10 points. The
SCORE TEXT 1: 0 points

Text
SCORE TEXT 2: 5 points. The first
