In [46]:
import logging
import os
import re
from typing import List

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceTextGenInference


In [156]:
# Keyword arguments unpacked into HuggingFaceTextGenInference by execute_chain.
mixtral_config = dict(
    inference_server_url="http://10.10.78.11:8081/",
    max_new_tokens=5,
    top_k=5,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
)

In [157]:
def execute_chain(simplified_text: str, prompt_text: str) -> str:
    """Run a single prompt through the text-generation endpoint and return the reply.

    A new ``HuggingFaceTextGenInference`` client is built from ``mixtral_config``
    on every call, wrapped in an ``LLMChain`` together with ``prompt_text``.

    Args:
        simplified_text: Text substituted for the ``text`` variable of the prompt.
        prompt_text: Prompt template; it is declared with the single input
            variable ``text``.

    Returns:
        The value stored under the ``"text"`` key of the chain result, with
        leading and trailing whitespace removed.
    """
    llm = HuggingFaceTextGenInference(**mixtral_config)

    prompt_template = PromptTemplate(
        template=prompt_text,
        input_variables=['text'],
    )
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Use `invoke` rather than calling the chain object directly: the
    # `chain(...)` form is deprecated, and `invoke` returns the same dict.
    llm_response = chain.invoke({'text': simplified_text})

    return llm_response["text"].strip()

In [171]:
# Sample 1: every sentence is affirmative.
simplified_text = "\n".join([
    "The Savanna is a type of ecosystem.",
    "The Savanna is found in tropical regions.",
    "Savannas typically have warm temperatures.",
    "Savannas have both wet and dry seasons.",
    "Lots of animals live in the Savannas.",
    "Animales like zebras and lions live in the Savanna.",
])

# Sample 2: every sentence is negated.
simplified_text_2 = "\n".join([
    "The Savanna is not a type of ecosystem.",
    "The Savanna is not found in tropical regions.",
    "The Savanna typically have not warm temperatures.",
    "The Savanna has not both wet and dry seasons.",
])

# Sample 3: affirmative and negated sentences mixed.
simplified_text_3 = "\n".join([
    "The Savanna is a type of ecosystem.",
    "The Savanna is found in tropical regions.",
    "Savannas typically have not warm temperatures.",
    "There no grass and trees in the Savanna.",
])

Evaluation requirements

1. Each sentence expresses only one idea.
2. The subject of each sentence must be explicit. Do not use pronouns as subjects.
3. Use short sentences and simple vocabulary. Use simple and direct language.
4. Do not use negations.
5. Do not use too many numbers or mathematical denominations. If there is no choice but to insert a number, always use digits.
6. Do not use confusing metaphors or complex comparisons.
7. Be clear, concise and direct.
8. Do not use technical terms, abbreviations, or acronyms.
9. Text should be clear and coherent.
10. Avoid unnecessary ideas, words, sentences or phrases.


In [176]:
# Few-shot scoring prompt for the "Do not use negations" requirement.
# Three worked examples (scored 10, 0 and 5) come first; the text to score is
# substituted for the {text} placeholder in the final "Text 4" section.
# The string is not raw, so every \n inside the quoted example texts becomes a
# real line break in the prompt.
# NOTE(review): the <s>/[INST]/[/INST] markers presumably follow the Mixtral
# instruct chat format -- confirm the exact layout against the model card.
prompt_text= """<s>[INST] Assign a score to the given text. The output must be a number 0, 5, 10.
    The output will be '10' if it meets the requirement of ''Do not use negations'' in every sentence, and will be '0' if it does not meet it in any sentence. If some sentences meet the requirement, but other not meet it, set a score of '5'.

    Text 1: "The capybara is a big rodent.\nThe capybara lives in South America.\nThe capybara is the biggest rodent alive.\n It lives in South America.\nCapybaras are part of the Hydrochoerus family.\n The capybara lives in savannas and rainforests.\nThe capybara lives near places with water.\n" 
    The assigned score to Text 1 is (0-10): 10 points

    Text 2: "The capybara is not a big rodent.\nThe capybara does not live in South America.\nThe capybara is not the biggest rodent alive.\n It does not live in South America.\nCapybaras are not part of the Hydrochoerus family.\n The capybara does not live in savannas and rainforests.\nThe capybara does not live near places with water.\n" 
    The assigned score to Text 2 is (0-10): 0 points

    Text 3: "The capybara is not a big rodent.\nThe capybara does live in South America.\nThe capybara is the biggest rodent alive.\n It does not live in South America.\nCapybaras are not part of the Hydrochoerus family.\n The capybara does not live in savannas and rainforests.\n" 
    The assigned score to Text 3 is (0-10): 5 points
    [/INST]</s> 

    [INST] 
    Text 4: "{text}"
    The assigned score to Text 4 is (0-10): 
    [/INST]
"""

In [177]:
# Score each sample text with the negation prompt and print the model's raw reply.
for i, text in enumerate((simplified_text, simplified_text_2, simplified_text_3)):
    resp = execute_chain(text, prompt_text)
    print(f'SCORE TEXT {i}: {resp}')

SCORE TEXT 0: 10 points.
SCORE TEXT 1: 0 points.
SCORE TEXT 2: 0 points.
