In [7]:
import logging
import os
import re
from typing import List

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceTextGenInference


In [8]:
# Keyword arguments unpacked into HuggingFaceTextGenInference when the LLM
# client is built: the inference endpoint plus the generation settings.
mixtral_config = dict(
    inference_server_url="http://10.10.78.11:8081/",
    max_new_tokens=10,
    top_k=7,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.1,
)

In [9]:
def execute_chain(simplified_text: str, prompt_text: str) -> str:
    """Run the given prompt against the Mixtral model and return its answer.

    Args:
        simplified_text: Text substituted for the ``{text}`` placeholder of
            the prompt template.
        prompt_text: Prompt template string; ``text`` is declared as its only
            input variable, so it is expected to contain ``{text}``.

    Returns:
        The value stored under the ``"text"`` key of the chain result, with
        leading and trailing whitespace removed.
    """
    llm = HuggingFaceTextGenInference(**mixtral_config)

    # Build the prompt with a single input variable named 'text'.
    prompt_template = PromptTemplate(
        template=prompt_text,
        input_variables=['text'],
    )
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Calling the chain object directly (Chain.__call__) is deprecated;
    # `invoke` is the supported entry point and returns the same result dict.
    llm_response = chain.invoke({'text': simplified_text})

    return llm_response["text"].strip()

In [18]:
# Sample text whose sentences all start with an explicit noun subject.
simplified_text = (
    "The Savanna is a type of ecosystem.\n"
    "The Savanna is found in tropical regions.\n"
    "Savannas typically have warm temperatures.\n"
    "Savannas have both wet and dry seasons.\n"
    "Lots of animals live in the Savannas.\n"
)
# Sample text whose sentences start with the pronoun "It".
simplified_text_2 = (
    "It is a big place with tall grass and 1,000,000 trees, "
    "where many animals like lions and giraffes live there.\n"
    " It does not get very hot and dry temperatures.\n"
)

Evaluation requirements

1. Each sentence expresses only one idea.
2. The subject of each sentence must be explicit. Do not use pronouns as subjects.
3. Use short sentences and simple vocabulary. Use simple and direct language.
4. Do not use negations.
5. Do not use too many numbers or mathematical denominations. If there is no choice but to insert a number, always use digits.
6. Do not use confusing metaphors or complex comparisons.
7. Be clear, concise and direct.
8. Do not use technical terms, abbreviations and initials.
9. Text should be clear and coherent.
10. Avoid unnecessary ideas, words, sentences or phrases.


In [19]:
# Few-shot scoring prompt for the "explicit subject" requirement.
# `{text}` is the only template placeholder; it is filled with the text to score.
# The literal previously opened with four double quotes, which put a stray '"'
# in front of the "<s>[INST]" marker in the prompt.
prompt_text = """<s>[INST] Assign a score to the given text. The requirement of each sentence is 'The subject of each sentence must be explicit, that means, does not begin with a pronoun'.

    The output will be '1' if it meets the requirement in every sentence, will be '0' if it does not meet it in any sentence.

    Text 1: "The capybara is a big rodent.\nThe capybara lives in South America.\nThe capybara is the biggest rodent alive.\n Capybaras are part of the Hydrochoerus family.\n The capybara lives in savannas and rainforests.\nThe capybara lives near places with water." 
    Score (0-1): 1 points
    
    Text 2: "It is a big rodent.\nIt lives in South America.\nIt is the biggest rodent alive.\n They are part of the Hydrochoerus family.\n It lives in savannas and rainforests.\nIt lives near places with water." 
    Score (0-1): 0 points
    
    [/INST]</s> 

    [INST] 
    Text: "{text}"
    Score (0-1): 
    The output must be a number without explanations. 
    [/INST]
"""

In [20]:
# Score every sample text with the same prompt and print one line per text.
sample_texts = [simplified_text, simplified_text_2]
for i, text in enumerate(sample_texts):
    resp = execute_chain(
        prompt_text=prompt_text,
        simplified_text=text,
    )
    print(f'SCORE TEXT {i}: {resp}')

SCORE TEXT 0: 1 points
SCORE TEXT 1: 0 points
