In [1]:
from IPython.display import clear_output

In [8]:
!pip install langchain pydantic langchain_community langchain-mistralai -q
clear_output()
!pip list

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
accelerate                       0.32.1
aiohappyeyeballs                 2.4.0
aiohttp                          3.10.5
aiosignal                        1.3.1
alabaster                        0.7.16
albucore                         0.0.13
albumentations                   1.4.14
altair                           4.2.2
annotated-types                  0.7.0
anyio                            3.7.1
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array_record                     0.5.1
arviz                            0.18.0
asn1crypto                       1.5.1
astropy                          6.1.2
astropy-iers-data                0.2024.8.26.0.31.57
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.1.0
attrs                            24.2.0
audioread              

In [3]:
import os
from google.colab import userdata
# Fixed LangSmith tracing settings.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})

# Copy each Colab secret (right) into the environment variable named on the
# left. The generator is consumed pair by pair, so the secrets are fetched and
# assigned one at a time, in this order.
os.environ.update(
    (env_var, userdata.get(secret_name))
    for env_var, secret_name in (
        ('LANGCHAIN_API_KEY', 'LANGCHAIN_API_KEY'),
        ("MISTRAL_API_KEY", 'MISTRAL_API'),
        ("HUGGINGFACEHUB_API_TOKEN", 'HF_TOKEN'),
        ("ACTIVELOOP_TOKEN", 'ACTIVELOOP_API'),
    )
)

In [22]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, field_validator
from typing import List

# Output schema: a single list of substitute words.
# (Documented with `#` comments on purpose; a class docstring would be copied
# into the model's JSON schema and change the format instructions.)
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitute words based on context")

    @field_validator('words')
    @classmethod
    def not_start_with_number(cls, field):
        """Validate that no suggested word begins with a numeric character.

        Args:
            field: The list of words being validated.

        Returns:
            The same list, unchanged, when every word passes.

        Raises:
            ValueError: If any word starts with a numeric character.
        """
        for item in field:
            # Slice rather than index: an empty string must not raise
            # IndexError, which pydantic would not report as a validation error.
            if item[:1].isnumeric():
                raise ValueError("The word cannot start with numbers!")
        return field

parser = PydanticOutputParser(pydantic_object=Suggestions)

In [5]:
from langchain.prompts import PromptTemplate

# Prompt asking for substitutes of `target_word` given `context`.
# `format_instructions` is filled in once from the current parser; the other
# two variables are supplied when the prompt is invoked.
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Sample input reused by the chain invocations below.
model_input = {
    'target_word': "behaviour",
    'context': "The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."
}

In [6]:
# Echo the sample input dict so its contents are visible in the notebook.
model_input

{'target_word': 'behaviour',
 'context': 'The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.'}

In [9]:
from langchain_mistralai.chat_models import ChatMistralAI

# Chat model client created with no explicit arguments, so the model name,
# temperature, etc. are whatever the library defaults to.
# NOTE(review): presumably the API key is picked up from the MISTRAL_API_KEY
# environment variable set earlier; confirm.
llm = ChatMistralAI()

In [10]:
# Compose prompt -> model -> parser with the `|` operator, then run the
# pipeline once on the sample input and keep the parsed result.
chain = prompt | llm | parser
output = chain.invoke(model_input)

In [12]:
# Display the parsed result of the chain invocation.
output

Suggestions(words=['conduct', 'behavior', 'actions', 'attitude', 'deportment'])

In [14]:
# Prompt variant that additionally asks for the reasoning behind each
# suggested word. Placeholders: format_instructions, target_word, context.
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

In [18]:
# Output schema: substitute words plus one reasoning string per word.
# (Documented with `#` comments on purpose; a class docstring would be copied
# into the model's JSON schema and change the format instructions.)
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitute words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    @field_validator('words')
    @classmethod
    def not_start_with_number(cls, field):
        """Validate that no suggested word begins with a numeric character.

        Args:
            field: The list of words being validated.

        Returns:
            The same list, unchanged, when every word passes.

        Raises:
            ValueError: If any word starts with a numeric character.
        """
        for item in field:
            # Slice rather than index: an empty string must not raise
            # IndexError, which pydantic would not report as a validation error.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @field_validator('reasons')
    @classmethod
    def end_with_dot(cls, field):
        """Normalise every reason so that it ends with a period.

        Args:
            field: The list of reasoning strings being validated.

        Returns:
            A new list in which each non-empty reason ends with ".".
            Empty strings are passed through untouched.
        """
        normalised = []
        for item in field:
            # `endswith` is safe on empty strings, unlike `item[-1]`.
            if item and not item.endswith("."):
                item += "."
            normalised.append(item)
        # A fresh list is returned so the caller's input is never mutated.
        return normalised

parser = PydanticOutputParser(pydantic_object=Suggestions)

In [19]:
# Rebuild the prompt from the current `template` and `parser`, so the format
# instructions match whichever parser is bound at this point.
prompt = PromptTemplate(
    input_variables=["target_word", "context"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=template,
)

# Re-create the pipeline with the new prompt/parser and run it on the sample input.
chain = prompt | llm | parser
output = chain.invoke(model_input)
output

Suggestions(words=['conduct', 'actions', 'attitude'], reasons=["Conduct is a similar word to behavior and can refer to the way something happens or is done, which fits the context of the students' actions in the classroom.", 'Actions is a broader term that can encompass both the individual and collective things that the students do, which fits the context of disruptive activities in the classroom.', "Attitude can refer to the students' overall demeanor or mindset, which fits the context of a disruptive classroom environment."])

In [25]:
from langchain.output_parsers import CommaSeparatedListOutputParser

# Rebind `parser` to a comma-separated list parser; code that later calls
# parser.get_format_instructions() will read this object.
parser = CommaSeparatedListOutputParser()

In [26]:
# Free-text prompt for the list-style parser: the target word and the context
# are embedded in the sentence, followed by the parser's format instructions.
template = """
Offer a list of suggestions to substitute the word '{target_word}' based on the following text: {context}.
{format_instructions}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Build the pipeline with the current parser and run it on the sample input.
chain = prompt | llm | parser
output = chain.invoke(model_input)
output

['conduct',
 'actions',
 'conductance',
 'demeanor',
 'attitude',
 'behavior',
 'behavioral pattern',
 'activities',
 'antics',
 'carrying-on']

In [28]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# One ResponseSchema per expected output key, built from (name, description) pairs.
response_schema = [
    ResponseSchema(name=schema_name, description=schema_description)
    for schema_name, schema_description in (
        ("words", 'A substitute word based on context'),
        ("reasons", 'The reasoning why this word fits the context.'),
    )
]

# Rebind `parser` to a structured parser derived from those schemas.
parser = StructuredOutputParser.from_response_schemas(response_schema)

In [29]:
# Output schema without validators: words plus one reasoning string per word.
# (Documented with `#` comments on purpose; a class docstring would be copied
# into the model's JSON schema and change the format instructions.)
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitute words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

parser = PydanticOutputParser(pydantic_object=Suggestions)

# Deliberately malformed completion: the second key is "reasoning", not the
# "reasons" field the schema requires.
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'

# The failure is the point of this cell, so catch and show it instead of
# letting it abort a "Run All". LangChain's OutputParserException derives from
# ValueError, so no additional import is needed to catch it.
try:
    parser.parse(missformatted_output)
except ValueError as parse_error:
    print(parse_error)

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}. Got: 1 validation error for Suggestions
reasons
  Field required [type=missing, input_value={'words': ['conduct', 'ma...particular situation.']}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/missing

In [30]:
from langchain.output_parsers import OutputFixingParser

# Wrap the current `parser` in an OutputFixingParser built with the same `llm`,
# then parse the malformed string through the wrapper.
# NOTE(review): presumably the wrapper asks the LLM to repair output the inner
# parser rejects; confirm against the LangChain docs.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=["'conduct' refers to the way someone acts in a particular situation.", "'manner' refers to the way someone behaves in a particular situation."])

In [33]:
from langchain.output_parsers import RetryWithErrorOutputParser

# Output schema without validators: words plus one reasoning string per word.
# (Documented with `#` comments on purpose; a class docstring would be copied
# into the model's JSON schema and change the format instructions.)
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitute words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

parser = PydanticOutputParser(pydantic_object=Suggestions)

# Define the prompt: asks for substitutes of `target_word` plus the reasoning
# for each, with the parser's format instructions filled in up front.
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Unlike the earlier dict of variables, `model_input` is now the fully
# formatted prompt, because parse_with_prompt below takes it as its second argument.
model_input = prompt.format_prompt(target_word="behaviour", context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.")


# Incomplete completion: the "reasons" field is missing entirely.
missformatted_output = '{"words": ["conduct", "manner"]}'

retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=llm)

# Parse the incomplete completion together with the prompt that produced it.
retry_parser.parse_with_prompt(missformatted_output, model_input)

Suggestions(words=['conduct', 'manner'], reasons=["conduct: This word is suggested as it is already present in the sentence and can replace 'behaviour' to indicate the action of the students that was disruptive. It maintains the consistency and coherence of the sentence.", "manner: This word is an alternative to 'behaviour' as it can describe the way the students acted in class. It can help convey the disruptive nature of their actions."])