<a href="https://colab.research.google.com/github/mertcan-basut/nlp/blob/main/translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Create a placeholder .env file in the current working directory. Replace every
# "editme" value with your own Azure OpenAI settings before running the cells below.
# NOTE: the first command redirects with ">" (truncate), so re-running this cell
# overwrites an existing .env; the following two commands append with ">>".
!echo "AZURE_OPENAI_API_KEY=editme" > .env
!echo "AZURE_OPENAI_ENDPOINT=editme" >> .env
!echo "OPENAI_API_VERSION=editme" >> .env

In [None]:
!pip install -q langchain langchain-openai
!pip install -q python-dotenv

In [57]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai.chat_models import AzureChatOpenAI

from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and load its entries into the process environment.
# override=True makes values from .env take precedence over variables that are
# already set; the boolean result is discarded via `_`.
_ = load_dotenv(find_dotenv(), override=True) # read local .env file

In [175]:
# Structured-output parser for the translation reply. Each (name, description)
# pair below becomes one string-typed field the model is asked to return; the
# order here is the order in which the fields are declared to the parser.
output_parser = StructuredOutputParser.from_response_schemas(
  [
    ResponseSchema(name=field_name, type='string', description=field_help)
    for field_name, field_help in (
      ('org_lang', "Detected language of the provided text."),
      ('translation', "Translation of the given text into the desired destination language."),
      ('dest_lang', "Desired destination language, to translate the given text into."),
    )
  ]
)

In [176]:
# Prompt text for the translation task. Adjacent string literals are joined by
# the parser into a single string; explicit "\n" marks every line break.
# Placeholders: {text} and {dest_lang} are supplied per call, while
# {format_instructions} is bound once below.
PROMPT = (
  "For the provided text below, detect which language it is written "
  "in and translate it to the desired destination language. If the text is "
  "already written in the destination language, return the text as the "
  "`translation` without any modifications.\n"
  "\n"
  "Text: {text}\n"
  "Destination language: {dest_lang}\n"
  "\n"
  "{format_instructions}"
)

# Pre-fill the parser's format instructions so callers only pass text/dest_lang.
prompt = PromptTemplate.from_template(
  PROMPT,
  partial_variables=dict(format_instructions=output_parser.get_format_instructions()),
)

In [177]:
# Azure-hosted chat model with temperature 0.0. No credentials are passed
# explicitly here; presumably they are picked up from the environment variables
# loaded from .env -- confirm against the langchain-openai documentation.
llm = AzureChatOpenAI(model="gpt-35-turbo", temperature=0.0)

# Smoke test of the connection. `predict` is a deprecated legacy method; use the
# Runnable `invoke` API (the same one the translation chain uses) and read the
# reply text from the returned message's `.content`.
llm.invoke("Hello!").content

'Hello! How can I assist you today?'

In [178]:
# Translation pipeline: render the prompt, send it to the chat model, then parse
# the reply into a dict with the `org_lang`, `translation` and `dest_lang` fields.
llm_translate = (
  prompt
  | llm
  | output_parser
)

# Quick end-to-end check of the chain.
llm_translate.invoke(dict(text="Hello, world!", dest_lang="Turkish"))

{'org_lang': 'English',
 'translation': 'Merhaba, dünya!',
 'dest_lang': 'Turkish'}

In [179]:
# Supported destination languages: short code -> full language name.
languages = {
  'TR': "Turkish",
  'EN': "English",
  'FR': "French",
  'ES': "Spanish",
  'DE': "German",
}

def translate(text: str, dest_lang: str) -> str:
  """Translate `text` into `dest_lang` using the `llm_translate` chain.

  Args:
    text: Text to translate. Must contain at least one non-whitespace
      character; it is forwarded to the chain unmodified.
    dest_lang: Destination language, given either as a code (a key of
      `languages`, e.g. "DE") or as a full name (a value of `languages`,
      e.g. "German"). Matching ignores case and surrounding whitespace.

  Returns:
    The `translation` field of the chain's parsed response.

  Raises:
    ValueError: If `text` is empty or whitespace-only, or if `dest_lang`
      does not match any supported language.
  """
  # Reject empty and whitespace-only input up front instead of sending it to the model.
  if not text or (isinstance(text, str) and not text.strip()):
    raise ValueError("Empty string provided to be translated, text cannot be empty.")

  # Normalise once so "de", " DE " and "german" all resolve to "German".
  wanted = str(dest_lang).strip().casefold()
  resolved = next(
    (
      name
      for code, name in languages.items()
      if wanted in (code.casefold(), name.casefold())
    ),
    None,
  )
  if resolved is None:
    raise ValueError(f"Invalid destination language provided: {dest_lang}")

  # The chain always receives the canonical full language name.
  response = llm_translate.invoke({'text': text, 'dest_lang': resolved})
  return response['translation']

In [183]:
translate("Merhaba, nasılsın?", "DE")

'Hallo, wie geht es dir?'