# Setup

## Install dependencies

In [1]:
import sys

!{sys.executable} -m pip install langchain openai tqdm

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/e8/67/17a732ee99a7a383b5fd51ca671030e9f9d22e6e85af8873d20e2e01f7fd/langchain-0.1.1-py3-none-any.whl.metadata
  Downloading langchain-0.1.1-py3-none-any.whl.metadata (13 kB)
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/f1/d8/590a68d390501faf48f4e57b098076df02afd003ac880f50d3b0704f7773/openai-1.8.0-py3-none-any.whl.metadata
  Downloading openai-1.8.0-py3-none-any.whl.metadata (18 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Obtaining dependency information for jsonpatch<2.0,>=1.33 from https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl.metadata
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.13 (from langchain)
  Obtaining dependency information for la

In [32]:
import os

# Read the key from the environment so a real secret never has to be typed
# into (and saved with) the notebook. The "API_KEY" placeholder is kept as the
# fallback, so the cell still runs unchanged when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "API_KEY")

## First step: use an LLM to translate the problem text

In [33]:
from langchain.chat_models import ChatOpenAI

# Chat model client that the chain built below sends the rendered prompt to.
llm = ChatOpenAI(model="gpt-4-1106-preview", openai_api_key=OPENAI_API_KEY)

In [34]:
# System message: fixes the persona for every request.
system_prompt = "You are a math tutor skilled in Russian and English."

# User message template. `{problem_text}` is the only template variable and is
# filled in when the chain is invoked. Written one literal per output line so
# the exact text (including the leading blank line) is easy to see.
my_magic_prompt = (
    "\n"
    "TASK:\n"
    "Translate PROBLEM TEXT into English. Please do not change or solve this task and format.\n"
    'Omit "PROBLEM TEXT:" caption in your response.\n'
    "\n"
    "PROBLEM TEXT:\n"
    "{problem_text}"
)

In [35]:
from langchain.prompts import ChatPromptTemplate

# Two-message chat template: the fixed system persona followed by the user
# message that carries the `{problem_text}` variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("user", my_magic_prompt)]
)

In [36]:
from langchain_core.output_parsers import StrOutputParser

# Last stage of the chain; the printed result of `invoke` further down shows
# the chain yielding plain text.
output_parser = StrOutputParser()

# Pipeline order matters: render the prompt, send it to the chat model, then
# parse the model's reply. Despite its name, this chain performs the
# translation task described in `my_magic_prompt`.
chain_solution = prompt | llm | output_parser

In [37]:
import pandas as pd

# Load the training problems. Later cells read the `problem_text` and
# `problem_id` columns of this frame and add a `translation` column to it.
train = pd.read_csv('/kaggle/input/prompt-engineering-math/train.csv')

## Example input (LaTeX format)

In [38]:
# Draw one problem statement to use as the worked example. The seed is fixed so
# that the example (and the translation shown for it below) is the same problem
# on every Restart & Run All instead of changing with each execution.
sample = train["problem_text"].sample(n=1, random_state=42).iloc[0]
print(sample)

Площадь треугольника со сторонами~$a$, $b$, $c$ можно найти по формуле Герона $S=\sqrt{p\left(p-a\right)\left(p-b\right)\left(p-c\right)} $, где $p=\dfrac{a+b+c}{2} $. Найдите площадь треугольника,\rule{0pt}{14pt} если длины его сторон равны 13, 14, 15.


# Translation for this input

In [39]:
# Show the raw user-message template (with the unfilled `{problem_text}` slot).
print(my_magic_prompt)


TASK:
Translate PROBLEM TEXT into English. Please do not change or solve this task and format.
Omit "PROBLEM TEXT:" caption in your response.

PROBLEM TEXT:
{problem_text}


In [40]:
# Run the chain on the example problem; the dict key must match the
# `{problem_text}` variable in the prompt template.
translation = chain_solution.invoke({"problem_text": sample})

In [41]:
# Print the chain's output for the example problem.
print(translation)

The area of a triangle with sides $a$, $b$, $c$ can be found using Heron's formula $S=\sqrt{p(p-a)(p-b)(p-c)}$, where $p=\frac{a+b+c}{2}$. Find the area of the triangle if the lengths of its sides are 13, 14, 15.


# Processing a train set

## Write a function to process one problem and try it on 3 random problems

In [42]:
from langchain.callbacks import get_openai_callback
from tqdm.auto import tqdm

# Enables the `progress_apply` method used on pandas objects below.
tqdm.pandas()

# Create the result column only if it does not exist yet. Assigning it
# unconditionally would wipe every translation already obtained (and paid for)
# whenever this cell is re-run.
if 'translation' not in train.columns:
    train[['translation']] = None

def get_answer_llm(problem_text, chain_solution=chain_solution):
    """Run one problem statement through a chain and wrap the result.

    Args:
        problem_text: Value passed to the chain as the ``problem_text`` input.
        chain_solution: Object exposing ``invoke``. Defaults to the
            notebook-level ``chain_solution`` as it exists when this function
            is defined (default arguments are bound at definition time).

    Returns:
        A ``pd.Series`` with a single ``translation`` entry holding whatever
        ``chain_solution.invoke`` returned.
    """
    chain_input = {"problem_text": problem_text}
    translated = chain_solution.invoke(chain_input)
    return pd.Series([translated], index=['translation'])

# Trial run: translate three randomly chosen rows that have no translation yet
# and report token usage / cost for those requests.
with get_openai_callback() as cb:
    train.loc[train['translation'].isna(), ['translation']] = (
        train.loc[train['translation'].isna(), 'problem_text']
        .sample(n=3)
        .progress_apply(get_answer_llm)
    )
    print(cb)

  0%|          | 0/3 [00:00<?, ?it/s]

Tokens Used: 601
	Prompt Tokens: 440
	Completion Tokens: 161
Successful Requests: 3
Total Cost (USD): $0.00923


## Output results for these three problems: original text and translation

In [43]:
# Rows that already have a translation.
train[train['translation'].notna()]

Unnamed: 0,problem_id,problem_text,answer,hint,translation
37,9603,"Цифры четырёхзначного числа, кратного 5, запис...","[8065, 8155, 8245, 8335, 8425, 8515, 8605]",перебор или уравнение в цифрах,The digits of a four-digit number that is a mu...
50,12130,На прилавке цветочного магазина стоят 3 вазы с...,12,формула включений - исключений,There are 3 vases with roses on the flower sho...
74,7113,"Набор полотенец, который стоил 280 рублей, про...",476,процент от числа,A set of towels that cost 280 rubles is being ...


## Process the rest of the dataset. This will take some time

In [44]:
# Translate every row that still lacks a translation.
#
# Each result is written back to `train` as soon as it arrives. If a request
# fails part-way through, the translations obtained so far are kept, and
# re-running this cell resumes with only the rows that are still missing.
# The usage/cost summary is printed even when the loop is interrupted.
with get_openai_callback() as cb:
    pending = train.loc[train['translation'].isna(), 'problem_text']
    try:
        for row_label, problem_text in tqdm(pending.items(), total=len(pending)):
            train.at[row_label, 'translation'] = get_answer_llm(problem_text)['translation']
    finally:
        print(cb)

  0%|          | 0/97 [00:00<?, ?it/s]

Tokens Used: 27967
	Prompt Tokens: 19165
	Completion Tokens: 8802
Successful Requests: 97
Total Cost (USD): $0.4557099999999998


In [45]:
# Preview the first five rows, now including the `translation` column.
train.head()

Unnamed: 0,problem_id,problem_text,answer,hint,translation
0,2374,Найдите значение выражения $\dfrac{17}{5} :\df...,1.6,Десятичную в обыкновенную,Find the value of the expression $\dfrac{17}{5...
1,4723,В компании из 30 человек 25 пользуются социаль...,24.0,Диаграмма Венна,"In a company of 30 people, 25 use the social n..."
2,7135,Число дорожно-транспортных происшествий (ДТП) ...,32.0,дробь в проценты,The number of road traffic accidents (RTAs) in...
3,5814,Найдите значение выражения $\dfrac{2\strut^{-5...,256.0,свойства степени,Find the value of the expression $\dfrac{2^{-5...
4,9237,Путешественник из Москвы хочет посетить четыре...,53.0,перебор,A traveler from Moscow wants to visit four cit...


In [None]:
# Persist the frame with `problem_id` as the index column (set_index drops the
# original column by default, so it is written only once).
train.set_index('problem_id').to_csv('train_with_translation.csv')

## Output problem text and generated translation for a single problem

In [47]:
from IPython.display import display_latex

# Positional row to inspect.
problem_train_index = 0
sample = train.iloc[problem_train_index]

# Original statement, a separator line, then the translation.
display_latex(sample['problem_text'], raw=True)
print(80 * '-')

display_latex(sample['translation'], raw=True)

--------------------------------------------------------------------------------


In [52]:
# Read the saved translations back using the same relative path that the
# `to_csv('train_with_translation.csv')` cell writes to, so the round trip
# works from whatever the working directory is rather than only when it
# happens to be /kaggle/working.
train_translate = pd.read_csv('train_with_translation.csv')

In [53]:
# Preview the first five rows of the reloaded file.
train_translate.head()

Unnamed: 0,problem_id,problem_text,answer,hint,translation
0,2374,Найдите значение выражения $\dfrac{17}{5} :\df...,1.6,Десятичную в обыкновенную,Find the value of the expression $\dfrac{17}{5...
1,4723,В компании из 30 человек 25 пользуются социаль...,24.0,Диаграмма Венна,"In a company of 30 people, 25 use the social n..."
2,7135,Число дорожно-транспортных происшествий (ДТП) ...,32.0,дробь в проценты,The number of road traffic accidents (RTAs) in...
3,5814,Найдите значение выражения $\dfrac{2\strut^{-5...,256.0,свойства степени,Find the value of the expression $\dfrac{2^{-5...
4,9237,Путешественник из Москвы хочет посетить четыре...,53.0,перебор,A traveler from Moscow wants to visit four cit...
