In [82]:
from FlagEmbedding import BGEM3FlagModel, FlagReranker
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.schema import Document
from langchain.text_splitter import MarkdownTextSplitter
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

import torch
import os
import tempfile
import pymupdf4llm
import arxiv
import requests
import re
import shutil
from typing import Optional, Tuple, List

from tqdm import tqdm
import pandas as pd
import time
import numpy as np

from rouge_score import rouge_scorer, scoring
from sacrebleu import corpus_bleu, BLEU

In [16]:
references = ["–≠—Ç–æ —ç—Ç–∞–ª–æ–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è."]
hypotheses = ["–≠—Ç–æ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è."]
bleu = sacrebleu.corpus_bleu(hypotheses, [references], tokenize='intl')
print("BLEU:", bleu.score)

BLEU: 35.35533905932737


# –û—Ü–µ–Ω–∫–∞ Retriever

In [2]:
def extract_arxiv_id(url: str) -> str:
    """
    –ò–∑–≤–ª–µ–∫–∞–µ—Ç arXiv ID –∏–∑ URL.

    Args:
        url (str): –°—Å—ã–ª–∫–∞ –Ω–∞ —Å—Ç–∞—Ç—å—é arXiv.

    Returns:
        str: –ò–∑–≤–ª–µ—á—ë–Ω–Ω—ã–π ID –∏–ª–∏ None, –µ—Å–ª–∏ –Ω–µ —É–¥–∞–ª–æ—Å—å.
    """
    match = re.search(r'arxiv\.org/(abs|pdf)/([0-9]+\.[0-9]+)(v\d+)?', url)
    return match.group(2) if match else None


def download_pdf(arxiv_id: str, save_path: str = "article.pdf") -> str:
    """
    –°–∫–∞—á–∏–≤–∞–µ—Ç PDF-—Ñ–∞–π–ª –ø–æ arXiv ID –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ—Ç –Ω–∞ –¥–∏—Å–∫.

    Args:
        arxiv_id (str): –ò–¥–µ–Ω—Ç–∏—Ñ–∏–∫–∞—Ç–æ—Ä —Å—Ç–∞—Ç—å–∏ –Ω–∞ arXiv.
        save_path (str): –ü—É—Ç—å, –ø–æ –∫–æ—Ç–æ—Ä–æ–º—É —Å–æ—Ö—Ä–∞–Ω–∏—Ç—å —Ñ–∞–π–ª.

    Returns:
        Tuple[str, str]: –ü—É—Ç—å –¥–æ —Å–æ—Ö—Ä–∞–Ω—ë–Ω–Ω–æ–≥–æ —Ñ–∞–π–ª–∞ –∏ –Ω–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏.
    """
    client = arxiv.Client()
    search = arxiv.Search(id_list=[arxiv_id])
    result = next(client.results(search))
    pdf_url = result.pdf_url

    response = requests.get(pdf_url)
    with open(save_path, "wb") as f:
        f.write(response.content)
    return save_path, result.title
    

def trim_markdown_after_section(md_text: str, section="references", aliases: Optional[List[str]] = None) -> str:
    """
    –£–¥–∞–ª—è–µ—Ç –≤—Å—ë –∏–∑ markdown-—Ç–µ–∫—Å—Ç–∞ –Ω–∞—á–∏–Ω–∞—è —Å —É–∫–∞–∑–∞–Ω–Ω–æ–π —Å–µ–∫—Ü–∏–∏ (–ø–æ —É–º–æ–ª—á–∞–Ω–∏—é ‚Äî References).

    Args:
        md_text (str): –ò—Å—Ö–æ–¥–Ω—ã–π Markdown.
        section (str): –û—Å–Ω–æ–≤–Ω–æ–π –∑–∞–≥–æ–ª–æ–≤–æ–∫ –¥–ª—è –ø–æ–∏—Å–∫–∞.
        aliases (List[str]): –ê–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤–Ω—ã–µ –≤–∞—Ä–∏–∞–Ω—Ç—ã –∑–∞–≥–æ–ª–æ–≤–∫–∞.

    Returns:
        str: Markdown –¥–æ –Ω–∞–π–¥–µ–Ω–Ω–æ–≥–æ –∑–∞–≥–æ–ª–æ–≤–∫–∞.
    """
    all_keys = [section.lower()] + [a.lower() for a in aliases or []]

    pattern = re.compile(
        rf'^#+\s*\**({"|".join(map(re.escape, all_keys))})\**.*$',
        re.IGNORECASE | re.MULTILINE
    )
    match = pattern.search(md_text)
    return md_text[:match.start()] if match else md_text


def clean_markdown_for_rag(md_raw: str) -> str:
    """
    –û—á–∏—â–∞–µ—Ç markdown –æ—Ç –≤–∏–∑—É–∞–ª—å–Ω–æ–≥–æ –º—É—Å–æ—Ä–∞, –ª–∏—à–Ω–∏—Ö —Å–∏–º–≤–æ–ª–æ–≤ –∏ –ø—É—Å—Ç—ã—Ö —Å—Ç—Ä–æ–∫.

    Args:
        md_text (str): –ò—Å—Ö–æ–¥–Ω—ã–π Markdown —Ç–µ–∫—Å—Ç.

    Returns:
        str: –û—á–∏—â–µ–Ω–Ω—ã–π –∏ –Ω–æ—Ä–º–∞–ª–∏–∑–æ–≤–∞–Ω–Ω—ã–π Markdown.
    """
    lines = md_raw.splitlines()
    cleaned_lines = []

    for line in lines:
        stripped = line.strip()

        if stripped == "-----":
            continue

        if re.fullmatch(r'[\d\s\.,%]+', stripped):
            continue

        line = re.sub(r'\*\[(,|\*|\d+)\]\*', '', line)
        line = re.sub(r'\*(,+|\s*[,\.])\*', '', line)
        line = re.sub(r'^(#{1,6})\s*\*\*(.*?)\*\*\s*$', r'\1 \2', line)
        line = re.sub(r'\*{3,}', '**', line)
        line = re.sub(r'\s{2,}', ' ', line)

        cleaned_lines.append(line)

    # –°–∫–ª–µ–∏–≤–∞–µ–º –æ–±—Ä–∞—Ç–Ω–æ –∏ –Ω–æ—Ä–º–∞–ª–∏–∑—É–µ–º –º–Ω–æ–∂–µ—Å—Ç–≤–µ–Ω–Ω—ã–µ –ø—É—Å—Ç—ã–µ —Å—Ç—Ä–æ–∫–∏
    cleaned_text = "\n".join(cleaned_lines)

    # –ó–∞–º–µ–Ω—è–µ–º 3+ –ø—É—Å—Ç—ã—Ö —Å—Ç—Ä–æ–∫ –ø–æ–¥—Ä—è–¥ –Ω–∞ 2
    cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text)

    return cleaned_text

def extract_section(md_text: str, section: str, aliases: Optional[List[str]] = None) -> str:
    """
    –ò–∑–≤–ª–µ–∫–∞–µ—Ç —Ç–µ–∫—Å—Ç —Å–µ–∫—Ü–∏–∏ –ø–æ –∑–∞–≥–æ–ª–æ–≤–∫—É, —É—á–∏—Ç—ã–≤–∞—è –∞–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤–Ω—ã–µ –≤–∞—Ä–∏–∞–Ω—Ç—ã.

    Args:
        md_text (str): Markdown-–¥–æ–∫—É–º–µ–Ω—Ç.
        section (str): –ù–∞–∑–≤–∞–Ω–∏–µ —Å–µ–∫—Ü–∏–∏.
        aliases (List[str]): –í–æ–∑–º–æ–∂–Ω—ã–µ –∞–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤—ã (–Ω–∞–ø—Ä–∏–º–µ—Ä, 'summary' –¥–ª—è 'abstract').

    Returns:
        str: –ò–∑–≤–ª–µ—á—ë–Ω–Ω—ã–π —Ç–µ–∫—Å—Ç —Å–µ–∫—Ü–∏–∏.
    """
    lines = md_text.splitlines()
    start_idx, end_idx = None, None

    all_keys = [section.lower()] + [a.lower() for a in aliases or []]

    # 1. –ù–∞–π–¥—ë–º –∑–∞–≥–æ–ª–æ–≤–æ–∫ —Å–µ–∫—Ü–∏–∏
    for i, line in enumerate(lines):
        line_clean = re.sub(r'[\*\#]', '', line).strip().lower()
        for key in all_keys:
            if re.match(rf'^(\d+[\.\d+]*)?\s*{re.escape(key)}$', line_clean):
                start_idx = i + 1
                break
        if start_idx:
            break

    if start_idx is None:
        return ""

    # 2. –ü–æ–∏—Å–∫ –∫–æ–Ω—Ü–∞ —Å–µ–∫—Ü–∏–∏ ‚Äî —Å–ª–µ–¥—É—é—â–µ–≥–æ –∑–∞–≥–æ–ª–æ–≤–∫–∞
    for j in range(start_idx, len(lines)):
        next_line_clean = re.sub(r'[\*\#]', '', lines[j]).strip().lower()
        if re.match(r'^(\d+[\.\d+]*)?\s+\w+', next_line_clean):
            end_idx = j
            break

    section_lines = lines[start_idx:end_idx] if end_idx else lines[start_idx:]
    return "\n".join(section_lines).strip()



def split_into_documents(md_cleaned: str, arxiv_id: str, title: str):
    splitter = MarkdownTextSplitter(chunk_size=800, chunk_overlap=100)
    docs = splitter.create_documents([md_cleaned])
    for doc in docs:
        doc.metadata = {"arxiv_id": arxiv_id, "title": title}
    return docs



def get_vector_store(docs, persist_directory="test_chroma_db"):
    model_name = "BAAI/bge-m3"
    embedding_model = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"})
    vectordb = Chroma.from_documents(documents=docs, embedding=embedding_model, persist_directory=persist_directory)
    return vectordb



def retrieve_and_rerank(question, vectordb, arxiv_id, top_k=5, top_n=2):
    """
    –ò–∑–≤–ª–µ–∫–∞–µ—Ç top_k –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –∏–∑ Chroma –∏ —Ä–µ—Ä–∞–Ω–∂–∏—Ä—É–µ—Ç –∏—Ö —Å –ø–æ–º–æ—â—å—é FlagReranker.
    –í–æ–∑–≤—Ä–∞—â–∞–µ—Ç top_n –Ω–∞–∏–±–æ–ª–µ–µ —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö —Ñ—Ä–∞–≥–º–µ–Ω—Ç–æ–≤.
    
    :param question: –í–æ–ø—Ä–æ—Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è
    :param vectordb: –û–±—ä–µ–∫—Ç Chroma
    :param top_k: –°–∫–æ–ª—å–∫–æ —Ñ—Ä–∞–≥–º–µ–Ω—Ç–æ–≤ –∏–∑–≤–ª–µ–∫–∞—Ç—å –∏–∑ Chroma
    :param top_n: –°–∫–æ–ª—å–∫–æ –ª—É—á—à–∏—Ö –≤–æ–∑–≤—Ä–∞—â–∞—Ç—å –ø–æ—Å–ª–µ rerank
    :return: –°–ø–∏—Å–æ–∫ –∫–æ—Ä—Ç–µ–∂–µ–π (Document, score)
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": top_k, "filter": {"arxiv_id": arxiv_id}})
    candidates = retriever.get_relevant_documents(question)

    reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)
    pairs = [[question, doc.page_content] for doc in candidates]
    scores = reranker.compute_score(pairs, normalize=True)

    reranked = sorted(zip(candidates, scores), key=lambda x: -x[1])
    
    # print("üîç Rerank —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã:")
    # for i, (doc, score) in enumerate(reranked):
    #     print(f"#–ß–∞–Ω–∫{i+1} ‚Äî {doc.page_content}...")

    return reranked[:top_n]

### –°—Ç–∞—Ç—å—è Attention is all you need

In [9]:
# —Å–∫–∞—á–∏–≤–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Å—Ç–∞—Ç—å—é attention is all you need
arxiv_url = "https://arxiv.org/abs/1706.03762"
arxiv_id = extract_arxiv_id(arxiv_url)

pdf_path = f'./articles/{arxiv_id}.pdf'
pdf_path, title = download_pdf(arxiv_id, pdf_path)
md_raw = pymupdf4llm.to_markdown(pdf_path)
md_trimmed = trim_markdown_after_section(md_raw)
md_cleaned = clean_markdown_for_rag(md_trimmed)
abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])

In [12]:
# —Ä–∞–∑–±–∏–≤–∞–µ–º –Ω–∞ —á–∞–Ω–∫–∏
docs = split_into_documents(md_cleaned, arxiv_id, title)

In [13]:
# –¥–æ–±–∞–≤–ª—è–µ–º –≤ –±–¥
vectordb = get_vector_store(docs)

  embedding_model = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"})


In [27]:
questions = ['–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?',
            '–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?',
             '–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?',
             '–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?',
             '–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?',
             '–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?'
            ]

In [59]:
n=9
user_question = questions[n]
top_chunks = retrieve_and_rerank(user_question, vectordb, arxiv_id, top_k=10, top_n=2)
context = f'–í–æ–ø—Ä–æ—Å: {user_question}' + "\n\n" + "\n\n".join(f'–ß–∞–Ω–∫ {i+1}: {doc.page_content}' for i, (doc, _) in enumerate(top_chunks))

print(context)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?

–ß–∞–Ω–∫ 1: Attention mechanisms have become an integral part of compelling sequence modeling and transduction models in various tasks, allowing modeling of dependencies without regard to their distance in
the input or output sequences [ 2, 19 ]. In all but a few cases [ 27 ], however, such attention mechanisms
are used in conjunction with a recurrent network.

In this work we propose the Transformer, a model architecture eschewing recurrence and instead
relying entirely on an attention mechanism to draw global dependencies between input and output.
The Transformer allows for significantly more parallelization and can reach a new state of the art in
translation quality after being trained for as little as twelve hours on eight P100 GPUs.
### 2 Background

–ß–∞–Ω–∫ 2: [ 16 ], ByteNet [ 18 ] and ConvS2S [ 9 ], all of which use convolutional ne

In [70]:
vector_marks_1 = [1, 1, 1, 1, 1, 1, 1, 0, 1, 1]

In [81]:
import numpy as np
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: {np.mean(vector_marks_1) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: 90.0%


### –°—Ç–∞—Ç—å—è TTRL

In [60]:
# —Å–∫–∞—á–∏–≤–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Å—Ç–∞—Ç—å—é 
arxiv_url = "https://arxiv.org/pdf/2504.16084"
arxiv_id = extract_arxiv_id(arxiv_url)

pdf_path = f'./articles/{arxiv_id}.pdf'
pdf_path, title = download_pdf(arxiv_id, pdf_path)
md_raw = pymupdf4llm.to_markdown(pdf_path)
md_trimmed = trim_markdown_after_section(md_raw)
md_cleaned = clean_markdown_for_rag(md_trimmed)
abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])

In [63]:
# —Ä–∞–∑–±–∏–≤–∞–µ–º –Ω–∞ —á–∞–Ω–∫–∏
docs = split_into_documents(md_cleaned, arxiv_id, title)

In [64]:
# –¥–æ–±–∞–≤–ª—è–µ–º –≤ –±–¥
vectordb = get_vector_store(docs)

In [65]:
questions = ['–í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?',
             '–ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?',
             '–ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?',
             '–ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?',
             '–ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?',
             '–ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?',
             '–ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?'
            ]

In [76]:
n=9
user_question = questions[n]
top_chunks = retrieve_and_rerank(user_question, vectordb, arxiv_id, top_k=10, top_n=2)
context = f'–í–æ–ø—Ä–æ—Å: {user_question}' + "\n\n" + "\n\n".join(f'–ß–∞–Ω–∫ {i+1}: {doc.page_content}' for i, (doc, _) in enumerate(top_chunks))

print(context)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


–í–æ–ø—Ä–æ—Å: –° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?

–ß–∞–Ω–∫ 1: **TTRL is compatible with different RL algorithms.** Figure 4 presents the results. We
apply **TTRL** using PPO (Schulman et al., 2017) on MATH-500 to assess its compatibility
with different reinforcement learning algorithms. The performance trajectories of PPO and
GRPO are closely aligned. Compared to GRPO, PPO yields more stable outcomes while
achieving similar overall performance.

**3.3** **Training Dynamics**

–ß–∞–Ω–∫ 2: In this paper, we propose Test-Time Reinforcement Learning ( **TTRL** ), a novel framework
for training large language models with Reinforcement Learning (RL) on test data without
access to ground-truth labels. A key component of **TTRL** is its majority voting reward
function, which generates rule-based rewards based on consensus among model p

In [69]:
vector_marks_2 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [83]:
import numpy as np
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: {np.mean(vector_marks_2) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: 100.0%


### –°—Ç–∞—Ç—å—è Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

In [43]:
# —Å–∫–∞—á–∏–≤–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Å—Ç–∞—Ç—å—é 
arxiv_url = "https://arxiv.org/pdf/2005.11401"
arxiv_id = extract_arxiv_id(arxiv_url)

pdf_path = f'./articles/{arxiv_id}.pdf'
pdf_path, title = download_pdf(arxiv_id, pdf_path)
md_raw = pymupdf4llm.to_markdown(pdf_path)
md_trimmed = trim_markdown_after_section(md_raw)
md_cleaned = clean_markdown_for_rag(md_trimmed)
abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])

In [44]:
# —Ä–∞–∑–±–∏–≤–∞–µ–º –Ω–∞ —á–∞–Ω–∫–∏
docs = split_into_documents(md_cleaned, arxiv_id, title)

In [45]:
# –¥–æ–±–∞–≤–ª—è–µ–º –≤ –±–¥
vectordb = get_vector_store(docs)

  embedding_model = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"})


In [46]:
questions = ['–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?',
             '–ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?',
             '–ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?',
             '–ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?',
             '–ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?',
             '–ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?'
            ]

In [66]:
# —Ç—É—Ç –ø–æ–≤—Ç–æ—Ä–Ω–æ –∑–∞–ø–∏—Å–∞–ª —Å—Ç–∞—Ç—å—é –ø–æ—ç—Ç–æ–º—É –≤—ã–∫–∏–¥—ã–≤–∞—é –¥—É–±–ª–∏–∫–∞—Ç—ã –ø–æ —á–∞–Ω–∫–∞–º

n=9
user_question = questions[n]
top_chunks = retrieve_and_rerank(user_question, vectordb, arxiv_id, top_k=10, top_n=4)
del top_chunks[1:3]
context = f'–í–æ–ø—Ä–æ—Å: {user_question}' + "\n\n" + "\n\n".join(f'–ß–∞–Ω–∫ {i+1}: {doc.page_content}' for i, (doc, _) in enumerate(top_chunks))

print(context)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?

–ß–∞–Ω–∫ 1: **2.5** **Decoding**

At test time, RAG-Sequence and RAG-Token require different ways to approximate arg max *y* *p* ( *y|x* ) .

**RAG-Token** The RAG-Token model can be seen as a standard, autoregressive seq2seq generator with transition probability: *p* *[‚Ä≤]* *Œ∏* [(] *[y]* *[i]* *[|][x, y]* [1:] *[i][‚àí]* [1] [) =][ ÔøΩ] *z‚àà* top- *k* ( *p* ( *¬∑|x* )) *[p]* *[Œ∑]* [(] *[z]* *[i]* *[|][x]* [)] *[p]* *[Œ∏]* [(] *[y]* *[i]* *[|][x, z]* *[i]* *[, y]* [1:] *[i][‚àí]* [1] [)] [ To]

decode, we can plug *p* *[‚Ä≤]* *Œ∏* [(] *[y]* *[i]* *[|][x, y]* [1:] *[i][‚àí]* [1] [)][ into a standard beam decoder.]

–ß–∞–Ω–∫ 2: **2.1** **Models**

**RAG-Sequence Model** The RAG-Sequence model uses the same retrieved document to generate
the complete *sequence* . Technically, it treats the retrieved document as a single latent variable that
is m

In [67]:
vector_marks_3 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [68]:
import numpy as np
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: {np.mean(vector_marks_3) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: 100.0%


### –°—Ç–∞—Ç—å—è Efficient Estimation of Word Representations in Vector Space

In [72]:
# —Å–∫–∞—á–∏–≤–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Å—Ç–∞—Ç—å—é 
arxiv_url = "https://arxiv.org/pdf/1301.3781"
arxiv_id = extract_arxiv_id(arxiv_url)

In [82]:
questions = ['–ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?',
             '–í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?',
             '–ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?',
             '–ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?',
             '–ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?',
             '–ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?',
             '–ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?',
             '–ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?'
            ]

In [92]:
# —Ç—É—Ç –ø–æ–≤—Ç–æ—Ä–Ω–æ –∑–∞–ø–∏—Å–∞–ª —Å—Ç–∞—Ç—å—é –ø–æ—ç—Ç–æ–º—É –≤—ã–∫–∏–¥—ã–≤–∞—é –¥—É–±–ª–∏–∫–∞—Ç—ã –ø–æ —á–∞–Ω–∫–∞–º

n=9
user_question = questions[n]
top_chunks = retrieve_and_rerank(user_question, vectordb, arxiv_id, top_k=10, top_n=2)
context = f'–í–æ–ø—Ä–æ—Å: {user_question}' + "\n\n" + "\n\n".join(f'–ß–∞–Ω–∫ {i+1}: {doc.page_content}' for i, (doc, _) in enumerate(top_chunks))

print(context)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?

–ß–∞–Ω–∫ 1: Somewhat surprisingly, it was found that similarity of word representations goes beyond simple
syntactic regularities. Using a word offset technique where simple algebraic operations are performed on the word vectors, it was shown for example that *vector(‚ÄùKing‚Äù) - vector(‚ÄùMan‚Äù) + vec-*
*tor(‚ÄùWoman‚Äù)* results in a vector that is closest to the vector representation of the word *Queen* [20].

–ß–∞–Ω–∫ 2: We evaluate the overall accuracy for all question types, and for each question type separately (semantic, syntactic). Question is assumed to be correctly answered only if the closest word to the
vector computed using the above method is exactly the same as the correct word in the question;
synonyms are thus counted as mistakes. This also means that reaching 100% accuracy is likely
to be impossible, as the current models 

In [101]:
vector_marks_4 = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]

In [102]:
import numpy as np
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: {np.mean(vector_marks_4) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: 80.0%


### –°—Ç–∞—Ç—å—è LADDER: SELF-IMPROVING LLMS THROUGH RECURSIVE PROBLEM DECOMPOSITION

In [111]:
# —Å–∫–∞—á–∏–≤–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Å—Ç–∞—Ç—å—é 
arxiv_url = "https://arxiv.org/pdf/2503.00735"
arxiv_id = extract_arxiv_id(arxiv_url)

In [112]:
questions = ['–í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?',
             '–ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?',
             '–ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?',
             '–ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?'
            ]

In [122]:
n=9
user_question = questions[n]
top_chunks = retrieve_and_rerank(user_question, vectordb, arxiv_id, top_k=10, top_n=2)
context = f'–í–æ–ø—Ä–æ—Å: {user_question}' + "\n\n" + "\n\n".join(f'–ß–∞–Ω–∫ {i+1}: {doc.page_content}' for i, (doc, _) in enumerate(top_chunks))

print(context)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–ß–∞–Ω–∫ 1: Analysis of the remaining unsolved problems reveals nuanced challenges in the model‚Äôs mathematical integration
capabilities. The model demonstrates difficulty with integrals requiring multiple technique combinations and novel
substitution patterns. These complex scenarios highlight limitations in the current approach, suggesting that while the
variant generation successfully teaches individual techniques, synthesizing these methods for more intricate problems
remains a challenge.

–ß–∞–Ω–∫ 2: *x* [2] + 1
Original problem:
ÔøΩ *x* [4] + 2 *x* [2] + 1 *[dx]*

*x* [2]
Level 1 variant:
ÔøΩ *x* [4] + 1 *[dx]*

Level 2 variant:
ÔøΩ *x* [2] + 1 *[dx]*

Level 3 variant:
ÔøΩ *x* [2] *[ dx]*

Quality control presented a significant challenge in our variant generation process. While we prompted the model to
generate "easier" or

In [123]:
vector_marks_5 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [124]:
import numpy as np
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: {np.mean(vector_marks_5) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 10 –≤–æ–ø—Ä–æ—Å–∞—Ö: 100.0%


## –û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –Ω–∞ 5 —Å—Ç–∞—Ç—å—è—Ö (50 –≤–æ–ø—Ä–æ—Å–æ–≤)

In [126]:
print(f"–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞: {np.mean(vector_marks_1 + vector_marks_2 + vector_marks_3 + vector_marks_4 + vector_marks_5) * 100}%")

–û—Ü–µ–Ω–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞: 94.0%


# –û—Ü–µ–Ω–∫–∞ —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏

## –°–±–æ—Ä –∏ –∑–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö –≤ ChromaDB

In [3]:
urls = ['https://arxiv.org/pdf/2504.16084',
        'https://arxiv.org/abs/1706.03762',
        'https://arxiv.org/pdf/2112.02242',
        'https://arxiv.org/pdf/2405.12819',
        'https://arxiv.org/pdf/2005.11401',
        'https://arxiv.org/pdf/1301.3781',
        'https://arxiv.org/pdf/2501.07391',
        'https://arxiv.org/pdf/2503.00735',
        'https://arxiv.org/pdf/2501.19399',
        'https://arxiv.org/pdf/2205.14135']

data_lst = []

In [4]:
for arxiv_url in tqdm(urls):
    arxiv_id = extract_arxiv_id(arxiv_url)
    
    pdf_path = f'./articles/{arxiv_id}.pdf'
    pdf_path, title = download_pdf(arxiv_id, pdf_path)
    md_raw = pymupdf4llm.to_markdown(pdf_path)
    md_trimmed = trim_markdown_after_section(md_raw)
    md_cleaned = clean_markdown_for_rag(md_trimmed)
    abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
    conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])
    
    final = abstract + "\n\n" + conclusion
    data_lst.append(final)

    docs = split_into_documents(md_cleaned, arxiv_id, title)
    vectordb = get_vector_store(docs)

  embedding_model = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"})
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:44<00:00, 10.44s/it]


In [13]:
# –≠—Ç–∞–ª–æ–Ω–Ω—ã–µ —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏
gt = ['–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforcement Learning (TTRL) ‚Äî –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLMs) —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º –Ω–∞ –Ω–µ—Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –ö–ª—é—á–µ–≤–∞—è –∏–¥–µ—è TTRL –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–∏ majority voting –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º. –ú–µ—Ç–æ–¥ –ø–æ–∑–≤–æ–ª—è–µ—Ç LLM —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞—Ç—å —Å–≤–æ–∏ –æ—Ç–≤–µ—Ç—ã, –æ–ø–∏—Ä–∞—è—Å—å –Ω–∞ —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—ã–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç, —á—Ç–æ TTRL –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–π –ø—Ä–∏—Ä–æ—Å—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏, –≤–∫–ª—é—á–∞—è —É–≤–µ–ª–∏—á–µ–Ω–∏–µ pass@1 –Ω–∞ 159% –Ω–∞ AIME 2024, –∏ –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç —Å–æ–ø–æ—Å—Ç–∞–≤–∏–º—ã–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã —Å –º–æ–¥–µ–ª—è–º–∏, –æ–±—É—á–µ–Ω–Ω—ã–º–∏ –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –¢–∞–∫–∏–º –æ–±—Ä–∞–∑–æ–º, TTRL –æ—Ç–∫—Ä—ã–≤–∞–µ—Ç –ø—É—Ç—å –∫ –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ–º–æ–º—É RL —Å —Å–∞–º–æ–≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã–º–∏ –Ω–∞–≥—Ä–∞–¥–∞–º–∏ –∏ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—é –≤ –±–æ–ª–µ–µ —à–∏—Ä–æ–∫–æ–º —Å–ø–µ–∫—Ç—Ä–µ –∑–∞–¥–∞—á.',
      '–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –¥–ª—è –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–µ–π, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –∏—Å–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º–µ –≤–Ω–∏–º–∞–Ω–∏—è, –±–µ–∑ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã—Ö —Å–ª–æ—ë–≤. Transformer –¥–æ—Å—Ç–∏–≥–∞–µ—Ç –Ω–æ–≤–æ–≥–æ —É—Ä–æ–≤–Ω—è –∫–∞—á–µ—Å—Ç–≤–∞ –≤ –º–∞—à–∏–Ω–Ω–æ–º –ø–µ—Ä–µ–≤–æ–¥–µ –Ω–∞ –∑–∞–¥–∞—á–∞—Ö WMT 2014 (28.4 BLEU –¥–ª—è EN‚ÜíDE –∏ 41.8 BLEU –¥–ª—è EN‚ÜíFR), –ø—Ä–∏ —ç—Ç–æ–º –æ–±—É—á–∞–µ—Ç—Å—è –±—ã—Å—Ç—Ä–µ–µ –∏ –ø–∞—Ä–∞–ª–ª–µ–ª–∏—Ç—Å—è –ª—É—á—à–µ, —á–µ–º –ø—Ä–µ–¥—ã–¥—É—â–∏–µ –º–æ–¥–µ–ª–∏. –ê–≤—Ç–æ—Ä—ã –ø–æ–¥—á–µ—Ä–∫–∏–≤–∞—é—Ç –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –¥–ª—è —à–∏—Ä–æ–∫–æ–≥–æ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è, –≤–∫–ª—é—á–∞—è –Ω–µ—Ä–µ—á–µ–≤—ã–µ –º–æ–¥–∞–ª—å–Ω–æ—Å—Ç–∏, –∏ –ø–ª–∞–Ω–∏—Ä—É—é—Ç –¥–∞–ª—å–Ω–µ–π—à–∏–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏—è –≤ –æ–±–ª–∞—Å—Ç–∏ –ª–æ–∫–∞–ª—å–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è –∏ –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–π –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏',
      '–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω—ã—Ö —Å–∏—Å—Ç–µ–º —Å —É—á—ë—Ç–æ–º –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –Ω–∞ –æ—Å–Ω–æ–≤–µ –Ω–µ—è–≤–Ω–æ–≥–æ —Ñ–∏–¥–±–µ–∫–∞. –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–Ω–ª–∞–π–Ω-–∞–ª–≥–æ—Ä–∏—Ç–º, –æ–±–Ω–æ–≤–ª—è—é—â–∏–π –º–æ–¥–µ–ª—å –ø–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏–º –±–ª–æ–∫–∞–º –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–π. –û—Å–Ω–æ–≤–Ω–æ–π –≤–∫–ª–∞–¥ –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –ø–æ —Å—Ç–∞—Ü–∏–æ–Ω–∞—Ä–Ω–æ—Å—Ç–∏ –∏ —É—Å—Ç–æ–π—á–∏–≤–æ—Å—Ç–∏ –ø–æ–≤–µ–¥–µ–Ω–∏—è, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —Å—É—â–µ—Å—Ç–≤–µ–Ω–Ω–æ –ø–æ–≤—ã—Å–∏—Ç—å –∫–∞—á–µ—Å—Ç–≤–æ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π (MAP, NDCG) –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ –Ω–∞ —Å–∏–ª—å–Ω–æ —Å–æ–∫—Ä–∞—â–µ–Ω–Ω–æ–º, –Ω–æ –±–æ–ª–µ–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ç–∏–≤–Ω–æ–º –ø–æ–¥–º–Ω–æ–∂–µ—Å—Ç–≤–µ –¥–∞–Ω–Ω—ã—Ö. –ú–µ—Ç–æ–¥ –ø–æ–∫–∞–∑–∞–ª –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–µ —É–ª—É—á—à–µ–Ω–∏—è, –æ—Å–æ–±–µ–Ω–Ω–æ –Ω–∞ –±–æ–ª—å—à–∏—Ö –¥–∞—Ç–∞—Å–µ—Ç–∞—Ö',
      '–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑–æ—Ä –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLMs) –≤ –∑–∞–¥–∞—á–∞—Ö NLP, –ø—Ä–µ–¥–ª–∞–≥–∞—è –µ–¥–∏–Ω—É—é —Ç–∞–∫—Å–æ–Ω–æ–º–∏—é –ø–æ–¥—Ö–æ–¥–æ–≤: —Å –∑–∞–º–æ—Ä–æ–∂–µ–Ω–Ω—ã–º–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏ (parameter-frozen) –∏ —Å –∞–¥–∞–ø—Ç–∞—Ü–∏–µ–π (parameter-tuning). –†–∞–±–æ—Ç–∞ —Ç–∞–∫–∂–µ –≤—ã–¥–µ–ª—è–µ—Ç –Ω–æ–≤—ã–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏–µ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏—è –∏ –≤—ã–∑–æ–≤—ã, —Å —Ü–µ–ª—å—é —Å—Ç–∏–º—É–ª–∏—Ä–æ–≤–∞—Ç—å –¥–∞–ª—å–Ω–µ–π—à–∏–µ –ø—Ä–æ—Ä—ã–≤—ã –≤ –æ–±–ª–∞—Å—Ç–∏. –í –¥–æ–ø–æ–ª–Ω–µ–Ω–∏–µ, —Å–æ–∑–¥–∞–Ω –ø—É–±–ª–∏—á–Ω—ã–π —Ä–µ—Å—É—Ä—Å –¥–ª—è –æ—Ç—Å–ª–µ–∂–∏–≤–∞–Ω–∏—è –ø–æ—Å–ª–µ–¥–Ω–∏—Ö –ø—É–±–ª–∏–∫–∞—Ü–∏–π, —á—Ç–æ –¥–µ–ª–∞–µ—Ç –æ–±–∑–æ—Ä –Ω–µ —Ç–æ–ª—å–∫–æ –∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–∏–º, –Ω–æ –∏ –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–º –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–æ–º –¥–ª—è –∏—Å—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª–µ–π',
      '–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é RAG-–º–æ–¥–µ–ª–µ–π, —Å–æ—á–µ—Ç–∞—é—â–∏—Ö –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –ø–∞–º—è—Ç—å (–ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω—ã–µ seq2seq –º–æ–¥–µ–ª–∏) –∏ –Ω–µ–ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é (–≤–µ–∫—Ç–æ—Ä–Ω—ã–π –∏–Ω–¥–µ–∫—Å –í–∏–∫–∏–ø–µ–¥–∏–∏, –¥–æ—Å—Ç—É–ø–Ω—ã–π —á–µ—Ä–µ–∑ –Ω–µ–π—Ä–æ–Ω–Ω—ã–π —Ä–µ—Ç—Ä–∏–≤–µ—Ä). –û–Ω–∏ —Å—Ä–∞–≤–Ω–∏–≤–∞—é—Ç –¥–≤–µ –≤–µ—Ä—Å–∏–∏: —Å —Ñ–∏–∫—Å–∏—Ä–æ–≤–∞–Ω–Ω—ã–º–∏ –∏ —Ç–æ–∫–µ–Ω-–∑–∞–≤–∏—Å–∏–º—ã–º–∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏—è–º–∏. –ú–æ–¥–µ–ª—å –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç –Ω–æ–≤–æ–µ —Å–æ—Å—Ç–æ—è–Ω–∏–µ –∏—Å–∫—É—Å—Å—Ç–≤–∞ –≤ –∑–∞–¥–∞—á–∞—Ö –æ—Ç–∫—Ä—ã—Ç–æ–≥–æ –≤–æ–ø—Ä–æ—Å–Ω–æ-–æ—Ç–≤–µ—Ç–Ω–æ–≥–æ –ø–æ–∏—Å–∫–∞ –∏ –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –±–æ–ª–µ–µ —Ç–æ—á–Ω—ã–π –∏ —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–Ω—ã–π —Ç–µ–∫—Å—Ç, —á–µ–º —á–∏—Å—Ç–æ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∏–µ seq2seq –º–æ–¥–µ–ª–∏, –ø–æ–∫–∞–∑—ã–≤–∞—è –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª RAG –≤ –∑–∞–¥–∞—á–∞—Ö, —Ç—Ä–µ–±—É—é—â–∏—Ö –¥–æ—Å—Ç—É–ø–∞ –∫ –∑–Ω–∞–Ω–∏—è–º',
      '–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ–¥—Ö–æ–¥–∞ ‚Äî CBOW –∏ Skip-gram ‚Äî –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –Ω–∞ –º–∞—Å—à—Ç–∞–±–Ω—ã—Ö –∫–æ—Ä–ø—É—Å–∞—Ö. –≠—Ç–∏ –º–æ–¥–µ–ª–∏ –æ–±–µ—Å–ø–µ—á–∏–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –∫–∞—á–µ—Å—Ç–≤–∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π –ø—Ä–∏ –Ω–∏–∑–∫–∏—Ö –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–∞—Ç—Ä–∞—Ç–∞—Ö, –ø–æ–∑–≤–æ–ª—è—è –æ–±—É—á–∞—Ç—å –≤–µ–∫—Ç–æ—Ä—ã –¥–∞–∂–µ –Ω–∞ —Ç—Ä–∏–ª–ª–∏–æ–Ω–µ —Å–ª–æ–≤. –í–µ–∫—Ç–æ—Ä—ã –¥–æ—Å—Ç–∏–≥–∞—é—Ç state-of-the-art —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –≤ –∑–∞–¥–∞—á–∞—Ö –æ—Ü–µ–Ω–∫–∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ –∏ –Ω–∞—Ö–æ–¥—è—Ç –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ –≤ —Ä—è–¥–µ NLP-–∑–∞–¥–∞—á, –≤–∫–ª—é—á–∞—è –º–∞—à–∏–Ω–Ω—ã–π –ø–µ—Ä–µ–≤–æ–¥, –∞–Ω–∞–ª–∏–∑ —Ç–æ–Ω–∞–ª—å–Ω–æ—Å—Ç–∏ –∏ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –∑–Ω–∞–Ω–∏–π –≤ –±–∞–∑–∞—Ö –¥–∞–Ω–Ω—ã—Ö. –†–∞–±–æ—Ç–∞ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª –ø—Ä–æ—Å—Ç—ã—Ö –º–æ–¥–µ–ª–µ–π –∫–∞–∫ –±–∞–∑–æ–≤–æ–≥–æ —ç–ª–µ–º–µ–Ω—Ç–∞ –¥–ª—è –±—É–¥—É—â–∏—Ö –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–π –≤ NLP',
      '–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –∏ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–æ–≤ Retrieval-Augmented Generation (RAG) —Å–∏—Å—Ç–µ–º. –ê–≤—Ç–æ—Ä—ã –∏–∑—É—á–∞—é—Ç –≤–ª–∏—è–Ω–∏–µ —Ç–∞–∫–∏—Ö —Ñ–∞–∫—Ç–æ—Ä–æ–≤, –∫–∞–∫ —Ä–∞–∑–º–µ—Ä –º–æ–¥–µ–ª–∏, –¥–∏–∑–∞–π–Ω –ø—Ä–æ–º–ø—Ç–∞, —Ä–∞–∑–º–µ—Ä —á–∞–Ω–∫–æ–≤, –æ–±—ä—ë–º –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π, —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏—è, query expansion –∏ Contrastive In-Context Learning, –≤ —Ç–æ–º —á–∏—Å–ª–µ –≤ –º—É–ª—å—Ç–∏—è–∑—ã—á–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç, –∫–∞–∫ –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏–∏ RAG –≤–ª–∏—è—é—Ç –Ω–∞ –∫–∞—á–µ—Å—Ç–≤–æ –æ—Ç–≤–µ—Ç–æ–≤, –ø—Ä–µ–¥–ª–∞–≥–∞—è –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–µ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –ø–æ —Å–æ–∑–¥–∞–Ω–∏—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö –∏ –∞–¥–∞–ø—Ç–∏—Ä—É–µ–º—ã—Ö RAG-—Å–∏—Å—Ç–µ–º –¥–ª—è —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á. –ö–æ–¥ –∏ —Ä–µ–∞–ª–∏–∑–∞—Ü–∏–∏ –æ–ø—É–±–ª–∏–∫–æ–≤–∞–Ω—ã –≤ –æ—Ç–∫—Ä—ã—Ç–æ–º –¥–æ—Å—Ç—É–ø–µ.',
      '–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π –∫ —Ä–µ—à–µ–Ω–∏—é –∑–∞–¥–∞—á —Å –ø–æ–º–æ—â—å—é —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–π –¥–µ–∫–æ–º–ø–æ–∑–∏—Ü–∏–∏, –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å –≤–µ—Ä–∏—Ñ–∏—Ü–∏—Ä—É–µ–º—ã–º–∏ –Ω–∞–≥—Ä–∞–¥–∞–º–∏ –∏ –µ–≥–æ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏—è –Ω–∞ –∏–Ω—Ñ–µ—Ä–µ–Ω—Å ‚Äî TTRL. –ú–µ—Ç–æ–¥ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ —É–ø—Ä–æ—â–∞—Ç—å —Å–ª–æ–∂–Ω—ã–µ –∑–∞–¥–∞—á–∏ –∏ –æ–±—É—á–∞—Ç—å—Å—è –Ω–∞ —ç—Ç–∏—Ö –≤–∞—Ä–∏–∞–Ω—Ç–∞—Ö, –¥–æ—Å—Ç–∏–≥–∞—è –ø—Ä–æ—Ä—ã–≤–Ω—ã—Ö —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –≤ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á–∞—Ö (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Ä–æ—Å—Ç —Ç–æ—á–Ω–æ—Å—Ç–∏ —Å 1% –¥–æ 82% –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è –∏ 90% –Ω–∞ MIT Integration Bee). –†–∞–±–æ—Ç–∞ –ø–æ–¥—á—ë—Ä–∫–∏–≤–∞–µ—Ç —Ü–µ–Ω–Ω–æ—Å—Ç—å —Å—Ç—Ä–∞—Ç–µ–≥–∏—á–µ—Å–∫–æ–≥–æ —Å–∞–º–æ–æ–±—É—á–µ–Ω–∏—è, –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã—Ö –Ω–∞–≥—Ä–∞–¥ –∏ —Ç–µ—Å—Ç–æ–≤–æ–≥–æ —Å–∞–º–æ—É–ª—É—á—à–µ–Ω–∏—è, –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—è –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –ø–æ–¥—Ö–æ–¥–∞ –≤ –¥—Ä—É–≥–∏—Ö –¥–æ–º–µ–Ω–∞—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Å–∏–Ω—Ç–µ–∑ –ø—Ä–æ–≥—Ä–∞–º–º –∏ –∞–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –¥–æ–∫–∞–∑–∞—Ç–µ–ª—å—Å—Ç–≤–æ —Ç–µ–æ—Ä–µ–º.',
      'Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É–Ω–∫—Ü–∏–∏ Softmax –≤ —Å–ª–æ—è—Ö –≤–Ω–∏–º–∞–Ω–∏—è Transformer-–º–æ–¥–µ–ª–µ–π, —Ä–∞–∑—Ä–∞–±–æ—Ç–∞–Ω–Ω–∞—è –¥–ª—è —Ä–µ—à–µ–Ω–∏—è –ø—Ä–æ–±–ª–µ–º—ã "–∑–∞—Ç—É—Ö–∞–Ω–∏—è –≤–Ω–∏–º–∞–Ω–∏—è" –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –≤—Ö–æ–¥–Ω–æ–≥–æ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞. SSMax —Å–æ—Ö—Ä–∞–Ω—è–µ—Ç —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ —Ñ–æ–∫—É—Å–∏—Ä–æ–≤–∞—Ç—å—Å—è –Ω–∞ –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –¥–∞–∂–µ –ø—Ä–∏ –¥–ª–∏–Ω–Ω—ã—Ö –≤—Ö–æ–¥–∞—Ö, —É–ª—É—á—à–∞—è –æ–±–æ–±—â–∞—é—â—É—é —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å –ø–æ –¥–ª–∏–Ω–µ, —Å–∫–æ—Ä–æ—Å—Ç—å —Å—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –Ω–∞ —ç—Ç–∞–ø–µ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è –∏ —Ç–æ—á–Ω–æ—Å—Ç—å –∏–∑–≤–ª–µ—á–µ–Ω–∏—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. SSMax –º–æ–∂–Ω–æ –≤–Ω–µ–¥—Ä—è—Ç—å –∫–∞–∫ —Å –Ω–∞—á–∞–ª–∞ –æ–±—É—á–µ–Ω–∏—è, —Ç–∞–∫ –∏ –Ω–∞ —É–∂–µ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω—ã–µ –º–æ–¥–µ–ª–∏, –ø–æ–ª—É—á–∞—è —Å—Ç–∞–±–∏–ª—å–Ω—ã–µ —É–ª—É—á—à–µ–Ω–∏—è. –†–∞–±–æ—Ç–∞ –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç, —á—Ç–æ SSMax ‚Äî –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤–Ω—ã–π –∫–∞–Ω–¥–∏–¥–∞—Ç –¥–ª—è –∑–∞–º–µ–Ω—ã Softmax –≤ –±—É–¥—É—â–∏—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤, –æ—Å–æ–±–µ–Ω–Ω–æ –≤ –∑–∞–¥–∞—á–∞—Ö —Å –¥–ª–∏–Ω–Ω—ã–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–æ–º',
      '–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º —Ç–æ—á–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è, –æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –¥–ª—è GPU-–∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä. –û–Ω –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ç–∞–π–ª–∏–Ω–≥, —á—Ç–æ–±—ã –º–∏–Ω–∏–º–∏–∑–∏—Ä–æ–≤–∞—Ç—å –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–ø–µ—Ä–∞—Ü–∏–π —á—Ç–µ–Ω–∏—è –∏ –∑–∞–ø–∏—Å–∏ –º–µ–∂–¥—É –≤—ã—Å–æ–∫–æ—Å–∫–æ—Ä–æ—Å—Ç–Ω–æ–π –ø–∞–º—è—Ç—å—é (HBM) –∏ –ª–æ–∫–∞–ª—å–Ω–æ–π SRAM, —á—Ç–æ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É—Å–∫–æ—Ä—è–µ—Ç –æ–±—É—á–µ–Ω–∏–µ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤. FlashAttention –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç —Ä–µ–∞–ª—å–Ω—ã–π –ø—Ä–∏—Ä–æ—Å—Ç —Å–∫–æ—Ä–æ—Å—Ç–∏ (–¥–æ 3√ó –Ω–∞ GPT-2, 15% –Ω–∞ BERT-large) –±–µ–∑ —É—â–µ—Ä–±–∞ –¥–ª—è –∫–∞—á–µ—Å—Ç–≤–∞ –º–æ–¥–µ–ª–∏. –†–∞—Å—à–∏—Ä–µ–Ω–Ω–∞—è –≤–µ—Ä—Å–∏—è —Å –±–ª–æ–∫-—Å–ø–∞—Ä—Å–Ω–æ—Å—Ç—å—é –æ–ø–µ—Ä–µ–∂–∞–µ—Ç —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–µ –ø—Ä–∏–±–ª–∏–∂—ë–Ω–Ω—ã–µ –º–µ—Ç–æ–¥—ã –ø–æ —Å–∫–æ—Ä–æ—Å—Ç–∏ –∏ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º —Ä–∞–±–æ—Ç–∞—Ç—å —Å –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞–º–∏ –¥–æ 64K —Ç–æ–∫–µ–Ω–æ–≤, –≤–ø–µ—Ä–≤—ã–µ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—è –≤—ã—à–µ —Å–ª—É—á–∞–π–Ω–æ–≥–æ —É—Ä–æ–≤–Ω—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –Ω–∞ –∑–∞–¥–∞—á–∞—Ö Path-X –∏ Path-256. –ú–µ—Ç–æ–¥ –æ—Ç–∫—Ä—ã–≤–∞–µ—Ç –ø—É—Ç—å –∫ –±–æ–ª–µ–µ –¥–ª–∏–Ω–Ω—ã–º, –∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω—ã–º –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–º —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞–º']

## T-Lite-8B

In [3]:
model_name = "t-bank-ai/T-lite-instruct-0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map="auto")

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)
terminators = list({
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>") or tokenizer.eos_token_id,
    })

embedding_model = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  embedding_model = HuggingFaceBgeEmbeddings(
  vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)


In [None]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–°—Ñ–æ—Ä–º—É–ª–∏—Ä—É–π –∫—Ä–∞—Ç–∫–æ–µ –∏ —Ç–æ—á–Ω–æ–µ —Ä–µ–∑—é–º–µ –ø–æ —Ç–µ–∫—Å—Ç—É –Ω–∏–∂–µ."
)

def build_messages(abstract, conclusion) -> list[dict]:
    full_text = f"Abstract:\n{abstract}\n\nConclusion:\n{conclusion}"

    return [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": full_text},
        {"role": "assistant", "content": "–ö—Ä–∞—Ç–∫–æ–µ —Ä–µ–∑—é–º–µ:"},
    ]

In [7]:
outputs = []
times = []
for arxiv_url in tqdm(urls):
    arxiv_id = extract_arxiv_id(arxiv_url)
    
    pdf_path = f'./articles/{arxiv_id}.pdf'
    pdf_path, title = download_pdf(arxiv_id, pdf_path)
    md_raw = pymupdf4llm.to_markdown(pdf_path)
    md_trimmed = trim_markdown_after_section(md_raw)
    md_cleaned = clean_markdown_for_rag(md_trimmed)
    abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
    conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])
    
    messages = build_messages(abstract, conclusion)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: {title}\n')
    print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:19<02:54, 19.35s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: TTRL: Test-Time Reinforcement Learning

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforcement Learning ( **TTRL** ), –∫–æ—Ç–æ—Ä—ã–π –∏—Å–ø–æ–ª—å–∑—É–µ—Ç Reinforcement Learning –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLMs) –Ω–∞ –æ—Å–Ω–æ–≤–µ –¥–∞–Ω–Ω—ã—Ö –±–µ–∑ –º–µ—Ç–æ–∫. –û—Å–Ω–æ–≤–Ω–∞—è –∑–∞–¥–∞—á–∞ ‚Äî —Å–æ–∑–¥–∞–Ω–∏–µ –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–π –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ–Ω—Å–µ–Ω—Å—É—Å–∞ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏. **TTRL** –ø—Ä–∏–º–µ–Ω—è–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–π, —á—Ç–æ –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–µ —É–ª—É—á—à–µ–Ω–∏—è –≤ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö –∏ –º–æ–¥–µ–ª—è—Ö. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –Ω–∞ —Ç–µ—Å—Ç–µ AIME 2024 **TTRL** —É–≤–µ–ª–∏—á–∏–≤–∞–µ—Ç —Ç–æ—á–Ω–æ—Å—Ç—å –Ω–∞ 159% –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏—Å—Ö–æ–¥–Ω–æ–π –º–æ–¥

 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:41<02:49, 21.17s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Attention Is All You Need

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ –º–æ–¥–µ–ª–∏—Ä–æ–≤–∞–Ω–∏—é –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–µ–π ‚Äî Transformer, –∫–æ—Ç–æ—Ä—ã–π –ø–æ–ª–Ω–æ—Å—Ç—å—é –æ—Å–Ω–æ–≤–∞–Ω –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º–∞—Ö –≤–Ω–∏–º–∞–Ω–∏—è, –æ—Ç–∫–∞–∑—ã–≤–∞—è—Å—å –æ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö –∏ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã—Ö —Å–µ—Ç–µ–π. –í —Ö–æ–¥–µ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–æ–≤ –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –º–∞—à–∏–Ω–Ω–æ–≥–æ –ø–µ—Ä–µ–≤–æ–¥–∞ –Ω–∞ –¥–∞–Ω–Ω—ã—Ö WMT 2014, Transformer –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –≤ –∫–∞—á–µ—Å—Ç–≤–µ –ø–µ—Ä–µ–≤–æ–¥–∞ –∏ —Å–∫–æ—Ä–æ—Å—Ç–∏ –æ–±—É—á–µ–Ω–∏—è –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã–º–∏ –º–æ–¥–µ–ª—è–º–∏. –ù–∞ –∞–Ω–≥–ª–æ-–Ω–µ–º–µ—Ü–∫–æ–º –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏–∏ –º–æ–¥–µ–ª—å –¥–æ—Å—Ç–∏–≥–∞–µ—Ç 28.4 BLEU, –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—è –ø—Ä–µ–¥—ã–¥—É—â–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã, –∞ –Ω–∞ –∞–Ω–≥–ª–æ-—Ñ—Ä–∞–Ω—Ü—É–∑—Å–∫–æ–º ‚Äî 41

 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:53<01:59, 17.05s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Recommender systems: when memory matters

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –Ω–∞ –æ–±—É—á–µ–Ω–∏–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω—ã—Ö —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω—ã—Ö —Å–∏—Å—Ç–µ–º, –∏—Å–ø–æ–ª—å–∑—É—è –Ω–µ—è–≤–Ω—ã–µ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏–µ –¥–∞–Ω–Ω—ã–µ. –ü—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –æ–Ω–ª–∞–π–Ω-–∞–ª–≥–æ—Ä–∏—Ç–º, –æ–±–Ω–æ–≤–ª—è—é—â–∏–π –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –±–ª–æ–∫–æ–≤ –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–π. –≠–º–ø–∏—Ä–∏—á–µ—Å–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –º–µ—Ç—Ä–∏–∫ MAP –∏ NDCG –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ –Ω–∞ –æ—Ç—Ñ–∏–ª—å—Ç—Ä–æ–≤–∞–Ω–Ω–æ–º –ø–æ–¥–º–Ω–æ–∂–µ—Å—Ç–≤–µ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π —Å –≤—ã—Å–æ–∫–æ–π —Å—Ç–µ–ø–µ–Ω—å—é –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –≤ –∏—Ö –ø–æ–≤–µ–¥–µ–Ω–∏–∏, —á—Ç–æ –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–∞–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç

 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [01:12<01:45, 17.65s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Large Language Models Meet NLP: A Survey

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  

–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –≤ –∑–∞–¥–∞—á–∞—Ö –æ–±—Ä–∞–±–æ—Ç–∫–∏ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ–≥–æ —è–∑—ã–∫–∞ (NLP). –û–Ω–æ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–æ –Ω–∞ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏–∑—É—á–µ–Ω–∏–µ —Ç–µ–∫—É—â–∏—Ö –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–π –∏ –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤ LLM –≤ —ç—Ç–æ–π –æ–±–ª–∞—Å—Ç–∏. –û—Å–Ω–æ–≤–Ω—ã–µ –≤–æ–ø—Ä–æ—Å—ã –≤–∫–ª—é—á–∞—é—Ç: –∫–∞–∫ –º–æ–¥–µ–ª–∏ –ø—Ä–∏–º–µ–Ω—è—é—Ç—Å—è –≤ NLP, —Ä–µ—à–µ–Ω—ã –ª–∏ —É–∂–µ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã–µ –∑–∞–¥–∞—á–∏ —Å –∏—Ö –ø–æ–º–æ—â—å—é, –∏ –∫–∞–∫–∏–µ –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç–∏ –æ—Ç–∫—Ä—ã–≤–∞—é—Ç—Å—è –¥–ª—è –±—É–¥—É—â–µ–≥–æ. –í —Ä–∞–±–æ—Ç–µ –≤–≤–æ–¥–∏—Ç—Å—è –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏—è –º–µ—Ç–æ–¥–æ–≤: –∑–∞–º–æ—Ä–æ–∂–µ–Ω–Ω—ã–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏ –Ω–∞—Å—Ç—Ä–∞–∏–≤–∞–µ–º—ã–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã, —á—Ç–æ –ø–æ–º–æ–≥–∞–µ—Ç —Å—Ç—Ä—É–∫—Ç—É—Ä–∏—Ä–æ–≤–∞—Ç—å —Å—É—â

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [01:30<01:28, 17.73s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –∏ –Ω–µ–ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –ø–∞–º—è—Ç—å –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–µ–∫—Å—Ç–∞ (RAG). –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç –¥–≤–∞ –≤–∞—Ä–∏–∞–Ω—Ç–∞ RAG, –≥–¥–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∞—è –ø–∞–º—è—Ç—å ‚Äî —ç—Ç–æ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω–∞—è seq2seq –º–æ–¥–µ–ª—å, –∞ –Ω–µ–ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∞—è ‚Äî –ø–ª–æ—Ç–Ω—ã–π –∏–Ω–¥–µ–∫—Å –í–∏–∫–∏–ø–µ–¥–∏–∏, –¥–æ—Å—Ç—É–ø–Ω—ã–π —á–µ—Ä–µ–∑ –Ω–µ–π—Ä–æ–Ω–Ω—ã–π —Ä–µ—Ç—Ä–∏–≤–µ—Ä. –ú–æ–¥–µ–ª–∏ –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –Ω–∞ –∑–∞–¥–∞—á–∞—Ö, —Ç—Ä–µ–±—É—é—â–∏—Ö –¥–æ—Å—Ç—É–ø–∞ –∫ –∑–Ω–∞–Ω–∏—è–º, –≤–∫–ª—é—á–∞—è –∑–∞–¥–∞—á–∏ –≤–æ–ø—Ä–æ—Å-–æ—Ç–≤–µ—Ç–∞, –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—è –∫–∞–∫ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∏–µ, —Ç–∞–∫ –∏ —Å–ø–µ—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –º–æ–¥–µ–ª–∏. –î–ª—è 

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [01:45<01:07, 16.91s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Efficient Estimation of Word Representations in Vector Space

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö —Ä–µ—à–µ–Ω–∏—è –¥–ª—è —Å–æ–∑–¥–∞–Ω–∏—è –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –Ω–∞ –æ—Å–Ω–æ–≤–µ –±–æ–ª—å—à–∏—Ö –¥–∞–Ω–Ω—ã—Ö. –≠—Ç–∏ –º–æ–¥–µ–ª–∏ –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ —Ç–æ—á–Ω–æ—Å—Ç–∏ –≤ –∑–∞–¥–∞—á–∞—Ö –∏–∑–º–µ—Ä–µ–Ω–∏—è —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–æ–π –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ —Å–ª–æ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ –º–µ—Ç–æ–¥–∞–º–∏, –≤–∫–ª—é—á–∞—è –Ω–µ–π—Ä–æ–Ω–Ω—ã–µ —Å–µ—Ç–∏. –í–µ–∫—Ç–æ—Ä—ã –æ–±—É—á–∞—é—Ç—Å—è –±—ã—Å—Ç—Ä–µ–µ –∏ —Å –º–µ–Ω—å—à–∏–º–∏ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã–º–∏ –∑–∞—Ç—Ä–∞—Ç–∞–º–∏, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –¥–∞–Ω–Ω—ã–µ –Ω–∞ –ø–æ—Ä—è–¥–∫–∏ –±–æ–ª—å—à–∏–µ, —á–µ–º —Ä–∞–Ω–µ–µ. –í —Å—Ç–∞—Ç—å–µ —Ç–∞–∫–∂–µ –æ–±—Å—É–∂–¥–∞—é—Ç—Å—è –ø

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [02:03<00:51, 17.33s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Enhancing Retrieval-Augmented Generation: A Study of Best Practices

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  

–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è —Ä–∞–∑–ª–∏—á–Ω—ã–µ –∞—Å–ø–µ–∫—Ç—ã –∏ –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏–∏ —Å–∏—Å—Ç–µ–º Retrieval-Augmented Generation (RAG), –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–µ –Ω–∞ —É–ª—É—á—à–µ–Ω–∏–µ –∏—Ö –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –∏ –∞–¥–∞–ø—Ç–∏–≤–Ω–æ—Å—Ç–∏. –ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –æ—Ö–≤–∞—Ç—ã–≤–∞–µ—Ç –≤–ª–∏—è–Ω–∏–µ —Ç–∞–∫–∏—Ö —Ñ–∞–∫—Ç–æ—Ä–æ–≤, –∫–∞–∫ —Ä–∞–∑–º–µ—Ä —è–∑—ã–∫–æ–≤–æ–π –º–æ–¥–µ–ª–∏, —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ –∑–∞–ø—Ä–æ—Å–æ–≤, —Ä–∞–∑–º–µ—Ä—ã –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏ –±–∞–∑ –∑–Ω–∞–Ω–∏–π, –º–µ—Ç–æ–¥—ã –∑–∞–ø—Ä–æ—Å–Ω–æ–≥–æ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏—è, –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –∏ –º—É–ª—å—Ç–∏—è–∑—ã—á–Ω—ã–µ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö. –í —Ö–æ–¥–µ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–æ–≤ –≤—ã—è–≤–ª–µ–Ω—ã –æ–ø—Ç–∏–º–∞–ª—å–Ω—ã–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –¥–ª—è –ø–æ–≤—ã—à–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –∏ –∫–æ–Ω—Ç–µ–∫—Å—Ç—É–∞–ª—å–Ω–æ–π

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [02:19<00:33, 16.71s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: LADDER: Self-Improving LLMs Through Recursive Problem Decomposition

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–µ–π —Ä–µ—à–∞—Ç—å –∑–∞–¥–∞—á–∏ —á–µ—Ä–µ–∑ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –ø—Ä–æ–±–ª–µ–º –∏ –æ–±—É—á–µ–Ω–∏–µ —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º –Ω–∞ –æ—Å–Ω–æ–≤–µ –≤–µ—Ä–∏—Ñ–∏—Ü–∏—Ä—É–µ–º—ã—Ö –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–π. –í–∫–ª—é—á–µ–Ω–∏–µ —Ç–µ—Å—Ç–æ–≤–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è (TTRL) –ø–æ–∑–≤–æ–ª–∏–ª–æ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –ø–æ–≤—ã—Å–∏—Ç—å —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –≤ —Ä–µ—à–µ–Ω–∏–∏ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á, —Ç–∞–∫–∏—Ö –∫–∞–∫ –∏–Ω—Ç–µ–≥—Ä–∞—Ü–∏—è, –¥–æ—Å—Ç–∏–≥–Ω—É–≤ 90% —Ç–æ—á–Ω–æ—Å—Ç–∏ –Ω–∞ MIT Integration Bee. –†–∞–±–æ—Ç–∞ –ø–æ–¥—á–µ—Ä–∫–∏–≤–∞–µ—Ç –≤–∞–∂–Ω–æ—Å—Ç—å —Å—Ç—Ä–∞—Ç–µ–≥–∏—á–µ—Å–∫–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è –∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã—Ö –æ–±—Ä–∞—Ç–Ω—ã—Ö —Å–≤—è–∑–µ–π –¥–ª—è –ø–æ–≤—ã—à–µ–Ω–∏—è –ø—Ä

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [02:41<00:18, 18.44s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Scalable-Softmax Is Superior for Attention

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax) ‚Äî –∞–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—é —Ñ—É–Ω–∫—Ü–∏–∏ Softmax –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–Ω—ã—Ö –º–æ–¥–µ–ª—è—Ö –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è —Ä–∞–±–æ—Ç—ã —Å –¥–ª–∏–Ω–Ω—ã–º–∏ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞–º–∏ –∏ –ø–æ–≤—ã—à–µ–Ω–∏—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ –≤–Ω–∏–º–∞–Ω–∏—è. SSMax —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É "–∑–∞—Ç—É—Ö–∞–Ω–∏—è" –≤–Ω–∏–º–∞–Ω–∏—è, –≤–æ–∑–Ω–∏–∫–∞—é—â–µ–≥–æ –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –∏ —É–ª—É—á—à–∞–µ—Ç —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ —É–¥–µ—Ä–∂–∏–≤–∞—Ç—å –≤–Ω–∏–º–∞–Ω–∏–µ –Ω–∞ –∫–ª—é—á–µ–≤—ã—Ö —ç–ª–µ–º–µ–Ω—Ç–∞—Ö –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. 

### –û—Å–Ω–æ–≤–Ω—ã–µ –≤—ã–≤–æ–¥—ã:
1. **–ü—Ä–æ–±–ª–µ–º–∞ Softmax**: –ú–∞–∫—Å–∏–º–∞–ª—å–Ω—ã–π —ç–ª–µ–º–µ–Ω—Ç Softmax —É–º–µ–Ω—å—à–∞–µ—Ç—Å—è —Å —Ä–æ—Å—Ç–æ–º —Ä–∞–∑–º–µ—Ä–∞ –≤—Ö–æ–¥–Ω–æ–≥–æ –≤–µ–∫—Ç–æ—Ä–∞, —á—Ç–

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [03:06<00:00, 18.65s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  
FlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞—Ü–∏–∏ –≤–Ω–∏–º–∞–Ω–∏—è –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞—Ö, –∫–æ—Ç–æ—Ä—ã–π —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É –≤—ã—Å–æ–∫–æ–π –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π –∏ –ø–∞–º—è—Ç–∏-–∑–∞—Ç—Ä–∞—Ç–Ω–æ—Å—Ç–∏ –Ω–∞ –¥–ª–∏–Ω–Ω—ã—Ö –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—è—Ö. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö –º–µ—Ç–æ–¥–æ–≤, FlashAttention —É—á–∏—Ç—ã–≤–∞–µ—Ç –æ–ø–µ—Ä–∞—Ü–∏–∏ –≤–≤–æ–¥–∞-–≤—ã–≤–æ–¥–∞ (IO) –º–µ–∂–¥—É —É—Ä–æ–≤–Ω—è–º–∏ –ø–∞–º—è—Ç–∏ GPU, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–º–µ–Ω—å—à–∏—Ç—å –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–±—Ä–∞—â–µ–Ω–∏–π –∫ –≤—ã—Å–æ–∫–æ—Å–∫–æ—Ä–æ—Å—Ç–Ω–æ–π –ø–∞–º—è—Ç–∏ (HBM) –∏ —É–ª—É—á—à–∏—Ç—å –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å. 

–û—Å–Ω–æ–≤–Ω—ã–µ –¥–æ—Å—Ç–∏–∂–µ–Ω–∏—è:
- FlashAttention –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –¥–æ 15% —É—Å–∫–æ—Ä–µ–Ω–∏—è –≤ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–µ BERT-lar




In [41]:
times

[14.314668655395508,
 17.953752756118774,
 9.457916498184204,
 13.708330154418945,
 13.110188961029053,
 12.668460130691528,
 15.162318706512451,
 12.614119529724121,
 19.203843116760254,
 18.3269145488739]

In [49]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ –¢–±–∞–Ω–∫: {np.mean(times):.0f} —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ –¢–±–∞–Ω–∫: 15 —Å–µ–∫


In [15]:
df_summarization = pd.DataFrame()

df_summarization['gt'] = gt
df_summarization['Tbank'] = outputs

In [18]:
df_summarization.to_csv('summarize_examples.csv', index=False)

In [19]:
df_summarization

Unnamed: 0,gt,Tbank
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø..."
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â..."
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...


## YandexGPT5-lite 8B

In [4]:
model_name = "yandex/YandexGPT-5-Lite-8B-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map="auto")

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)

embedding_model = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  embedding_model = HuggingFaceBgeEmbeddings(
  vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)


In [8]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–°—Ñ–æ—Ä–º—É–ª–∏—Ä—É–π –∫—Ä–∞—Ç–∫–æ–µ –∏ —Ç–æ—á–Ω–æ–µ —Ä–µ–∑—é–º–µ –ø–æ —Ç–µ–∫—Å—Ç—É –Ω–∏–∂–µ. –û—Ç–≤–µ—á–∞–π —Å—Ç—Ä–æ–≥–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ"
)

def build_messages(abstract, conclusion) -> list[dict]:
    full_text = f"Abstract:\n{abstract}\n\nConclusion:\n{conclusion}"

    return [
        {"role": "user", "content": SYSTEM_MSG},
        {"role": "user", "content": full_text}
    ]

In [9]:
outputs = []
times = []
for arxiv_url in tqdm(urls):
    arxiv_id = extract_arxiv_id(arxiv_url)
    
    pdf_path = f'./articles/{arxiv_id}.pdf'
    pdf_path, title = download_pdf(arxiv_id, pdf_path)
    md_raw = pymupdf4llm.to_markdown(pdf_path)
    md_trimmed = trim_markdown_after_section(md_raw)
    md_cleaned = clean_markdown_for_rag(md_trimmed)
    abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
    conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])
    
    messages = build_messages(abstract, conclusion)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False)[0]['generated_text']
    
    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: {title}\n')
    print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:20<03:06, 20.71s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: TTRL: Test-Time Reinforcement Learning

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforcement Learning (TTRL) –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) —Å –ø–æ–º–æ—â—å—é RL –Ω–∞ –Ω–µ–º–∞—Ä–∫–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. TTRL –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã–µ –∑–Ω–∞–Ω–∏—è –∏–∑ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—É—á–µ–Ω–Ω—ã—Ö –º–æ–¥–µ–ª–µ–π –∏ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏–º ¬´—Å–∞–º–æ—ç–≤–æ–ª—é—Ü–∏–æ–Ω–∏—Ä–æ–≤–∞—Ç—å¬ª.

–≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ TTRL –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –º–æ–¥–µ–ª–µ–π –∏ –∑–∞–¥–∞—á. –ù–∞–ø—Ä–∏–º–µ—Ä, TTRL –ø–æ–≤—ã—Å–∏–ª –ø–æ–∫–∞–∑–∞—Ç–µ–ª—å pass@1 –º–æ–¥–µ–ª–∏ Qwen-2.5-Math-7B –ø—Ä–∏–º–µ—Ä–Ω–æ –Ω–∞ 159% –Ω–∞ AIME 2024, –∏—Å–ø–æ–ª—å–∑—É—è —Ç–æ–ª—å–∫–æ –Ω–µ–º–∞—Ä–∫–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ —Ç–µ—Å—Ç–æ–≤—ã–µ –¥–∞–Ω–Ω—ã–µ.

–ú–µ—Ç–æ–¥ TTRL –¥–µ–º–æ–Ω—Å—

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:44<02:58, 22.29s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Attention Is All You Need

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ ‚Äî Transformer, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –∏—Å–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º–∞—Ö –≤–Ω–∏–º–∞–Ω–∏—è, –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö –∏ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã—Ö —Å–ª–æ—ë–≤. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ –º–∞—à–∏–Ω–Ω–æ–º—É –ø–µ—Ä–µ–≤–æ–¥—É –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ –º–æ–¥–µ–ª–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ Transformer –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—è—Ç –ø–æ –∫–∞—á–µ—Å—Ç–≤—É —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–µ –º–æ–¥–µ–ª–∏, –ø—Ä–∏ —ç—Ç–æ–º –æ–Ω–∏ –±–æ–ª–µ–µ –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑—É–µ–º—ã –∏ —Ç—Ä–µ–±—É—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –º–µ–Ω—å—à–µ –≤—Ä–µ–º–µ–Ω–∏ –Ω–∞ –æ–±—É—á–µ–Ω–∏–µ.

–ù–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ Transformer –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç –ª—É—á—à–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –¥—Ä—É–≥–∏–º–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏, –æ–±—É—á–∞—è—Å—å –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –±—ã—Å—Ç—Ä–µ–µ

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:54<01:56, 16.68s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Recommender systems: when memory matters

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è—Ç–∏ –Ω–∞ –æ–±—É—á–∞–µ–º–æ—Å—Ç—å –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–π —Å–∏—Å—Ç–µ–º—ã —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π —Å —É—á—ë—Ç–æ–º –Ω–µ—è–≤–Ω–æ–π –æ–±—Ä–∞—Ç–Ω–æ–π —Å–≤—è–∑–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π. –ü—Ä–µ–¥–ª–æ–∂–µ–Ω –æ–Ω–ª–∞–π–Ω-–∞–ª–≥–æ—Ä–∏—Ç–º, –æ–±–Ω–æ–≤–ª—è—é—â–∏–π –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ –ø–æ–±–ª–æ—á–Ω–æ. –≠–º–ø–∏—Ä–∏—á–µ—Å–∫–∏–µ –æ—Ü–µ–Ω–∫–∏ –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –ø–æ —Å—Ç–µ–ø–µ–Ω–∏ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è—Ç–∏ –≤ –∏—Ö –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—è—Ö —Å —Å–∏—Å—Ç–µ–º–æ–π —Å—É—â–µ—Å—Ç–≤–µ–Ω–Ω–æ —É–ª—É—á—à–∞–µ—Ç –ø–æ–∫–∞–∑–∞—Ç–µ–ª–∏ MAP –∏ NDCG –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ –∫—Ä—É–ø–Ω–æ–º–∞—Å—à—Ç–∞–±–Ω—ã—Ö —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω—ã—Ö —Å–∏—Å—Ç–µ–º.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [01:07<01:33, 15.56s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Large Language Models Meet NLP: A Survey

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –≤ –∑–∞–¥–∞—á–∞—Ö –æ–±—Ä–∞–±–æ—Ç–∫–∏ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ–≥–æ —è–∑—ã–∫–∞ (NLP). –ê–≤—Ç–æ—Ä—ã –≤–≤–æ–¥—è—Ç —É–Ω–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞–Ω–Ω—É—é —Ç–∞–∫—Å–æ–Ω–æ–º–∏—é, –≤–∫–ª—é—á–∞—é—â—É—é –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è —Å –∑–∞–º–æ—Ä–æ–∂–µ–Ω–Ω—ã–º–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏ –∏ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è —Å –Ω–∞—Å—Ç—Ä–æ–π–∫–æ–π –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤. 

–¶–µ–ª—å —Ä–∞–±–æ—Ç—ã ‚Äî –æ—Ç–≤–µ—Ç–∏—Ç—å –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã –æ —Ç–µ–∫—É—â–µ–º –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–∏ LLM –≤ NLP, —Ä–µ—à–µ–Ω–∏–∏ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö –∑–∞–¥–∞—á NLP —Å –ø–æ–º–æ—â—å—é LLM –∏ –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤–∞—Ö —Ä–∞–∑–≤–∏—Ç–∏—è LLM –≤ —ç—Ç–æ–π –æ–±–ª–∞—Å—Ç–∏. –¢–∞–∫–∂–µ –≤—ã–¥–µ–ª–µ–Ω—ã –Ω–æ–≤—ã–µ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏—è –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–π –∏ —Å–≤—è–∑–∞–Ω–Ω—ã–µ —Å –Ω–∏–º–∏ –≤—ã–∑–æ–≤—ã. –°—Ç–∞—Ç—å—è –º–æ–∂–µ—Ç —Å–ª—É–∂–∏—Ç—å –ø—Ä–∞–∫

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [01:23<01:17, 15.59s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥–æ –¥–æ—Å—Ç—É–ø–∞ –±–æ–ª—å—à–∏—Ö –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—É—á–µ–Ω–Ω—ã—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π –∫ –∑–Ω–∞–Ω–∏—è–º –∏ –∏—Ö –º–∞–Ω–∏–ø—É–ª–∏—Ä–æ–≤–∞–Ω–∏—è –∏–º–∏. –ü—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –æ–±—â–∏–π —Ä–µ—Ü–µ–ø—Ç —Ç–æ–Ω–∫–æ–π –Ω–∞—Å—Ç—Ä–æ–π–∫–∏ –¥–ª—è –º–æ–¥–µ–ª–µ–π –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Å –¥–æ–ø–æ–ª–Ω–µ–Ω–∏–µ–º –Ω–∞ –æ—Å–Ω–æ–≤–µ –ø–æ–∏—Å–∫–∞ (RAG), –∫–æ—Ç–æ—Ä—ã–µ —Å–æ—á–µ—Ç–∞—é—Ç –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—É—á–µ–Ω–Ω—É—é –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –∏ –Ω–µ–ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –ø–∞–º—è—Ç—å. –ú–æ–¥–µ–ª–∏ RAG —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–ª–∏—Å—å –Ω–∞ —à–∏—Ä–æ–∫–æ–º —Å–ø–µ–∫—Ç—Ä–µ –∑–∞–¥–∞—á, —Ç—Ä–µ–±—É—é—â–∏—Ö –≥–ª—É–±–æ–∫–∏—Ö –∑–Ω–∞–Ω–∏–π, –∏ —É—Å—Ç–∞–Ω–æ–≤–∏–ª–∏ –Ω–æ–≤—ã–π —Å—Ç–∞–Ω–¥–∞—Ä—Ç –≤ —Ç—Ä—ë—Ö –∑–∞–¥–∞—á–∞—Ö –≤–æ–ø—Ä–æ—Å–æ–≤ –∏ –æ—Ç–≤–µ—Ç–æ–≤ —Å –æ—Ç–

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [01:39<01:03, 15.86s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Efficient Estimation of Word Representations in Vector Space

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ–ª–µ–π –¥–ª—è –≤—ã—á–∏—Å–ª–µ–Ω–∏—è –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –∏–∑ –æ—á–µ–Ω—å –±–æ–ª—å—à–∏—Ö –Ω–∞–±–æ—Ä–æ–≤ –¥–∞–Ω–Ω—ã—Ö. –ü—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã–µ –º–æ–¥–µ–ª–∏ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø—Ä–∏ –≥–æ—Ä–∞–∑–¥–æ –º–µ–Ω—å—à–∏—Ö –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–∞—Ç—Ä–∞—Ç–∞—Ö.

–û–ø–∏—Å–∞–Ω–Ω—ã–µ –º–æ–¥–µ–ª–∏ –æ–±–µ—Å–ø–µ—á–∏–≤–∞—é—Ç —Å–æ–≤—Ä–µ–º–µ–Ω–Ω—ã–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –≤ –∑–∞–¥–∞—á–∞—Ö –∏–∑–º–µ—Ä–µ–Ω–∏—è —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö —Å—Ö–æ–¥—Å—Ç–≤ —Å–ª–æ–≤. –û–Ω–∏ –º–æ–≥—É—Ç –±—ã—Ç—å –æ–±—É—á–µ–Ω—ã –Ω–∞ –∫–æ—Ä–ø—É—Å–∞—Ö —Ä–∞–∑–º–µ—Ä–æ–º –≤ —Ç—Ä–∏–ª–ª–∏–æ–Ω —Å–ª–æ–≤ –∏ –∏–º–µ—é—Ç –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª –¥–ª—è –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –≤ —Ä–∞–∑–ª–∏

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:51<00:43, 14.38s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Enhancing Retrieval-Augmented Generation: A Study of Best Practices

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞–π–Ω—ã —Å–∏—Å—Ç–µ–º Retrieval-Augmented Generation (RAG), –∫–æ—Ç–æ—Ä—ã–µ –≤–∫–ª—é—á–∞—é—Ç —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –∑–∞–ø—Ä–æ—Å–æ–≤, –Ω–æ–≤—ã–µ —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏ –ø–æ–∏—Å–∫–∞ –∏ –∫–æ–Ω—Ç—Ä–∞—Å—Ç–Ω–æ–µ –∏–Ω–∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ. –ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏ –∏–∑—É—á–∞–µ—Ç –∫–ª—é—á–µ–≤—ã–µ —Ñ–∞–∫—Ç–æ—Ä—ã, –≤–ª–∏—è—é—â–∏–µ –Ω–∞ –∫–∞—á–µ—Å—Ç–≤–æ –æ—Ç–≤–µ—Ç–æ–≤, —Ç–∞–∫–∏–µ –∫–∞–∫ —Ä–∞–∑–º–µ—Ä —è–∑—ã–∫–æ–≤–æ–π –º–æ–¥–µ–ª–∏, –¥–∏–∑–∞–π–Ω –ø–æ–¥—Å–∫–∞–∑–æ–∫, —Ä–∞–∑–º–µ—Ä —Ñ—Ä–∞–≥–º–µ–Ω—Ç–æ–≤ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤, —Ä–∞–∑–º–µ—Ä –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π –∏ –¥—Ä—É–≥–∏–µ. –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–µ –∏–¥–µ–∏ –¥–ª—è —Ä–∞–∑—Ä–∞–±–æ—Ç–∫–∏ RAG-—Å–∏—Å—Ç–µ–º, –æ–±–µ—Å–ø–µ—á–∏–≤–∞—è –±–∞–ª–∞–Ω—Å –º–µ–∂–¥—É –∫–æ–Ω—Ç–µ–∫—

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [02:08<00:30, 15.41s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: LADDER: Self-Improving LLMs Through Recursive Problem Decomposition

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–µ–π —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π –∫ —Ä–µ—à–µ–Ω–∏—é –∑–∞–¥–∞—á —á–µ—Ä–µ–∑ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω—É—é –¥–µ–∫–æ–º–ø–æ–∑–∏—Ü–∏—é –∑–∞–¥–∞—á –∏ –æ–±—É—á–µ–Ω–∏–µ —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã–º–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è–º–∏. –¢–∞–∫–∂–µ –æ–ø–∏—Å–∞–Ω–æ –µ–≥–æ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –¥–ª—è –≤—Ä–µ–º–µ–Ω–∏ —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è (TTRL).

LADDER –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –∏ –æ–±—É—á–∞—Ç—å—Å—è –Ω–∞ –ø–æ—Å—Ç–µ–ø–µ–Ω–Ω–æ —É–ø—Ä–æ—â–∞—é—â–∏—Ö—Å—è –≤–∞—Ä–∏–∞–Ω—Ç–∞—Ö —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á. –ü–æ–¥—Ö–æ–¥ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–µ —É–ª—É—á—à–µ–Ω–∏—è –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω–æ–≥–æ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏—è –∏–ª–∏ —á–µ–ª–æ–≤–µ—á–µ—Å–∫–æ–≥–æ –Ω–∞–¥–

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [02:27<00:16, 16.30s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Scalable-Softmax Is Superior for Attention

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax) –∫–∞–∫ –∞–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤–∞ —Ñ—É–Ω–∫—Ü–∏–∏ Softmax –≤ –º–µ—Ö–∞–Ω–∏–∑–º–∞—Ö –≤–Ω–∏–º–∞–Ω–∏—è Transformer. SSMax —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É ¬´–∑–∞—Ç—É—Ö–∞–Ω–∏—è¬ª –≤–Ω–∏–º–∞–Ω–∏—è –∏ —É–ª—É—á—à–∞–µ—Ç –æ–±–æ–±—â–µ–Ω–∏–µ –¥–ª—è –¥–ª–∏–Ω–Ω—ã—Ö –∫–æ–Ω—Ç–µ–∫—Å—Ç–æ–≤.

–≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ –º–æ–¥–µ–ª–∏ —Å SSMax –¥–æ—Å—Ç–∏–≥–∞—é—Ç –±–æ–ª–µ–µ –±—ã—Å—Ç—Ä–æ–≥–æ —Å–Ω–∏–∂–µ–Ω–∏—è –ø–æ—Ç–µ—Ä—å –ø—Ä–∏ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–º –æ–±—É—á–µ–Ω–∏–∏ –∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞—é—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –≤ –¥–ª–∏–Ω–Ω—ã—Ö –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞—Ö –∏ –ø—Ä–∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏–∏ –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. –ú–æ–¥–µ–ª–∏, –∏—Å–ø–æ–ª—å–∑—É—é—â–∏–µ SSMax —Å –Ω–∞—á–∞–ª–∞ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è, –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –Ω–∞–∏–±–æ–ª—å—à—É—é —Å–ø–æ—Å–æ

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [02:48<00:00, 16.88s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:  –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç–º —Ç–æ—á–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è, —É—á–∏—Ç—ã–≤–∞—é—â–∏–π –æ–ø–µ—Ä–∞—Ü–∏–∏ –≤–≤–æ–¥–∞-–≤—ã–≤–æ–¥–∞ (IO-aware). –û–Ω –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ç–∞–π–ª–∏–Ω–≥ –¥–ª—è —Å–æ–∫—Ä–∞—â–µ–Ω–∏—è –∫–æ–ª–∏—á–µ—Å—Ç–≤–∞ –æ–ø–µ—Ä–∞—Ü–∏–π —á—Ç–µ–Ω–∏—è/–∑–∞–ø–∏—Å–∏ –º–µ–∂–¥—É –ø–∞–º—è—Ç—å—é GPU –∏ —Å–æ–∫—Ä–∞—â–∞–µ—Ç –≤—Ä–µ–º—è —Ä–∞–±–æ—Ç—ã —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–º–∏ –º–µ—Ç–æ–¥–∞–º–∏.

FlashAttention –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç —É—Å–∫–æ—Ä–µ–Ω–∏–µ —Ä–∞–±–æ—Ç—ã —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –±–∞–∑–æ–≤—ã–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏: –Ω–∞ 15% –¥–ª—è BERT-large, –≤ 3 —Ä–∞–∑–∞ –¥–ª—è GPT-2 –∏ –≤ 2,4 —Ä–∞–∑–∞ –¥–ª—è long-range arena. –¢–∞–∫–∂–µ –∞–ª–≥–æ—Ä–∏—Ç–º –ø–æ–∑–≤–æ–ª—è–µ—Ç —É–≤–µ–ª–∏—á–∏—Ç—å –¥–ª–∏–Ω—É –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞ –≤ —Ç—Ä–




In [10]:
times

[14.131155252456665,
 16.678635597229004,
 7.474043846130371,
 9.699246644973755,
 10.073513269424438,
 12.498071432113647,
 8.384318590164185,
 14.539063930511475,
 13.785437822341919,
 11.562705516815186]

In [11]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ YandexGPT5: {np.mean(times):.0f} —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ YandexGPT5: 12 —Å–µ–∫


In [10]:
df_summarization = pd.read_csv('summarize_examples.csv')

In [14]:
df_summarization['YandexGPT'] = outputs

In [16]:
df_summarization.to_csv('summarize_examples.csv', index=False)

In [23]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...


## QWEN 3

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #quantization_config=quant_config,
    torch_dtype="auto",
    device_map="cuda"
)

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Device set to use cuda


In [6]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–°—Ñ–æ—Ä–º—É–ª–∏—Ä—É–π –∫—Ä–∞—Ç–∫–æ–µ –∏ —Ç–æ—á–Ω–æ–µ —Ä–µ–∑—é–º–µ –ø–æ —Ç–µ–∫—Å—Ç—É –Ω–∏–∂–µ. –û—Ç–≤–µ—á–∞–π —Å—Ç—Ä–æ–≥–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ"
)

def build_messages(abstract, conclusion) -> list[dict]:
    full_text = f"Abstract:\n{abstract}\n\nConclusion:\n{conclusion}"

    return [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": full_text},
        {"role": "assistant", "content": "–ö—Ä–∞—Ç–∫–æ–µ —Ä–µ–∑—é–º–µ:"},
    ]

In [9]:
outputs = []
times = []
for arxiv_url in tqdm(urls):
    arxiv_id = extract_arxiv_id(arxiv_url)
    
    pdf_path = f'../articles/{arxiv_id}.pdf'
    pdf_path, title = download_pdf(arxiv_id, pdf_path)
    md_raw = pymupdf4llm.to_markdown(pdf_path)
    md_trimmed = trim_markdown_after_section(md_raw)
    md_cleaned = clean_markdown_for_rag(md_trimmed)
    abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
    conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])
    
    messages = build_messages(abstract, conclusion)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, temperature=0.7, top_p=0.8, top_k=20, min_p=0)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: {title}\n')
    print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:22<03:25, 22.83s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: TTRL: Test-Time Reinforcement Learning

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ –æ–±—É—á–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —É—Å–∏–ª–µ–Ω–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è (RL) –Ω–∞ –Ω–µ–æ—Ç–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –Ω–∞–∑–≤–∞–Ω–Ω—ã–π Test-Time Reinforcement Learning (TTRL). –û—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–∏ –º–µ—Ç–æ–¥–∞ –≥–æ–ª–æ—Å–æ–≤–∞–Ω–∏—è –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–π –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ TTRL –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–µ–π –Ω–∞ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö, –≤–∫–ª—é—á–∞—è –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–µ –∑–∞–¥–∞—á–∏, –∏ –ø—Ä–∏–±–ª–∏–∂–∞–µ—Ç –∏—Ö –∫ —É—Ä–æ–≤–Ω—é –º–æ–¥–µ–ª–µ–π, –æ–±—É—á–µ–Ω–Ω—ã—Ö

 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:41<02:43, 20.40s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Attention Is All You Need

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ Transformer, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –∏—Å–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º–∞—Ö –≤–Ω–∏–º–∞–Ω–∏—è, –∑–∞–º–µ–Ω—è—é—â–∞—è —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã–µ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ –∏ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã–µ —Å–µ—Ç–∏. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –º–∞—à–∏–Ω–Ω–æ–≥–æ –ø–µ—Ä–µ–≤–æ–¥–∞ –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ –º–æ–¥–µ–ª—å –¥–æ—Å—Ç–∏–≥–∞–µ—Ç –±–æ–ª–µ–µ –≤—ã—Å–æ–∫–æ–≥–æ –∫–∞—á–µ—Å—Ç–≤–∞ –ø–µ—Ä–µ–≤–æ–¥–∞, –æ–±–ª–∞–¥–∞–µ—Ç –ª—É—á—à–µ–π –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑—É–µ–º–æ—Å—Ç—å—é –∏ —Ç—Ä–µ–±—É–µ—Ç –º–µ–Ω—å—à–µ –≤—Ä–µ–º–µ–Ω–∏ –Ω–∞ –æ–±—É—á–µ–Ω–∏–µ. Transformer —É—Å—Ç–∞–Ω–æ–≤–∏–ª –Ω–æ–≤—ã–µ —Ä–µ–∫–æ—Ä–¥—ã –ø–æ BLEU –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∞–Ω–≥–ª–∏–π—Å–∫–æ-–Ω–µ–º–µ—Ü–∫–æ–≥–æ –∏ –∞–Ω–≥–ª–∏–π—Å–∫–æ-—Ñ—Ä–∞–Ω—Ü—É–∑—Å–∫–æ–≥–æ –ø–µ—Ä–µ–≤–æ–¥–∞. –ú–æ–¥–µ–ª—å —É—Å–ø–µ—à–Ω–æ –ø—Ä–∏–º–µ–Ω—è–ª–∞—Å—å –∏ –Ω–∞ –¥—Ä—É–≥–∏—Ö –∑–∞–¥–∞—á–∞—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Ä

 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [01:06<02:36, 22.35s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Recommender systems: when memory matters

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í —Å—Ç–∞—Ç—å–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –Ω–∞ –æ–±—É—á–∞–µ–º–æ—Å—Ç—å –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω–æ–≥–æ —Å–∏—Å—Ç–µ–º—ã —Å —É—á–µ—Ç–æ–º –Ω–µ—è–≤–Ω–æ–π –æ–±—Ä–∞—Ç–Ω–æ–π —Å–≤—è–∑–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π. –ü—Ä–µ–¥–ª–æ–∂–µ–Ω–∞ –æ–Ω–ª–∞–π–Ω-–∞–ª–≥–æ—Ä–∏—Ç–º, –≥–¥–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ –æ–±–Ω–æ–≤–ª—è—é—Ç—Å—è –ø–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è–º –Ω–∞ –±–ª–æ–∫–∞—Ö —ç–ª–µ–º–µ–Ω—Ç–æ–≤, —Å–æ—Å—Ç–æ—è—â–∏—Ö –∏–∑ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –Ω–µ–ø—Ä–æ—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã—Ö —ç–ª–µ–º–µ–Ω—Ç–æ–≤, –∑–∞ –∫–æ—Ç–æ—Ä—ã–º–∏ —Å–ª–µ–¥—É–µ—Ç –ø—Ä–æ—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã–π. –≠–º–ø–∏—Ä–∏—á–µ—Å–∫–∏–µ –æ—Ü–µ–Ω–∫–∏ –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –ø–æ —Å—Ç–µ–ø–µ–Ω–∏ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –≤ –∏—Ö –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç –ø

 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [01:25<02:05, 21.00s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Large Language Models Meet NLP: A Survey

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç —Å–æ–±–æ–π –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑–æ—Ä –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –≤ –æ–±–ª–∞—Å—Ç–∏ –æ–±—Ä–∞–±–æ—Ç–∫–∏ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ–≥–æ —è–∑—ã–∫–∞ (NLP). –í —Ä–∞–±–æ—Ç–µ –≤–≤–æ–¥–∏—Ç—Å—è —É–Ω–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞–Ω–Ω–∞—è –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏—è, –≤–∫–ª—é—á–∞—é—â–∞—è –ø–∞—Ä–∞–º–µ—Ç—Ä—ã "–∑–∞–º–æ—Ä–æ–∂–µ–Ω–Ω—ã—Ö" –∏ "–Ω–∞—Å—Ç—Ä–æ–µ–Ω–Ω—ã—Ö" –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–π, –∞ —Ç–∞–∫–∂–µ –≤—ã—è–≤–ª—è—é—Ç—Å—è –Ω–æ–≤—ã–µ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏—è –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–π –∏ –≤—ã–∑–æ–≤—ã. –¶–µ–ª—å—é —è–≤–ª—è–µ—Ç—Å—è –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–µ —Ü–µ–Ω–Ω—ã—Ö –∏–Ω—Å–∞–π—Ç–æ–≤ –∏ –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏—Ö —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π –¥–ª—è —Ä–∞–∑—Ä–∞–±–æ—Ç–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö LLM –≤ NLP, –∞ —Ç–∞–∫–∂–µ –ø–æ–¥–¥–µ—Ä–∂–∫–∞ –¥–∞–ª—å–Ω–µ–π—à–∏—Ö –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–π —á–µ—Ä–µ–∑ –ø—É–±–ª–∏—á–Ω—ã–π —Ä–µ—Å—É—

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [01:45<01:44, 20.80s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –ê–≤—Ç–æ—Ä—ã –∏—Å—Å–ª–µ–¥—É—é—Ç –º–æ–¥–µ–ª—å RAG (Retrieval-Augmented Generation), –∫–æ—Ç–æ—Ä–∞—è —Å–æ—á–µ—Ç–∞–µ—Ç –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω—É—é seq2seq-–º–æ–¥–µ–ª—å —Å –Ω–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–æ–π –ø–∞–º—è—Ç—å—é –≤ –≤–∏–¥–µ –ø–ª–æ—Ç–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∞ –í–∏–∫–∏–ø–µ–¥–∏–∏. –û–Ω–∏ —Å—Ä–∞–≤–Ω–∏–≤–∞—é—Ç –¥–≤–∞ –ø–æ–¥—Ö–æ–¥–∞ –∫ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—é —ç—Ç–æ–π –ø–∞–º—è—Ç–∏ –∏ –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç, —á—Ç–æ RAG-–º–æ–¥–µ–ª–∏ –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—è—Ç –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∏–µ seq2seq-–º–æ–¥–µ–ª–∏ –∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã retrieve-and-extract –Ω–∞ –∑–∞–¥–∞—á–∞—Ö, —Ç—Ä–µ–±—É—é—â–∏—Ö –∑–Ω–∞–Ω–∏–π, –≤–∫–ª—é—á–∞—è –æ—Ç–∫—Ä—ã—Ç—ã–µ QA-–∑–∞–¥–∞—á–∏. –¢–∞–∫–∂–µ RAG-–º–æ–¥–µ–ª–∏ –≥–µ–Ω–µ—Ä–∏—Ä—É—é—Ç –±–æ–ª–µ–µ —Ç–æ—á–Ω—É—é, —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–Ω—É—é –∏ —Ñ–∞–∫—Ç—É—Ä–Ω—É—é –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [02:12<01:31, 22.93s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Efficient Estimation of Word Representations in Vector Space

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç—Å—è –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ–¥—Ö–æ–¥–∞ –¥–ª—è –≤—ã—á–∏—Å–ª–µ–Ω–∏—è –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ—á–µ–Ω—å –±–æ–ª—å—à–∏—Ö –¥–∞–Ω–Ω—ã—Ö. –ü—Ä–µ–¥–ª–æ–∂–µ–Ω–Ω—ã–µ –º–µ—Ç–æ–¥—ã –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ —Ç–æ—á–Ω–æ—Å—Ç–∏ –≤ –∑–∞–¥–∞—á–∞—Ö –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è —Å—Ö–æ–∂–µ—Å—Ç–∏ —Å–ª–æ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–º–∏ –º–µ—Ç–æ–¥–∞–º–∏, –ø—Ä–∏ —ç—Ç–æ–º —Ç—Ä–µ–±—É—è –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –º–µ–Ω—å—à–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö —Ä–µ—Å—É—Ä—Å–æ–≤. –í—ã—Å–æ–∫–æ–∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω—ã–µ –≤–µ–∫—Ç–æ—Ä—ã –º–æ–∂–Ω–æ –æ–±—É—á–∏—Ç—å –∑–∞ –º–µ–Ω–µ–µ —á–µ–º –¥–µ–Ω—å –Ω–∞ –¥–∞–Ω–Ω—ã—Ö –æ–±—ä–µ–º–æ–º 1,6 –º–∏–ª–ª–∏–∞—Ä–¥–∞ —Å–ª–æ–≤. –≠—Ç–∏ –≤–µ–∫—Ç–æ—Ä—ã –¥–æ—Å—Ç–∏–≥–∞—é—Ç —Å–æ—Å—Ç–æ—è–Ω–∏—è –∏—

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [02:30<01:03, 21.15s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Enhancing Retrieval-Augmented Generation: A Study of Best Practices

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –ø—Ä–æ–¥–≤–∏–Ω—É—Ç—ã–µ –¥–∏–∑–∞–π–Ω-—Ä–µ—à–µ–Ω–∏—è RAG-—Å–∏—Å—Ç–µ–º, –≤–∫–ª—é—á–∞—é—â–∏–µ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –∑–∞–ø—Ä–æ—Å–∞, –Ω–æ–≤—ã–µ —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏ –ø–æ–∏—Å–∫–∞ –∏ Contrastive In-Context Learning. –ü—Ä–æ–≤–µ–¥–µ–Ω–æ —Å–∏—Å—Ç–µ–º–Ω–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –∫–ª—é—á–µ–≤—ã—Ö —Ñ–∞–∫—Ç–æ—Ä–æ–≤, –≤–ª–∏—è—é—â–∏—Ö –Ω–∞ –∫–∞—á–µ—Å—Ç–≤–æ –æ—Ç–≤–µ—Ç–æ–≤, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Ä–∞–∑–º–µ—Ä –º–æ–¥–µ–ª–∏, –¥–∏–∑–∞–π–Ω –ø—Ä–æ–º–ø—Ç–æ–≤, —Ä–∞–∑–º–µ—Ä –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤, —Ä–∞–∑–º–µ—Ä –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π –∏ –¥—Ä—É–≥–∏–µ. –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏—è –¥–∞—é—Ç –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–µ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –¥–ª—è –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ RAG-—Å–∏—Å—Ç–µ–º, –æ–±–µ—Å–ø–µ—á–∏–≤–∞—è –±–∞–ª–∞–Ω—Å –º–µ–∂–¥—É –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ç–∏–≤–Ω–æ—Å—Ç—å—é –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å—é –ø–æ–∏—Å–∫

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [02:52<00:43, 21.62s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: LADDER: Self-Improving LLMs Through Recursive Problem Decomposition

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: LADDER ‚Äî —ç—Ç–æ —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π —Ä–µ—à–∞—Ç—å –∑–∞–¥–∞—á–∏ –∑–∞ —Å—á—ë—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏—è –ø—Ä–æ–±–ª–µ–º –∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã–º–∏ –Ω–∞–≥—Ä–∞–¥–∞–º–∏. –ï–≥–æ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ TTRL –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —É–ª—É—á—à–∞—Ç—å —Å–≤–æ–∏ –Ω–∞–≤—ã–∫–∏ –≤ —Ä–µ–∂–∏–º–µ —Ä–∞–±–æ—Ç—ã. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ –ø–æ–≤—ã—à–µ–Ω–∏–µ —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–µ–π –∫ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–º—É –º—ã—à–ª–µ–Ω–∏—é, —á—Ç–æ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –ø–æ–¥—Ö–æ–¥–∞. –†–∞–±–æ—Ç–∞ –æ—Å–Ω–æ–≤–∞–Ω–∞ –Ω–∞ —Ç—Ä—ë—Ö –∫–ª—é—á–µ–≤—ã—Ö –∏–¥–µ—è—Ö: —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–º —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–∏ –∑–∞–¥–∞—á, –≤–∞–∂–Ω–æ—Å—Ç–∏ –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [03:17<00:22, 22.61s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Scalable-Softmax Is Superior for Attention

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ ‚Äî Scalable-Softmax (SSMax), –∫–æ—Ç–æ—Ä—ã–π –∑–∞–º–µ–Ω—è–µ—Ç –∫–ª–∞—Å—Å–∏—á–µ—Å–∫—É—é —Ñ—É–Ω–∫—Ü–∏—é Softmax –≤ —Å–ª–æ—è—Ö –≤–Ω–∏–º–∞–Ω–∏—è —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤. SSMax —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É —É—Ö—É–¥—à–µ–Ω–∏—è —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –≤–Ω–∏–º–∞–Ω–∏—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ –¥–ª–∏–Ω—ã –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞, –ø–æ–∑–≤–æ–ª—è—è –º–æ–¥–µ–ª—è–º —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —Ñ–æ–∫—É—Å–∏—Ä–æ–≤–∞—Ç—å—Å—è –Ω–∞ –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ –º–æ–¥–µ–ª–∏ —Å SSMax –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç —É–ª—É—á—à–µ–Ω–Ω—É—é —Å–∫–æ—Ä–æ—Å—Ç—å –æ–±—É—á–µ–Ω–∏—è, –ª—É—á—à—É—é –≥–µ–Ω–µ—Ä–∞–ª–∏–∑–∞—Ü–∏—é –Ω–∞ –¥–ª–∏–Ω–Ω—ã–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç—ã –∏ –±–æ–ª–µ–µ –≤—ã—Å–æ–∫—É—é —Ç–æ—á–Ω–æ—Å—Ç—å –≤ –∑–∞–¥–∞—á–∞—Ö –∏–∑–≤–ª–µ—á–µ–Ω–∏—è –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. SSMax –º–æ–∂–µ—Ç

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [03:42<00:00, 22.22s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: FlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º —Ç–æ—á–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è, —É—á–∏—Ç—ã–≤–∞—é—â–∏–π –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –ø–∞–º—è—Ç–∏ (IO-–æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π), –∫–æ—Ç–æ—Ä—ã–π –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É—Å–∫–æ—Ä—è–µ—Ç –æ–±—Ä–∞–±–æ—Ç–∫—É –¥–ª–∏–Ω–Ω—ã—Ö –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–µ–π –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞—Ö. –û–Ω –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ç–∞–π–ª–∏–Ω–≥ –¥–ª—è –º–∏–Ω–∏–º–∏–∑–∞—Ü–∏–∏ –æ–ø–µ—Ä–∞—Ü–∏–π —á—Ç–µ–Ω–∏—è/–∑–∞–ø–∏—Å–∏ –º–µ–∂–¥—É –≤—ã—Å–æ–∫–æ–ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ–π –ø–∞–º—è—Ç—å—é HBM –∏ SRAM. FlashAttention –ø–æ–∑–≤–æ–ª—è–µ—Ç —É—Å–∫–æ—Ä–∏—Ç—å –æ–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–µ–π, —Ç–∞–∫–∏—Ö –∫–∞–∫ BERT-large, GPT-2 –∏ Long Range Arena, –∏ —É–ª—É—á—à–∞–µ—Ç –∫–∞—á–µ—Å—Ç–≤–æ –º–æ–¥–µ–ª–µ–π, –ø–æ–∑–≤–æ–ª—è—è –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—Ç—å –±–æ–ª–µ–µ –¥–ª–∏–Ω–Ω—ã–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç—ã. –†–∞—Å—à–∏—Ä–µ–Ω–∏–µ FlashAttention ‚Äî –±




In [6]:
outputs = []
times = []
for arxiv_url in tqdm(urls):
    arxiv_id = extract_arxiv_id(arxiv_url)
    
    pdf_path = f'./articles/{arxiv_id}.pdf'
    pdf_path, title = download_pdf(arxiv_id, pdf_path)
    md_raw = pymupdf4llm.to_markdown(pdf_path)
    md_trimmed = trim_markdown_after_section(md_raw)
    md_cleaned = clean_markdown_for_rag(md_trimmed)
    abstract = extract_section(md_cleaned, "abstract", ["absctract", "summary"])
    conclusion = extract_section(md_cleaned, "conclusion", ["conclusions", "closing remarks"])
    
    messages = build_messages(abstract, conclusion)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, temperature=None, top_p=None, top_k=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: {title}\n')
    print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:24<03:39, 24.43s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: TTRL: Test-Time Reinforcement Learning

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforcement Learning (TTRL), –ø–æ–∑–≤–æ–ª—è—é—â–∏–π –æ–±—É—á–∞—Ç—å –±–æ–ª—å—à–∏–µ —è–∑—ã–∫–æ–≤—ã–µ –º–æ–¥–µ–ª–∏ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —É—Å–∏–ª–∏—Ç–µ–ª—å–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –Ω–µ–æ—Ç–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –û—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–∏ —Ñ—É–Ω–∫—Ü–∏–∏ –Ω–∞–≥—Ä–∞–¥—ã –Ω–∞ –æ—Å–Ω–æ–≤–µ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤, –æ—Å–Ω–æ–≤–∞–Ω–Ω–æ–π –Ω–∞ —Å–æ–≥–ª–∞—Å–∏–∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ TTRL –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –Ω–∞ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö, –≤–∫–ª—é—á–∞—è –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–µ –∑–∞–¥–∞—á–∏, –∏ –ø—Ä–∏–±–ª–∏–∂–∞–µ—Ç —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –∫ —É—Ä–æ–≤–Ω—è–º –º–æ–¥–µ–ª–µ–π, –æ–±—É—á–µ–Ω–Ω—ã—Ö –Ω–∞ –¥–∞–Ω–Ω—ã—Ö

 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:47<03:08, 23.60s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Attention Is All You Need

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É Transformer ‚Äî –º–æ–¥–µ–ª—å, –æ—Å–Ω–æ–≤–∞–Ω–Ω—É—é –ø–æ–ª–Ω–æ—Å—Ç—å—é –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º–∞—Ö –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä–∞—è –∑–∞–º–µ–Ω—è–µ—Ç —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã–µ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ –∏ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã–µ —Å–µ—Ç–∏. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ Transformer –ø—Ä–µ–≤–æ—Å—Ö–æ–¥–∏—Ç —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–µ –º–æ–¥–µ–ª–∏ –ø–æ –∫–∞—á–µ—Å—Ç–≤—É –ø–µ—Ä–µ–≤–æ–¥–∞, –æ–±–ª–∞–¥–∞–µ—Ç –ª—É—á—à–µ–π –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑—É–µ–º–æ—Å—Ç—å—é –∏ —Ç—Ä–µ–±—É–µ—Ç –º–µ–Ω—å—à–µ –≤—Ä–µ–º–µ–Ω–∏ –Ω–∞ –æ–±—É—á–µ–Ω–∏–µ. –ù–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ —Å –∞–Ω–≥–ª–∏–π—Å–∫–æ–≥–æ –Ω–∞ –Ω–µ–º–µ—Ü–∫–∏–π –∏ —Ñ—Ä–∞–Ω—Ü—É–∑–∫–∏–π –º–æ–¥–µ–ª—å –¥–æ—Å—Ç–∏–≥–ª–∞ –Ω–æ–≤—ã—Ö —Ä–µ–∫–æ—Ä–¥–æ–≤ –ø–æ –º–µ—Ç—Ä–∏–∫–µ BLEU. –¢–∞–∫–∂–µ Transformer —É—Å–ø–µ—à–Ω–æ –ø—Ä–∏–º–µ–Ω—è–ª—Å—è –¥–ª—è –¥—Ä—É–≥–∏—Ö –∑–∞–¥–∞—á, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—

 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [01:10<02:43, 23.42s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Recommender systems: when memory matters

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–í —Å—Ç–∞—Ç—å–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –Ω–∞ –æ–±—É—á–∞–µ–º–æ—Å—Ç—å –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω–æ–≥–æ —Å–∏—Å—Ç–µ–º—ã —Å —É—á–µ—Ç–æ–º –Ω–µ—è–≤–Ω–æ–π –æ–±—Ä–∞—Ç–Ω–æ–π —Å–≤—è–∑–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π. –ü—Ä–µ–¥–ª–æ–∂–µ–Ω –æ–Ω–ª–∞–π–Ω-–∞–ª–≥–æ—Ä–∏—Ç–º, –æ–±–Ω–æ–≤–ª—è—é—â–∏–π –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ –ø–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è–º –Ω–∞ –±–ª–æ–∫–∞—Ö —ç–ª–µ–º–µ–Ω—Ç–æ–≤, —Å–æ—Å—Ç–æ—è—â–∏—Ö –∏–∑ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –Ω–µ–ø—Ä–æ—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã—Ö —ç–ª–µ–º–µ–Ω—Ç–æ–≤, –∑–∞ –∫–æ—Ç–æ—Ä—ã–º–∏ —Å–ª–µ–¥—É–µ—Ç –ø—Ä–æ—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã–π. –≠–º–ø–∏—Ä–∏—á–µ—Å–∫–∏–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏—è –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –ø–æ —Å—Ç–µ–ø–µ–Ω–∏ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è—Ç–∏ –≤ –∏—Ö –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–

 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [01:29<02:08, 21.49s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Large Language Models Meet NLP: A Survey

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç —Å–æ–±–æ–π –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑–æ—Ä –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –≤ –æ–±–ª–∞—Å—Ç–∏ –æ–±—Ä–∞–±–æ—Ç–∫–∏ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ–≥–æ —è–∑—ã–∫–∞ (NLP). –í —Ä–∞–±–æ—Ç–µ –≤–≤–æ–¥–∏—Ç—Å—è —É–Ω–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞–Ω–Ω–∞—è –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏—è, –≤–∫–ª—é—á–∞—é—â–∞—è –ø–∞—Ä–∞–º–µ—Ç—Ä—ã "–∑–∞–º–æ—Ä–æ–∂–µ–Ω–Ω—ã—Ö" –∏ "–Ω–∞—Å—Ç—Ä–æ–µ–Ω–Ω—ã—Ö" –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–π, –∞ —Ç–∞–∫–∂–µ –≤—ã—è–≤–ª—è—é—Ç—Å—è –Ω–æ–≤—ã–µ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏—è –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–π –∏ –≤—ã–∑–æ–≤—ã. –¶–µ–ª—å ‚Äî –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–∏—Ç—å —Ü–µ–Ω–Ω—ã–µ –∏–Ω—Å–∞–π—Ç—ã –∏ –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–µ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –¥–ª—è –¥–∞–ª—å–Ω–µ–π—à–µ–≥–æ —Ä–∞–∑–≤–∏—Ç–∏—è LLM –≤ NLP, –∞ —Ç–∞–∫–∂–µ –ø–æ–¥–¥–µ—Ä–∂–∏–≤–∞—Ç—å –ø—É–±–ª–∏—á–Ω—ã–π —Ä–µ—Å—É—Ä—Å –¥–ª—è –æ—Ç—Å–ª–µ–∂–∏–≤–∞–Ω–∏—è –ø–æ—Å–ª–µ–¥–Ω–∏—Ö –¥–æ—Å—Ç–∏–∂–µ–Ω–∏

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [01:51<01:49, 21.84s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –æ–±–æ–±—â–µ–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ç–æ–Ω–∫–æ–π –Ω–∞—Å—Ç—Ä–æ–π–∫–µ –º–æ–¥–µ–ª–µ–π Retrieval-Augmented Generation (RAG), —Å–æ—á–µ—Ç–∞—é—â–∏—Ö –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –∏ –Ω–µ–ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫—É—é –ø–∞–º—è—Ç—å –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–µ–∫—Å—Ç–∞. –í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç—Å—è –¥–≤–µ —Ñ–æ—Ä–º—É–ª–∏—Ä–æ–≤–∫–∏ RAG: –æ–¥–Ω–∞ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –æ–¥–∏–Ω–∞–∫–æ–≤—ã–µ –∏–∑–≤–ª–µ—á–µ–Ω–Ω—ã–µ –¥–æ–∫—É–º–µ–Ω—Ç—ã –Ω–∞ –ø—Ä–æ—Ç—è–∂–µ–Ω–∏–∏ –≤—Å–µ–π –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏, –∞ –¥—Ä—É–≥–∞—è ‚Äî —Ä–∞–∑–Ω—ã–µ –¥–æ–∫—É–º–µ–Ω—Ç—ã –¥–ª—è –∫–∞–∂–¥–æ–≥–æ —Ç–æ–∫–µ–Ω–∞. –ú–æ–¥–µ–ª–∏ –æ—Ü–µ–Ω–∏–≤–∞—é—Ç—Å—è –Ω–∞ –∑–∞–¥–∞—á–∞—Ö, —Ç—Ä–µ–±—É—é—â–∏—Ö –∑–Ω–∞–Ω–∏–π, –∏ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –ª—É—á—à–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∏—á–µ—Å–∫–∏–º–∏ seq2seq-–º–æ–¥–µ–ª—è–º–∏ –∏ –∞—Ä—Ö–∏—Ç–µ–∫—

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [02:18<01:34, 23.69s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Efficient Estimation of Word Representations in Vector Space

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –º–æ–¥–µ–ª–µ–π –¥–ª—è –≤—ã—á–∏—Å–ª–µ–Ω–∏—è –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –Ω–∞ –æ—Å–Ω–æ–≤–µ –±–æ–ª—å—à–∏—Ö –¥–∞–Ω–Ω—ã—Ö. –≠—Ç–∏ –º–æ–¥–µ–ª–∏ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ —Ç–æ—á–Ω–æ—Å—Ç–∏ –≤ –∑–∞–¥–∞—á–∞—Ö —Å—Ö–æ–¥—Å—Ç–≤–∞ —Å–ª–æ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–º–∏ –º–µ—Ç–æ–¥–∞–º–∏, –ø—Ä–∏ —ç—Ç–æ–º —Ç—Ä–µ–±—É—è –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –º–µ–Ω—å—à–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö —Ä–µ—Å—É—Ä—Å–æ–≤. –í–µ–∫—Ç–æ—Ä—ã –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç —Å–æ—Å—Ç–æ—è–Ω–∏–µ-–æ—Ñ-–∞—Ä—Ç —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –Ω–∞–±–æ—Ä–∞—Ö –¥–ª—è —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á. –ò—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ —ç—Ç–∏—Ö –º–æ–¥–µ–ª–µ–π –ø–æ–∑–≤–æ–ª—è–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ –æ

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [02:35<01:03, 21.33s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Enhancing Retrieval-Augmented Generation: A Study of Best Practices

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –ø—Ä–æ–¥–≤–∏–Ω—É—Ç—ã–µ –¥–∏–∑–∞–π–Ω-—Ä–µ—à–µ–Ω–∏—è RAG-—Å–∏—Å—Ç–µ–º, –≤–∫–ª—é—á–∞—é—â–∏–µ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –∑–∞–ø—Ä–æ—Å–∞, –Ω–æ–≤—ã–µ —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏ –ø–æ–∏—Å–∫–∞ –∏ Contrastive In-Context Learning. –ü—Ä–æ–≤–µ–¥–µ–Ω–æ —Å–∏—Å—Ç–µ–º–Ω–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –∫–ª—é—á–µ–≤—ã—Ö —Ñ–∞–∫—Ç–æ—Ä–æ–≤, –≤–ª–∏—è—é—â–∏—Ö –Ω–∞ –∫–∞—á–µ—Å—Ç–≤–æ –æ—Ç–≤–µ—Ç–æ–≤, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Ä–∞–∑–º–µ—Ä –º–æ–¥–µ–ª–∏, –¥–∏–∑–∞–π–Ω –ø—Ä–æ–º–ø—Ç–æ–≤, —Ä–∞–∑–º–µ—Ä –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤, —Ä–∞–∑–º–µ—Ä –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π –∏ –¥—Ä—É–≥–∏–µ. –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–∞—é—Ç –ø—Ä–∞–∫—Ç–∏—á–µ—Å–∫–∏–µ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –ø–æ –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ RAG-—Å–∏—Å—Ç–µ–º, –æ–±–µ—Å–ø–µ—á–∏–≤–∞—è –±–∞–ª–∞–Ω—Å –º–µ–∂–¥—É –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ç–∏–≤–Ω–æ—Å—Ç—å—é –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å—é –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏. –†–µ–∞–ª–∏–∑–∞—Ü–∏—è –∏ –∫–

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [02:55<00:41, 20.85s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: LADDER: Self-Improving LLMs Through Recursive Problem Decomposition

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
LADDER ‚Äî —ç—Ç–æ —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π –∫ —Ä–µ—à–µ–Ω–∏—é –∑–∞–¥–∞—á —á–µ—Ä–µ–∑ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –ø—Ä–æ–±–ª–µ–º –∏ –æ–±—É—á–µ–Ω–∏–µ —Å –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–µ–Ω–Ω—ã–º–∏ –Ω–∞–≥—Ä–∞–¥–∞–º–∏. –ï–≥–æ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ TTRL –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —É–ª—É—á—à–∞—Ç—å —Å–≤–æ–∏ –Ω–∞–≤—ã–∫–∏ –≤ —Ä–µ–∂–∏–º–µ —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ –ø–æ–≤—ã—à–µ–Ω–∏–µ —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–µ–π –∫ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–º—É –º—ã—à–ª–µ–Ω–∏—é, —á—Ç–æ –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –ø–æ–¥—Ö–æ–¥–∞. –†–∞–±–æ—Ç–∞ –ø–æ–¥—á–µ—Ä–∫–∏–≤–∞–µ—Ç –≤–∞–∂–Ω–æ—Å—Ç—å —Å—Ç—Ä–∞—Ç–µ–≥–∏—á–µ—Å–∫–æ–≥–æ –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—è –º–æ–¥–µ–ª–µ–π —Å –æ–∫—Ä—É–∂–µ–Ω–∏–µ–º, –∞ –Ω–µ —Ç–æ–ª—å–∫–æ –

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [03:27<00:24, 24.34s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: Scalable-Softmax Is Superior for Attention

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ Scalable-Softmax (SSMax), –∫–æ—Ç–æ—Ä—ã–π —É–ª—É—á—à–∞–µ—Ç —Ä–∞–±–æ—Ç—É –≤–Ω–∏–º–∞–Ω–∏—è –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–Ω—ã—Ö –º–æ–¥–µ–ª—è—Ö, –æ—Å–æ–±–µ–Ω–Ω–æ –ø—Ä–∏ —Ä–∞–±–æ—Ç–µ —Å –¥–ª–∏–Ω–Ω—ã–º–∏ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞–º–∏. SSMax —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É —É–ø–ª–æ—â–µ–Ω–∏—è —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –≤–Ω–∏–º–∞–Ω–∏—è, —Ö–∞—Ä–∞–∫—Ç–µ—Ä–Ω—É—é –¥–ª—è —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω–æ–≥–æ Softmax, –∏ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º –ª—É—á—à–µ —Ñ–æ–∫—É—Å–∏—Ä–æ–≤–∞—Ç—å—Å—è –Ω–∞ –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. –≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã –ø–æ–∫–∞–∑–∞–ª–∏, —á—Ç–æ –º–æ–¥–µ–ª–∏ —Å SSMax –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—é—Ç –±–æ–ª–µ–µ –±—ã—Å—Ç—Ä—É—é —Å—Ö–æ–¥–∏–º–æ—Å—Ç—å, —É–ª—É—á—à–µ–Ω–Ω—É—é –æ–±–æ–±—â–∞–µ–º–æ—Å—Ç—å –Ω–∞ –¥–ª–∏–Ω–Ω—ã–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç—ã –∏ –ª—É—á—à—É—é —Ç–æ—á–Ω–æ—Å—Ç—å –≤ –∑–∞–¥–∞—á–∞—Ö –∏–∑–≤–ª–µ—á–µ–Ω–∏—è –∫–ª—é—á–µ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [03:47<00:00, 22.77s/it]

–ù–∞–∑–≤–∞–Ω–∏–µ —Å—Ç–∞—Ç—å–∏: FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness

–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è:   
FlashAttention ‚Äî —ç—Ç–æ –∞–ª–≥–æ—Ä–∏—Ç–º —Ç–æ—á–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è, —É—á–∏—Ç—ã–≤–∞—é—â–∏–π –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –ø–∞–º—è—Ç–∏ (IO-–æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π), –∫–æ—Ç–æ—Ä—ã–π –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É—Å–∫–æ—Ä—è–µ—Ç –æ–±—É—á–µ–Ω–∏–µ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤, —Å–æ–∫—Ä–∞—â–∞—è –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–±—Ä–∞—â–µ–Ω–∏–π –∫ –≤—ã—Å–æ–∫–æ–ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ–π –ø–∞–º—è—Ç–∏ GPU. –û–Ω –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –±–æ–ª–µ–µ –¥–ª–∏–Ω–Ω—ã–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç—ã, —É–ª—É—á—à–∞—è –∫–∞—á–µ—Å—Ç–≤–æ –º–æ–¥–µ–ª–µ–π –∏ –æ—Ç–∫—Ä—ã–≤–∞—è –Ω–æ–≤—ã–µ –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç–∏, —Ç–∞–∫–∏–µ –∫–∞–∫ –≤—ã—Å–æ–∫–∞—è —Ç–æ—á–Ω–æ—Å—Ç—å –Ω–∞ –∑–∞–¥–∞—á–∞—Ö —Å –æ—á–µ–Ω—å –¥–ª–∏–Ω–Ω—ã–º–∏ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—è–º–∏.







In [7]:
times

[18.104891061782837,
 19.647082090377808,
 16.988260507583618,
 12.867859601974487,
 16.346726655960083,
 23.98879098892212,
 13.167678594589233,
 15.985154390335083,
 26.50315833091736,
 11.247574806213379]

In [8]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ Qwen3: {np.mean(times):.0f} —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏ Qwen3: 17 —Å–µ–∫


In [11]:
df_summarization = pd.read_csv('summarize_examples.csv')

In [12]:
outputs = [x.split('\n')[1] for x in outputs]
df_summarization['Qwen3'] = outputs

In [13]:
df_summarization = df_summarization[['gt', 'Tbank', 'YandexGPT', 'Qwen3', 'summarize_best_llm', 'sim_scores_tbank', 'sim_scores_yandex']]

In [14]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT,Qwen3,summarize_best_llm,sim_scores_tbank,sim_scores_yandex
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ –æ–±—É—á–µ–Ω–∏—è...,1,0.909943,0.930276
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É Transformer ‚Äî ...,1,0.797422,0.852751
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...,–í —Å—Ç–∞—Ç—å–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è...,0,0.774107,0.773998
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç —Å–æ–±–æ–π –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫...,1,0.755156,0.731499
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –æ–±–æ–±—â—ë–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ç–æ–Ω–∫–æ–π...,0,0.737129,0.688586
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ...,0,0.739331,0.708769
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –ø—Ä–æ–¥–≤–∏–Ω—É—Ç—ã–µ –¥–∏–∑–∞–π–Ω-—Å–∏—Å—Ç–µ...,0,0.856174,0.903531
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...,"LADDER ‚Äî —ç—Ç–æ —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏...",1,0.804446,0.914699
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ Scalable-Softma...,1,0.864461,0.898752
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...,"FlashAttention ‚Äî —ç—Ç–æ —Ç–æ—á–Ω—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º –≤–Ω–∏–º–∞–Ω–∏—è,...",0,0.89904,0.91454


In [37]:
n = 9
str_gt = df_summarization['gt'].iloc[n]
str_tbank = df_summarization['Tbank'].iloc[n]
str_yandex = df_summarization['YandexGPT'].iloc[n]
str_qwen = df_summarization['Qwen3'].iloc[n]

print(f'–≠—Ç–∞–ª–æ–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {str_gt}\n')
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è Tbank: {str_tbank}\n')
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è YandexGPT: {str_yandex}\n')  
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è Qwen: {str_qwen}')

–≠—Ç–∞–ª–æ–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º —Ç–æ—á–Ω–æ–≥–æ –≤–Ω–∏–º–∞–Ω–∏—è, –æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –¥–ª—è GPU-–∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä. –û–Ω –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ç–∞–π–ª–∏–Ω–≥, —á—Ç–æ–±—ã –º–∏–Ω–∏–º–∏–∑–∏—Ä–æ–≤–∞—Ç—å –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–ø–µ—Ä–∞—Ü–∏–π —á—Ç–µ–Ω–∏—è –∏ –∑–∞–ø–∏—Å–∏ –º–µ–∂–¥—É –≤—ã—Å–æ–∫–æ—Å–∫–æ—Ä–æ—Å—Ç–Ω–æ–π –ø–∞–º—è—Ç—å—é (HBM) –∏ –ª–æ–∫–∞–ª—å–Ω–æ–π SRAM, —á—Ç–æ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É—Å–∫–æ—Ä—è–µ—Ç –æ–±—É—á–µ–Ω–∏–µ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–æ–≤. FlashAttention –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç —Ä–µ–∞–ª—å–Ω—ã–π –ø—Ä–∏—Ä–æ—Å—Ç —Å–∫–æ—Ä–æ—Å—Ç–∏ (–¥–æ 3√ó –Ω–∞ GPT-2, 15% –Ω–∞ BERT-large) –±–µ–∑ —É—â–µ—Ä–±–∞ –¥–ª—è –∫–∞—á–µ—Å—Ç–≤–∞ –º–æ–¥–µ–ª–∏. –†–∞—Å—à–∏—Ä–µ–Ω–Ω–∞—è –≤–µ—Ä—Å–∏—è —Å –±–ª–æ–∫-—Å–ø–∞—Ä—Å–Ω–æ—Å—Ç—å—é –æ–ø–µ—Ä–µ–∂–∞–µ—Ç —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–µ –ø—Ä–∏–±–ª–∏–∂—ë–Ω–Ω—ã–µ –º–µ—Ç–æ–¥—ã –ø–æ —Å–∫–æ—Ä–æ—Å—Ç–∏ –∏ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º —Ä–∞–±–æ—Ç–∞

In [6]:
#test
df = pd.read_csv('summarize_examples.csv')

In [12]:
n = 5
str_gt = df['gt'].iloc[n]
str_tbank = df['Tbank'].iloc[n]
str_yandex = df['YandexGPT'].iloc[n]
str_qwen = df['Qwen3'].iloc[n]

print(f'–≠—Ç–∞–ª–æ–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: {str_gt}\n')
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è Tbank: {str_tbank}\n')
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è YandexGPT: {str_yandex}\n')  
print(f'–°—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è Qwen: {str_qwen}')

–≠—Ç–∞–ª–æ–Ω–Ω–∞—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è: –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ–¥—Ö–æ–¥–∞ ‚Äî CBOW –∏ Skip-gram ‚Äî –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤ –Ω–∞ –º–∞—Å—à—Ç–∞–±–Ω—ã—Ö –∫–æ—Ä–ø—É—Å–∞—Ö. –≠—Ç–∏ –º–æ–¥–µ–ª–∏ –æ–±–µ—Å–ø–µ—á–∏–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –∫–∞—á–µ—Å—Ç–≤–∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π –ø—Ä–∏ –Ω–∏–∑–∫–∏—Ö –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–∞—Ç—Ä–∞—Ç–∞—Ö, –ø–æ–∑–≤–æ–ª—è—è –æ–±—É—á–∞—Ç—å –≤–µ–∫—Ç–æ—Ä—ã –¥–∞–∂–µ –Ω–∞ —Ç—Ä–∏–ª–ª–∏–æ–Ω–µ —Å–ª–æ–≤. –í–µ–∫—Ç–æ—Ä—ã –¥–æ—Å—Ç–∏–≥–∞—é—Ç state-of-the-art —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –≤ –∑–∞–¥–∞—á–∞—Ö –æ—Ü–µ–Ω–∫–∏ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ –∏ –Ω–∞—Ö–æ–¥—è—Ç –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ –≤ —Ä—è–¥–µ NLP-–∑–∞–¥–∞—á, –≤–∫–ª—é—á–∞—è –º–∞—à–∏–Ω–Ω—ã–π –ø–µ—Ä–µ–≤–æ–¥, –∞–Ω–∞–ª–∏–∑ —Ç–æ–Ω–∞–ª—å–Ω–æ—Å—Ç–∏ –∏ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ –∑–Ω–

## LLM as a judge

In [38]:
# scores [1-3] —á–µ–º –º–µ–Ω—å—à–µ —Ç–µ–º –ª—É—á—à–µ
scores_tbank = [1, 3, 1, 3, 2, 3, 3, 2, 3, 1]
scores_yandex = [3, 1, 2, 2, 3, 2, 2, 1, 2, 3]
scores_qwen = [2, 2, 3, 1, 1, 1, 1, 3, 1, 2]

In [39]:
df_summarization['summarize_tbank_rank'] = scores_tbank
df_summarization['summarize_yandex_rank'] = scores_yandex
df_summarization['summarize_qwen_rank'] = scores_qwen

In [53]:
lst = ['gt',
 'Tbank',
 'YandexGPT',
 'Qwen3',
'summarize_tbank_rank',
 'summarize_yandex_rank',
 'summarize_qwen_rank',
 'sim_scores_tbank',
 'sim_scores_yandex',
 ]

In [56]:
df_summarization = df_summarization[lst]

In [59]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT,Qwen3,summarize_tbank_rank,summarize_yandex_rank,summarize_qwen_rank,sim_scores_tbank,sim_scores_yandex
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ –æ–±—É—á–µ–Ω–∏—è...,1,3,2,0.909943,0.930276
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É Transformer ‚Äî ...,3,1,2,0.797422,0.852751
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...,–í —Å—Ç–∞—Ç—å–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è...,1,2,3,0.774107,0.773998
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç —Å–æ–±–æ–π –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫...,3,2,1,0.755156,0.731499
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –æ–±–æ–±—â—ë–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ç–æ–Ω–∫–æ–π...,2,3,1,0.737129,0.688586
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ...,3,2,1,0.739331,0.708769
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –ø—Ä–æ–¥–≤–∏–Ω—É—Ç—ã–µ –¥–∏–∑–∞–π–Ω-—Å–∏—Å—Ç–µ...,3,2,1,0.856174,0.903531
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...,"LADDER ‚Äî —ç—Ç–æ —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏...",2,1,3,0.804446,0.914699
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ Scalable-Softma...,3,2,1,0.864461,0.898752
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...,"FlashAttention ‚Äî —ç—Ç–æ —Ç–æ—á–Ω—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º –≤–Ω–∏–º–∞–Ω–∏—è,...",1,3,2,0.89904,0.91454


In [61]:
df_summarization[['summarize_tbank_rank', 'summarize_yandex_rank', 'summarize_qwen_rank']]

Unnamed: 0,summarize_tbank_rank,summarize_yandex_rank,summarize_qwen_rank
0,1,3,2
1,3,1,2
2,1,2,3
3,3,2,1
4,2,3,1
5,3,2,1
6,3,2,1
7,2,1,3
8,3,2,1
9,1,3,2


In [62]:
df_summarization[['summarize_tbank_rank', 'summarize_yandex_rank', 'summarize_qwen_rank']].mean()

summarize_tbank_rank     2.2
summarize_yandex_rank    2.1
summarize_qwen_rank      1.7
dtype: float64

In [63]:
df_summarization.to_csv('summarize_examples.csv', index=False)

# Bleu/Rouge

In [70]:
df = pd.read_csv('summarize_examples.csv')

## Bleu

In [71]:
hyps = df["Tbank"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_tbank"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [72]:
hyps = df["YandexGPT"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_yandex"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [73]:
hyps = df["Qwen3"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_qwen"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [78]:
df[['bleu_tbank', 'bleu_yandex', 'bleu_qwen']]

Unnamed: 0,bleu_tbank,bleu_yandex,bleu_qwen
0,14.12465,20.594433,21.681487
1,3.24908,6.779259,1.948774
2,9.707592,8.007145,8.230425
3,5.847037,7.050412,14.343329
4,14.852588,2.320887,5.058517
5,6.148636,5.149729,12.760426
6,12.118868,14.483633,17.328665
7,5.896288,14.176252,10.095051
8,8.201007,9.626733,12.271704
9,10.64977,8.815559,1.047562


In [80]:
df[['bleu_tbank', 'bleu_yandex', 'bleu_qwen']].mean()

bleu_tbank      9.079552
bleu_yandex     9.700404
bleu_qwen      10.476594
dtype: float64

In [79]:
df.to_csv('summarize_examples.csv', index=False)

## Rouge

In [154]:
df = pd.read_csv('summarize_examples.csv')

In [162]:
import re
from nltk.stem.snowball import SnowballStemmer

stem = SnowballStemmer("russian")

class RussianTok(tokenizers.Tokenizer):
    _re = re.compile(r"[–∞-—è—ë]+", re.I)
    def tokenize(self, txt):
        return [stem.stem(w) for w in self._re.findall(txt.lower())]

ru_tok = RussianTok()

scorer = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL"],
    use_stemmer=False,          
    tokenizer=ru_tok            # —Å–≤–æ–π —Ç–æ–∫–µ–Ω–∞–π–∑–µ—Ä
)

In [163]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["Tbank"], df["gt"])
]

df["rouge1_f_tbank"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_tbank"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_tbank"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_tbank"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_tbank"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_tbank"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_tbank"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_tbank"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_tbank"] = [s["rougeL"].recall for s in pair_scores]

In [164]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["YandexGPT"], df["gt"])
]

df["rouge1_f_yandex"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_yandex"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_yandex"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_yandex"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_yandex"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_yandex"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_yandex"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_yandex"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_yandex"] = [s["rougeL"].recall for s in pair_scores]

In [166]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["Qwen3"], df["gt"])
]

df["rouge1_f_qwen"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_qwen"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_qwen"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_qwen"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_qwen"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_qwen"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_qwen"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_qwen"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_qwen"] = [s["rougeL"].recall for s in pair_scores]

### F-measure

In [167]:
df[['rouge1_f_tbank', 'rouge2_f_tbank', 'rougeL_f_tbank']].mean()

rouge1_f_tbank    0.454545
rouge2_f_tbank    0.128164
rougeL_f_tbank    0.288164
dtype: float64

In [168]:
df[['rouge1_f_yandex', 'rouge2_f_yandex', 'rougeL_f_yandex']].mean()

rouge1_f_yandex    0.458288
rouge2_f_yandex    0.126939
rougeL_f_yandex    0.296055
dtype: float64

In [169]:
df[['rouge1_f_qwen', 'rouge2_f_qwen', 'rougeL_f_qwen']].mean()

rouge1_f_qwen    0.460138
rouge2_f_qwen    0.154361
rougeL_f_qwen    0.314638
dtype: float64

### Precision

In [119]:
df[['rouge1_p_tbank', 'rouge2_p_tbank', 'rougeL_p_tbank']].mean()

rouge1_p_tbank    0.423024
rouge2_p_tbank    0.119411
rougeL_p_tbank    0.265858
dtype: float64

In [123]:
df[['rouge1_p_yandex', 'rouge2_p_yandex', 'rougeL_p_yandex']].mean()

rouge1_p_yandex    0.446262
rouge2_p_yandex    0.123050
rougeL_p_yandex    0.287735
dtype: float64

In [124]:
df[['rouge1_p_qwen', 'rouge2_p_qwen', 'rougeL_p_qwen']].mean()

rouge1_p_qwen    0.462337
rouge2_p_qwen    0.150601
rougeL_p_qwen    0.316523
dtype: float64

### Recall

In [125]:
df[['rouge1_r_tbank', 'rouge2_r_tbank', 'rougeL_r_tbank']].mean()

rouge1_r_tbank    0.504198
rouge2_r_tbank    0.141827
rougeL_r_tbank    0.323061
dtype: float64

In [126]:
df[['rouge1_r_yandex', 'rouge2_r_yandex', 'rougeL_r_yandex']].mean()

rouge1_r_yandex    0.483706
rouge2_r_yandex    0.134893
rougeL_r_yandex    0.313183
dtype: float64

In [127]:
df[['rouge1_r_qwen', 'rouge2_r_qwen', 'rougeL_r_qwen']].mean()

rouge1_r_qwen    0.472849
rouge2_r_qwen    0.161655
rougeL_r_qwen    0.322263
dtype: float64

In [129]:
df.to_csv('summarize_examples.csv', index=False)

# Cosine similarity

In [64]:
urls = ['https://arxiv.org/pdf/2504.16084',
        'https://arxiv.org/abs/1706.03762',
        'https://arxiv.org/pdf/2112.02242',
        'https://arxiv.org/pdf/2405.12819',
        'https://arxiv.org/pdf/2005.11401',
        'https://arxiv.org/pdf/1301.3781',
        'https://arxiv.org/pdf/2501.07391',
        'https://arxiv.org/pdf/2503.00735',
        'https://arxiv.org/pdf/2501.19399',
        'https://arxiv.org/pdf/2205.14135']


In [65]:
from sentence_transformers import SentenceTransformer
import torch

# –ó–∞–≥—Ä—É–∑–∏–º SOTA —ç–º–±–µ–¥–¥–µ—Ä –¥–ª—è —Ä—É—Å—Å–∫–∏—Ö —Ç–µ–∫—Å—Ç–æ–≤
embedder = SentenceTransformer("ai-forever/FRIDA")

In [15]:
df_summarization = pd.read_csv('summarize_examples.csv')

In [20]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT,summarize_best_llm
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...,1
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...,1
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...,0
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...,1
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...,0
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...,0
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...,0
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...,1
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...,1
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...,0


## Tbank

In [21]:
reference_answers = df_summarization['gt'].tolist()
generated_answers = df_summarization['Tbank'].tolist()
articles = urls

In [66]:
from tqdm import tqdm
from typing import List, Dict

def evaluate_similarity_frida(
    questions: List[str], 
    generated_answers: List[str],
    reference_answers: List[str]
) -> List[float]:
    """
    –í—ã—á–∏—Å–ª—è–µ—Ç –∫–æ—Å–∏–Ω—É—Å–Ω—É—é –±–ª–∏–∑–æ—Å—Ç—å –º–µ–∂–¥—É —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–º–∏ –∏ —ç—Ç–∞–ª–æ–Ω–Ω—ã–º–∏ –æ—Ç–≤–µ—Ç–∞–º–∏
    —Å –ø–æ–º–æ—â—å—é FRIDA SentenceTransformer.

    :param questions: –°–ø–∏—Å–æ–∫ –≤–æ–ø—Ä–æ—Å–æ–≤
    :param generated_answers: –°–ø–∏—Å–æ–∫ –æ—Ç–≤–µ—Ç–æ–≤, —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö LLM.
    :param reference_answers: –°–ø–∏—Å–æ–∫ —ç—Ç–∞–ª–æ–Ω–Ω—ã—Ö –æ—Ç–≤–µ—Ç–æ–≤.
        
    :return: –ö–æ—Å–∏–Ω—É—Å–Ω–∞—è –±–ª–∏–∑–æ—Å—Ç—å –ø–æ –∫–∞–∂–¥–æ–π –ø–∞—Ä–µ.
    """

    inputs = (
        [f"search_document: {text}" for text in generated_answers] +
        [f"search_document: {text}" for text in reference_answers]
    )

    embeddings = embedder.encode(inputs, convert_to_tensor=True)

    gen_embs = embeddings[:len(generated_answers)]
    ref_embs = embeddings[len(generated_answers):]

    sim_scores = (gen_embs @ ref_embs.T).diagonal().tolist()
    print(len(sim_scores))
    for i, (question, generated, reference) in enumerate(zip(questions, generated_answers, reference_answers)):
        
        print(f"–°—Ç–∞—Ç—å—è: {question}:")
        #print(f"–û–∂–∏–¥–∞–ª–æ—Å—å:\n{reference}")
        #print(f"–ü—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–æ:\n{generated}")
        print(f"cosine_similarity: {sim_scores[i]:.4f}\n")
        print(100 * '*')

    avg_sim = sum(sim_scores) / len(sim_scores)
    print("\n–û–±—â–∞—è –æ—Ü–µ–Ω–∫–∞")
    print(f"–°—Ä–µ–¥–Ω–∏–π similarity: {avg_sim:.4f}")
    
    return sim_scores

In [24]:
scores_tbank = evaluate_similarity_frida(articles, generated_answers, reference_answers)

10
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2504.16084:
cosine_similarity: 0.9099

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/abs/1706.03762:
cosine_similarity: 0.7974

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2112.02242:
cosine_similarity: 0.7741

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2405.12819:
cosine_similarity: 0.7552

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2005.11401:
cosine_similarity: 0.7371

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/1301.3781:
cosine_similarity: 0.7393

*******************************************

In [25]:
scores_tbank

[0.9099433422088623,
 0.7974216341972351,
 0.7741065621376038,
 0.7551555037498474,
 0.7371289134025574,
 0.7393308281898499,
 0.8561741709709167,
 0.8044455647468567,
 0.8644611239433289,
 0.8990402817726135]

In [26]:
df_summarization['sim_scores_tbank'] = scores_tbank

## YandexGPT

In [28]:
reference_answers = df_summarization['gt'].tolist()
generated_answers = df_summarization['YandexGPT'].tolist()
articles = urls

In [29]:
scores_yandex = evaluate_similarity_frida(articles, generated_answers, reference_answers)

10
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2504.16084:
cosine_similarity: 0.9303

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/abs/1706.03762:
cosine_similarity: 0.8528

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2112.02242:
cosine_similarity: 0.7740

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2405.12819:
cosine_similarity: 0.7315

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2005.11401:
cosine_similarity: 0.6886

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/1301.3781:
cosine_similarity: 0.7088

*******************************************

In [30]:
df_summarization['sim_scores_yandex'] = scores_yandex

In [31]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT,summarize_best_llm,sim_scores_tbank,sim_scores_yandex
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...,1,0.909943,0.930276
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...,1,0.797422,0.852751
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...,0,0.774107,0.773998
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...,1,0.755156,0.731499
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...,0,0.737129,0.688586
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...,0,0.739331,0.708769
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...,0,0.856174,0.903531
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...,1,0.804446,0.914699
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...,1,0.864461,0.898752
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...,0,0.89904,0.91454


## Qwen3

In [67]:
reference_answers = df_summarization['gt'].tolist()
generated_answers = df_summarization['Qwen3'].tolist()
articles = urls

In [70]:
scores_qwen = evaluate_similarity_frida(articles, generated_answers, reference_answers)

10
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2504.16084:
cosine_similarity: 0.9530

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/abs/1706.03762:
cosine_similarity: 0.8079

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2112.02242:
cosine_similarity: 0.7632

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2405.12819:
cosine_similarity: 0.7767

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/2005.11401:
cosine_similarity: 0.7756

****************************************************************************************************
–°—Ç–∞—Ç—å—è: https://arxiv.org/pdf/1301.3781:
cosine_similarity: 0.8769

*******************************************

In [71]:
df_summarization['sim_scores_qwen'] = scores_qwen

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_summarization['sim_scores_qwen'] = scores_qwen


In [72]:
df_summarization

Unnamed: 0,gt,Tbank,YandexGPT,Qwen3,summarize_tbank_rank,summarize_yandex_rank,summarize_qwen_rank,sim_scores_tbank,sim_scores_yandex,sim_scores_qwen
0,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Re...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforc...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –º–µ—Ç–æ–¥ Test-Time Reinforce...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ –æ–±—É—á–µ–Ω–∏—è...,1,3,2,0.909943,0.930276,0.953033
1,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Transformer ‚Äî –ø–µ—Ä–≤–∞—è –∞—Ä—Ö–∏...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω –Ω–æ–≤—ã–π –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–π –ø–æ–¥—Ö...,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –Ω–æ–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ–π—Ä–æ–Ω...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É Transformer ‚Äî ...,3,1,2,0.797422,0.852751,0.807929
2,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ –∫ –æ–±—É—á–µ–Ω–∏—é —Ä–µ–∫–æ–º–µ...,–í –¥–∞–Ω–Ω–æ–π —Ä–∞–±–æ—Ç–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á...,–í —Å—Ç–∞—Ç—å–µ –∏–∑—É—á–∞–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –ø–∞–º—è...,–í —Å—Ç–∞—Ç—å–µ –∏—Å—Å–ª–µ–¥—É–µ—Ç—Å—è –≤–ª–∏—è–Ω–∏–µ –¥–æ–ª–≥–æ—Å—Ä–æ—á–Ω–æ–π –ø–∞–º—è...,1,2,3,0.774107,0.773998,0.763235
3,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –æ–±–∑...,\n\n–ò—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ –ø–æ—Å–≤—è—â–µ–Ω–æ –∞–Ω–∞–ª–∏–∑—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–∏—Å—Ç–µ–º–Ω—ã–π –æ–±–∑–æ—Ä –±–æ–ª—å—à–∏—Ö —è...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç —Å–æ–±–æ–π –ø–µ—Ä–≤—ã–π —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫...,3,2,1,0.755156,0.731499,0.776702
4,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –æ–±—â–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –¥–æ–æ–±—É—á–µ–Ω–∏—é ...,"–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –º–æ–¥–µ–ª–∏, —Å–æ—á–µ—Ç–∞—é—â–∏–µ –ø...",–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –ø—Ä–æ–±–ª–µ–º–∞ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–≥...,–°—Ç–∞—Ç—å—è –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è–µ—Ç –æ–±–æ–±—â—ë–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ç–æ–Ω–∫–æ–π...,2,3,1,0.737129,0.688586,0.775555
5,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç –¥–≤–∞ –ø—Ä–æ—Å—Ç—ã—Ö –∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–µ –Ω–æ–≤—ã–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –º–æ–¥–µ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –¥–≤–∞ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã—Ö –ø–æ...,3,2,1,0.739331,0.708769,0.876854
6,–í —Ä–∞–±–æ—Ç–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–∏—Å—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤...,\n\n–í —Å—Ç–∞—Ç—å–µ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—é—Ç—Å—è –∏ –∞–Ω–∞–ª–∏–∑–∏—Ä—É—é—Ç—Å—è ...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–Ω—ã–µ –¥–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω—ã –ø—Ä–æ–¥–≤–∏–Ω—É—Ç—ã–µ –¥–∏–∑–∞–π–Ω-—Å–∏—Å—Ç–µ...,3,2,1,0.856174,0.903531,0.781188
7,"–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç LADDER ‚Äî —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞...","–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω LADDER ‚Äî –º–µ—Ç–æ–¥, —É–ª—É—á—à–∞—é—â...",–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Ñ—Ä–µ–π–º–≤–æ—Ä–∫ LADDER –¥–ª—è —É–ª—É—á...,"LADDER ‚Äî —ç—Ç–æ —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, —É–ª—É—á—à–∞—é—â–∏–π —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏...",2,1,3,0.804446,0.914699,0.919719
8,Scalable-Softmax (SSMax) ‚Äî —ç—Ç–æ –Ω–æ–≤–∞—è –∑–∞–º–µ–Ω–∞ —Ñ—É...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è Scalable-Softmax (SSMax)...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω –Ω–æ–≤—ã–π –º–µ—Ç–æ–¥ Scalable-Softma...,3,2,1,0.864461,0.898752,0.911014
9,–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—é—Ç FlashAttention ‚Äî IO-–æ—Ä–∏–µ–Ω—Ç...,\nFlashAttention ‚Äî —ç—Ç–æ –Ω–æ–≤—ã–π –ø–æ–¥—Ö–æ–¥ –∫ —Ä–µ–∞–ª–∏–∑–∞...,–í —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç—Å—è FlashAttention ‚Äî –∞–ª–≥–æ—Ä–∏—Ç...,"FlashAttention ‚Äî —ç—Ç–æ —Ç–æ—á–Ω—ã–π –∞–ª–≥–æ—Ä–∏—Ç–º –≤–Ω–∏–º–∞–Ω–∏—è,...",1,3,2,0.89904,0.91454,0.846391


In [73]:
df_summarization.to_csv('summarize_examples.csv', index=False)

In [75]:
df_summarization[['sim_scores_tbank', 'sim_scores_yandex', 'sim_scores_qwen']].mean()

sim_scores_tbank     0.813721
sim_scores_yandex    0.831740
sim_scores_qwen      0.841162
dtype: float64

In [76]:
df_summarization.to_csv('summarize_examples.csv', index=False)

# –°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏

In [9]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Tbank: 15 —Å–µ–∫')
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Yandex: 12 —Å–µ–∫')
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Qwen3: 17 —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Tbank: 15 —Å–µ–∫
–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Yandex: 12 —Å–µ–∫
–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–∏ Qwen3: 17 —Å–µ–∫


# –°—Ä–µ–¥–Ω–∏–µ —Ä–∞–Ω–≥–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ similarity

In [269]:
df = pd.read_csv('test_results/summarize_examples.csv')
ranked = df[['sim_scores_tbank', 'sim_scores_yandex', 'sim_scores_qwen']].rank(axis=1, method='min', ascending=False)

In [270]:
ranked.mean().sort_values()

sim_scores_qwen      1.7
sim_scores_yandex    2.0
sim_scores_tbank     2.3
dtype: float64

# –û—Ü–µ–Ω–∫–∞ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞

–û—Ü–µ–Ω–∏–≤–∞—Ç—å –±—É–¥–µ–º –∞–Ω–∞–ª–æ–≥–∏—á–Ω—ã–º –æ–±—Ä–∞–∑–æ–º —Å —Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏–µ–π - –Ω–∞–º–∞–π–Ω–∏–º –≤–æ–ø—Ä–æ—Å—ã –ø–æ —Å—Ç–∞—Ç—å—è–º, —ç—Ç–∞–ª–æ–Ω–Ω—ã–µ –æ—Ç–≤–µ—Ç—ã –Ω–∞ –Ω–∏—Ö –∏ –≤ –¥–∞–ª—å–Ω–µ–π—à–µ–º –±—É–¥–µ–º –ø—Ä–∏–º–µ–Ω—è—Ç—å LLM as a judge + cos similarity –ø–æ–¥—Ö–æ–¥

## –ì–µ–Ω–µ—Ä–∞—Ü–∏—è —ç—Ç–∞–ª–æ–Ω–Ω—ã—Ö –æ—Ç–≤–µ—Ç–æ–≤ –ø–æ 50 –≤–æ–ø—Ä–æ—Å–∞–º

### –°—Ç–∞—Ç—å—è Attention is all you need

In [142]:
arxiv_url = "https://arxiv.org/abs/1706.03762"
arxiv_id = extract_arxiv_id(arxiv_url)

In [143]:
questions = ['–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?',
            '–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?',
             '–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?',
             '–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?',
             '–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?',
             '–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?'
            ]

In [144]:
gt = ['''–í –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏: Œ≤‚ÇÅ=0.9, Œ≤‚ÇÇ=0.98, Œµ=1e‚àí9. Learning rate –≤–∞—Ä—å–∏—Ä–æ–≤–∞–ª—Å—è –ø–æ scheduler‚Äô—É —Å warm-up:
lr = ùëë model ‚àí 0.5 ‚ãÖ min(step‚àí0.5,step‚ãÖwarmup_steps‚àí1.5)lr=d model‚àí0.5‚Äã‚ãÖmin(step ‚àí0.5,step‚ãÖwarmup_steps ‚àí1.5)
–¢–∞–∫–∂–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è dropout (0.1) –∏ label smoothing (Œµ=0.1).''',
     'Transformer –∑–∞–º–µ–Ω—è–µ—Ç RNN –∏–ª–∏ LSTM/GRU, –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–≤—à–∏–µ—Å—è –≤ encoder/decoder, –Ω–∞ self-attention –∏ feed-forward —Å–ª–æ–∏. –û–Ω —Ç–∞–∫–∂–µ —É–±–∏—Ä–∞–µ—Ç –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç—å –≤ —Ä–µ–∫—É—Ä—Å–∏–∏.',
     '''–≠—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –≤—ã—á–∏—Å–ª—è–µ—Ç:
Attention(ùëÑ,ùêæ,ùëâ)=softmax(ùëÑùêæùëáùëëùëò)Attention(Q,K,V)=softmax(dk‚Äã‚ÄãQK T)
–î–µ–ª–µ–Ω–∏–µ –Ω–∞ ùëëùëò –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—â–∞–µ—Ç —Å–ª–∏—à–∫–æ–º –±–æ–ª—å—à–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è dot product, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥–ª–∏ –±—ã –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ –º–∞–ª–µ–Ω—å–∫–∏–º –≥—Ä–∞–¥–∏–µ–Ω—Ç–∞–º –∏–∑-–∑–∞ saturation softmax.''',
     'Encoder: N –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –±–ª–æ–∫–æ–≤ (–æ–±—ã—á–Ω–æ N=6), –∫–∞–∂–¥—ã–π –≤–∫–ª—é—á–∞–µ—Ç: Multi-Head Self-Attention ‚Üí Add & Norm ‚Üí Feed-Forward ‚Üí Add & Norm. Decoder: —Ç–æ–∂–µ N –±–ª–æ–∫–æ–≤, –Ω–æ —Å –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–º masked self-attention –∏ encoder-decoder attention.',
     '–ü–æ—Ç–æ–º—É —á—Ç–æ –∫–∞–∂–¥–∞—è "–≥–æ–ª–æ–≤–∞" –æ–±—É—á–∞–µ—Ç—Å—è —Ñ–æ–∫—É—Å–∏—Ä–æ–≤–∞—Ç—å—Å—è –Ω–∞ —Ä–∞–∑–Ω—ã—Ö –∞—Å–ø–µ–∫—Ç–∞—Ö –≤—Ö–æ–¥–∞ ‚Äî —Å–∏–Ω—Ç–∞–∫—Å–∏—Å, —Å–µ–º–∞–Ω—Ç–∏–∫–∞, –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –∏ —Ç.–¥., –¥–∞–≤–∞—è –±–æ–ª–µ–µ –±–æ–≥–∞—Ç–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ.',
     '–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –¥–æ–±–∞–≤–ª—è—é—Ç –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ –ø–æ—Ä—è–¥–∫–µ —Ç–æ–∫–µ–Ω–æ–≤, –∫–æ—Ç–æ—Ä—ã–π —Ç–µ—Ä—è–µ—Ç—Å—è –≤ self-attention. –°–∏–Ω—É—Å–æ–∏–¥—ã –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ —ç–∫—Å—Ç—Ä–∞–ø–æ–ª–∏—Ä–æ–≤–∞—Ç—å –Ω–∞ –±–æ–ª–µ–µ –¥–ª–∏–Ω–Ω—ã–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏, —á–µ–º —Ç–µ, —á—Ç–æ –±—ã–ª–∏ –≤ –æ–±—É—á–µ–Ω–∏–∏.',
     'Self-attention: O(n¬≤) –ø–æ –¥–ª–∏–Ω–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏, –Ω–æ –≤—Å–µ —ç–ª–µ–º–µ–Ω—Ç—ã –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—é—Ç—Å—è –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ. RNN: O(n), –Ω–æ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ (–Ω–µ—Ç –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–º–∞). Conv: O(log n) –≥–ª—É–±–∏–Ω–∞, –Ω–æ —Ç—Ä–µ–±—É–µ—Ç –º–Ω–æ–≥–æ —Ñ–∏–ª—å—Ç—Ä–æ–≤. Self-attention –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –∫–æ—Ä–æ—Ç–∫–∏–π –ø—É—Ç—å –º–µ–∂–¥—É –ª—é–±—ã–º–∏ –¥–≤—É–º—è —Ç–æ–∫–µ–Ω–∞–º–∏.',
     'Dropout (–≤ attention, feed-forward, embeddings). Label smoothing. Early stopping, –ø—Ä–∏ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏',
     '''EN‚ÜíDE: BLEU 28.4 (vs. –ø—Ä–µ–¥—ã–¥—É—â–∏–µ ~26.4)
EN‚ÜíFR: BLEU 41.8 (vs. ~40.4) 
Transformer –ø—Ä–µ–≤–∑–æ—à–µ–ª –≤—Å–µ –ø—Ä–µ–¥—ã–¥—É—â–∏–µ seq2seq –º–æ–¥–µ–ª–∏, –≤–∫–ª—é—á–∞—è LSTM + attention.''',
     '''–ü–æ—Ç–æ–º—É —á—Ç–æ self-attention –Ω–µ —Ç—Ä–µ–±—É–µ—Ç –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏, –∫–∞–∫ –≤ RNN ‚Äî –≤—Å–µ —Ç–æ–∫–µ–Ω—ã –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—é—Ç—Å—è –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ, —á—Ç–æ –∏–¥–µ–∞–ª—å–Ω–æ –¥–ª—è GPU/TPU.''']

In [145]:
df = pd.DataFrame()

df['Questions'] = questions
df['gt'] = gt

In [146]:
df.to_csv('test_results/article_1_questions.csv', index=False)

### –°—Ç–∞—Ç—å—è TTRL

In [147]:
arxiv_url = "https://arxiv.org/pdf/2504.16084"
arxiv_id = extract_arxiv_id(arxiv_url)

In [148]:
questions = ['–í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?',
             '–ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?',
             '–ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?',
             '–ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?',
             '–ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?',
             '–ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?',
             '–ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?'
            ]

In [149]:
gt = ['TTRL ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –û–Ω –ø–æ–∑–≤–æ–ª—è–µ—Ç LLM –∞–¥–∞–ø—Ç–∏—Ä–æ–≤–∞—Ç—å—Å—è –≤–æ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞, –∏—Å–ø–æ–ª—å–∑—É—è —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—ã–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è –∏ –º–µ—Ö–∞–Ω–∏–∑–º majority voting –¥–ª—è –ø–æ—Å—Ç—Ä–æ–µ–Ω–∏—è –ø—Å–µ–≤–¥–æ–Ω–∞–≥—Ä–∞–¥.',
     '''–ú–æ–¥–µ–ª—å –º–Ω–æ–≥–æ–∫—Ä–∞—Ç–Ω–æ –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –æ—Ç–≤–µ—Ç—ã –Ω–∞ –æ–¥–∏–Ω –∏ —Ç–æ—Ç –∂–µ –∑–∞–ø—Ä–æ—Å. –û—Ç–≤–µ—Ç, –≤—Å—Ç—Ä–µ—á–∞—é—â–∏–π—Å—è —á–∞—â–µ –≤—Å–µ–≥–æ (majority vote), –ø—Ä–∏–Ω–∏–º–∞–µ—Ç—Å—è –∑–∞ "–ø—Å–µ–≤–¥–æ–º–µ—Ç–∫—É", –∏ –∫–∞–∂–¥–æ–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–µ –ø–æ–ª—É—á–∞–µ—Ç –Ω–∞–≥—Ä–∞–¥—É:
1, –µ—Å–ª–∏ —Å–æ–≤–ø–∞–¥–∞–µ—Ç —Å –ø—Å–µ–≤–¥–æ–º–µ—Ç–∫–æ–π
0 ‚Äî –µ—Å–ª–∏ –Ω–µ—Ç.''',
     'Majority voting –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –º–µ—Ç–∫–∏ –∏ –≤—ã—á–∏—Å–ª–µ–Ω–∏—è –Ω–∞–≥—Ä–∞–¥—ã: –æ–Ω –æ–ø—Ä–µ–¥–µ–ª—è–µ—Ç –Ω–∞–∏–±–æ–ª–µ–µ –≤–µ—Ä–æ—è—Ç–Ω—ã–π –ø—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—ã—Ö —Å—ç–º–ø–ª–æ–≤ –º–æ–¥–µ–ª–∏, –∑–∞–º–µ–Ω—è—è —Ä–µ–∞–ª—å–Ω—ã–µ –º–µ—Ç–∫–∏.',
     '–ù–∞ AIME 2024 –º–æ–¥–µ–ª—å Qwen2.5-Math-7B —É–ª—É—á—à–∏–ª–∞—Å—å —Å 16.7% –¥–æ 43.3% pass@1, —á—Ç–æ —Å–æ—Å—Ç–∞–≤–ª—è–µ—Ç +159.3% –ø—Ä–∏—Ä–æ—Å—Ç–∞ –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Ä–∞–∑–º–µ—Ç–∫–∏',
     'TTRL –ø–æ—á—Ç–∏ –¥–æ—Å—Ç–∏–≥–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ RL —Å –¥–æ—Å—Ç—É–ø–æ–º –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º (RL-leakage). –≠—Ç–æ –∑–Ω–∞—á–∏—Ç, —á—Ç–æ –ø—Å–µ–≤–¥–æ–Ω–∞–≥—Ä–∞–¥, –ø–æ–ª—É—á–µ–Ω–Ω—ã—Ö —á–µ—Ä–µ–∑ majority voting, –¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –¥–ª—è —Å–∏–ª—å–Ω–æ–π –∞–¥–∞–ø—Ç–∞—Ü–∏–∏ –º–æ–¥–µ–ª–∏',
     '''–ü–æ—Ç–æ–º—É —á—Ç–æ:
RL —É—Å—Ç–æ–π—á–∏–≤ –∫ —à—É–º—É –≤ –Ω–∞–≥—Ä–∞–¥–∞—Ö.
–î–∞–∂–µ –ø—Ä–∏ –æ—à–∏–±–æ—á–Ω–æ–π –º–µ—Ç–∫–µ —á–∞—Å—Ç—å –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –≤—Å—ë —Ä–∞–≤–Ω–æ –º–æ–∂–µ—Ç –ø–æ–ª—É—á–∏—Ç—å –∫–æ—Ä—Ä–µ–∫—Ç–Ω—ã–π –Ω–µ–≥–∞—Ç–∏–≤–Ω—ã–π —Å–∏–≥–Ω–∞–ª, —É—Å–∏–ª–∏–≤–∞—è –æ–±—É—á–µ–Ω–∏–µ.
Reward accuracy –º–æ–∂–µ—Ç –±—ã—Ç—å –≤—ã—Å–æ–∫–æ–π –¥–∞–∂–µ –ø—Ä–∏ –Ω–∏–∑–∫–æ–π label accuracy''',
     '''–ú–æ–¥–µ–ª–∏: Qwen2.5-Math-1.5B, Qwen2.5-Math-7B, LLaMA-3.1-8B-Instruct
–ë–µ–Ω—á–º–∞—Ä–∫–∏: AIME 2024, AMC, MATH-500''',
     '''–ù–µ–¥–æ—Å—Ç–∞—Ç–æ–∫ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–Ω–∞–Ω–∏–π —É –º–æ–¥–µ–ª–∏ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —É LLaMA-3.1-8B-Instruct).
–ù–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã–µ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã (batch size, temperature –∏ —Ç.–¥.)
–°–ª–æ–∂–Ω—ã–µ –∏ –¥–∞–ª–µ–∫–∏–µ –æ—Ç –æ–±—É—á–∞—é—â–µ–≥–æ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è —Ç–µ—Å—Ç–æ–≤—ã–µ –¥–∞–Ω–Ω—ã–µ''',
     '''TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø–æ–ª–æ–∂–∏—Ç–µ–ª—å–Ω–æ ‚Äî –±–æ–ª–µ–µ –∫—Ä—É–ø–Ω—ã–µ –º–æ–¥–µ–ª–∏ (–Ω–∞–ø—Ä–∏–º–µ—Ä, Qwen2.5-Math-7B) –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç –±–æ–ª—å—à–∏–π –ø—Ä–∏—Ä–æ—Å—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏, —Ç.–∫. –ª—É—á—à–µ –æ—Ü–µ–Ω–∏–≤–∞—é—Ç –º–µ—Ç–∫–∏ –∏ –≥–µ–Ω–µ—Ä–∏—Ä—É—é—Ç –±–æ–ª–µ–µ —Å—Ç–∞–±–∏–ª—å–Ω—ã–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è''',
     '–°–æ–≤–º–µ—Å—Ç–∏–º —Å PPO –∏ GRPO. –í —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å GRPO (–æ—Å–Ω–æ–≤–Ω–æ–π) –∏ PPO (–∞–ª—å—Ç–µ—Ä–Ω–∞—Ç–∏–≤–Ω—ã–π) ‚Äî –æ–±–∞ –ø–æ–∫–∞–∑–∞–ª–∏ —Å—Ö–æ–∂—É—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å, –Ω–æ PPO —Å—Ç–∞–±–∏–ª—å–Ω–µ–µ']

In [150]:
df = pd.DataFrame()

df['Questions'] = questions
df['gt'] = gt

In [152]:
df.to_csv('test_results/article_2_questions.csv', index=False)

### –°—Ç–∞—Ç—å—è Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

In [154]:
arxiv_url = "https://arxiv.org/pdf/2005.11401"
arxiv_id = extract_arxiv_id(arxiv_url)

In [155]:
questions = ['–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?',
             '–ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?',
             '–ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?',
             '–ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?',
             '–ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?',
             '–ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?'
            ]

In [160]:
gt = ['BART-large (400M –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤) –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –∫–∞–∫ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä (pŒ∏). –≠—Ç–æ seq2seq Transformer, –æ–±—É—á–µ–Ω–Ω—ã–π —Å –¥–µ–Ω–æ–π–∑–∏–Ω–≥-–æ–±—ä–µ–∫—Ç–∏–≤–æ–º',
     'DPR (Dense Passage Retriever) ‚Äî bi-encoder –Ω–∞ –±–∞–∑–µ BERT-base. –û–¥–∏–Ω BERT –∫–æ–¥–∏—Ä—É–µ—Ç –∑–∞–ø—Ä–æ—Å (q(x)), –¥—Ä—É–≥–æ–π ‚Äî –¥–æ–∫—É–º–µ–Ω—Ç—ã (d(z)). –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è –º–æ–¥–µ–ª—å—é, –æ–±—É—á–µ–Ω–Ω–æ–π –Ω–∞ TriviaQA –∏ Natural Questions',
     '''RAG-Sequence –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –æ–¥–∏–Ω –∏ —Ç–æ—Ç –∂–µ –¥–æ–∫—É–º–µ–Ω—Ç –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤—Å–µ–π –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏.
RAG-Token –º–æ–∂–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å —Ä–∞–∑–Ω—ã–µ –¥–æ–∫—É–º–µ–Ω—Ç—ã –¥–ª—è –∫–∞–∂–¥–æ–≥–æ —Ç–æ–∫–µ–Ω–∞ ‚Äî –±–æ–ª–µ–µ –≥–∏–±–∫–∏–π, –Ω–æ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ –∑–∞—Ç—Ä–∞—Ç–Ω—ã–π –≤–∞—Ä–∏–∞–Ω—Ç''',
     '21 –º–∏–ª–ª–∏–æ–Ω –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤, –∫–∞–∂–¥–∞—è —Å—Ç–∞—Ç—å—è —Ä–∞–∑–±–∏—Ç–∞ –Ω–∞ 100-—Å–ª–æ–≤–Ω—ã–µ —á–∞–Ω–∫–∏. –ò—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è Wikipedia dump –æ—Ç –¥–µ–∫–∞–±—Ä—è 2018 –≥–æ–¥–∞',
     '''Open-domain QA: Natural Questions, TriviaQA, WebQuestions, CuratedTrec
Abstractive QA: MS-MARCO
Question Generation: Jeopardy QGen
Classification: FEVER''',
     '''–ü–æ—Ç–æ–º—É —á—Ç–æ:
–ú–æ–∂–Ω–æ –æ–±–æ–±—â–∞—Ç—å –∏ –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –æ—Ç–≤–µ—Ç –¥–∞–∂–µ –ø—Ä–∏ –æ—Ç—Å—É—Ç—Å—Ç–≤–∏–∏ —Ç–æ—á–Ω–æ–≥–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏—è –≤ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ö.
–û—Ç–≤–µ—Ç –º–æ–∂–µ—Ç –±—ã—Ç—å —Ñ–∞–∫—Ç—É–∞–ª—å–Ω—ã–º –∏ —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–Ω—ã–º, –¥–∞–∂–µ –µ—Å–ª–∏ –æ–Ω –Ω–µ –∏–∑–≤–ª–µ–∫–∞–µ—Ç—Å—è –Ω–∞–ø—Ä—è–º—É—é –∏–∑ —Ç–µ–∫—Å—Ç–∞''',
     '''RAG-Token –ø—Ä–µ–≤–∑–æ—à—ë–ª BART –ø–æ –º–µ—Ç—Ä–∏–∫–µ Q-BLEU-1 (22.2 –ø—Ä–æ—Ç–∏–≤ 19.7). –¢–∞–∫–∂–µ –≤ human evaluation RAG –æ–∫–∞–∑–∞–ª—Å—è –±–æ–ª–µ–µ —Ñ–∞–∫—Ç—É–∞–ª—å–Ω—ã–º (42.7%) –∏ —Å–ø–µ—Ü–∏—Ñ–∏—á–Ω—ã–º (37.4%), —á–µ–º BART''',
     '–ú–æ–¥–µ–ª—å –æ–±—É—á–∞–µ—Ç—Å—è end-to-end, –º–∏–Ω–∏–º–∏–∑–∏—Ä—É—è –æ—Ç—Ä–∏—Ü–∞—Ç–µ–ª—å–Ω—ã–π log-likelihood. –¢–æ–ª—å–∫–æ query-encoder BERTq –∏ BART –æ–±—É—á–∞—é—Ç—Å—è, document-encoder BERTd –∏ –∏–Ω–¥–µ–∫—Å –æ—Å—Ç–∞—é—Ç—Å—è —Ñ–∏–∫—Å–∏—Ä–æ–≤–∞–Ω–Ω—ã–º–∏',
     '–ú–æ–∂–Ω–æ –æ–±–Ω–æ–≤–ª—è—Ç—å –∑–Ω–∞–Ω–∏—è –º–æ–¥–µ–ª–∏ –±–µ–∑ –ø–µ—Ä–µ–æ–±—É—á–µ–Ω–∏—è, –ø—Ä–æ—Å—Ç–æ –∑–∞–º–µ–Ω–∏–≤ –≤–µ–∫—Ç–æ—Ä–Ω—ã–π –∏–Ω–¥–µ–∫—Å. –ù–∞–ø—Ä–∏–º–µ—Ä, –ø—Ä–∏ —Å–º–µ–Ω–µ –ª–∏–¥–µ—Ä–æ–≤ —Å—Ç—Ä–∞–Ω –º–µ–∂–¥—É 2016 –∏ 2018 –≥–æ–¥–∞–º–∏ –º–æ–¥–µ–ª—å –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ –º–µ–Ω—è–ª–∞ –æ—Ç–≤–µ—Ç—ã –ø—Ä–∏ –ø–æ–¥–º–µ–Ω–µ –∏–Ω–¥–µ–∫—Å–∞',
     '''RAG-Token: —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–π beam search —Å —Å—É–º–º–æ–π –ø–æ top-K –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –Ω–∞ –∫–∞–∂–¥–æ–º —à–∞–≥–µ.
RAG-Sequence: beam search –ø–æ –∫–∞–∂–¥–æ–º—É –¥–æ–∫—É–º–µ–Ω—Ç—É –æ—Ç–¥–µ–ª—å–Ω–æ, —Å –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–º–∏ forward-pass –¥–ª—è –º–∞—Ä–≥–∏–Ω–∞–ª–∏–∑–∞—Ü–∏–∏, –ª–∏–±–æ "Fast Decoding" ‚Äî –±–µ–∑ –ø–µ—Ä–µ—Å—á—ë—Ç–∞ –≥–∏–ø–æ—Ç–µ–∑, –∫–æ—Ç–æ—Ä—ã—Ö –Ω–µ—Ç –≤ –±–∏–º–µ''']

In [162]:
df = pd.DataFrame()

df['Questions'] = questions
df['gt'] = gt

In [164]:
df.to_csv('test_results/article_3_questions.csv', index=False)

### –°—Ç–∞—Ç—å—è Efficient Estimation of Word Representations in Vector Space

In [165]:
arxiv_url = "https://arxiv.org/pdf/1301.3781"
arxiv_id = extract_arxiv_id(arxiv_url)

In [166]:
questions = ['–ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?',
             '–í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?',
             '–ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?',
             '–ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?',
             '–ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?',
             '–ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?',
             '–ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?',
             '–ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?'
            ]

In [167]:
gt = ['''CBOW (Continuous Bag-of-Words)
Skip-gram
–û–±–µ –º–æ–¥–µ–ª–∏ —è–≤–ª—è—é—Ç—Å—è –ª–æ–≥-–ª–∏–Ω–µ–π–Ω—ã–º–∏ –∏ –ª–∏—à–µ–Ω—ã —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –¥–ª—è –ø–æ–≤—ã—à–µ–Ω–∏—è —Å–∫–æ—Ä–æ—Å—Ç–∏ –æ–±—É—á–µ–Ω–∏—è''',
     '''CBOW –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç —Ç–µ–∫—É—â–∏–µ —Å–ª–æ–≤–æ –ø–æ –∫–æ–Ω—Ç–µ–∫—Å—Ç—É (—Å—Ä–µ–¥–Ω–µ–µ –ø–æ –æ–∫—Ä–µ—Å—Ç–Ω—ã–º —Å–ª–æ–≤–∞–º).
Skip-gram –Ω–∞–æ–±–æ—Ä–æ—Ç ‚Äî –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç –æ–∫—Ä–µ—Å—Ç–Ω—ã–µ —Å–ª–æ–≤–∞ –ø–æ —Ç–µ–∫—É—â–µ–º—É —Å–ª–æ–≤—É''',
     '''–°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–µ (5 —Ç–∏–ø–æ–≤): —Å—Ç–æ–ª–∏—Ü—ã, –≤–∞–ª—é—Ç—ã, –æ—Ç–Ω–æ—à–µ–Ω–∏—è —á–µ–ª–æ–≤–µ–∫-–∂–µ–Ω—â–∏–Ω–∞ –∏ —Ç.–¥.
–°–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏–µ (9 —Ç–∏–ø–æ–≤): —Å—Ç–µ–ø–µ–Ω–∏ —Å—Ä–∞–≤–Ω–µ–Ω–∏—è, –≤—Ä–µ–º–µ–Ω–∞ –≥–ª–∞–≥–æ–ª–æ–≤, –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ—Å—Ç–∏ –∏ –¥—Ä''',
     '''–î–ª—è Skip-gram —Å —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å—é 300 –∏ –æ–±—É—á–µ–Ω–∏–µ–º –Ω–∞ 783M —Å–ª–æ–≤:
Semantic: 50.0%
Syntactic: 55.9%
Total: 53.3%''',
     '–°–ª–æ–≤–æ –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç –∫–æ–Ω—Ç–µ–∫—Å—Ç –≤ –æ–∫–Ω–µ —Ä–∞–∑–º–µ—Ä–∞ C. –ö–æ–Ω—Ç–µ–∫—Å—Ç –≤—ã–±–∏—Ä–∞–µ—Ç—Å—è —Å–ª—É—á–∞–π–Ω–æ –≤ –ø—Ä–µ–¥–µ–ª–∞—Ö –æ–∫–Ω–∞ (–æ—Ç 1 –¥–æ C). –ë–æ–ª–µ–µ –¥–∞–ª—å–Ω–∏–µ —Å–ª–æ–≤–∞ –≤—Å—Ç—Ä–µ—á–∞—é—Ç—Å—è —Ä–µ–∂–µ (subsampling)',
     '''Q=N‚ãÖD+D‚ãÖlog 2‚Äã(V)
–ì–¥–µ:
N ‚Äî –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —Å–ª–æ–≤ –≤ –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ
D ‚Äî —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –≤–µ–∫—Ç–æ—Ä–æ–≤
V ‚Äî —Ä–∞–∑–º–µ—Ä —Å–ª–æ–≤–∞—Ä—è''',
     '–ü–æ—Ç–æ–º—É —á—Ç–æ –æ—Å–Ω–æ–≤–Ω–∞—è –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–∞—è –Ω–∞–≥—Ä—É–∑–∫–∞ –ø—Ä–∏—Ö–æ–¥–∏–ª–∞—Å—å –Ω–∞ —Å–∫—Ä—ã—Ç—ã–π —Å–ª–æ–π. –£–¥–∞–ª–µ–Ω–∏–µ —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –ø–æ–∑–≤–æ–ª–∏–ª–æ –æ–±—É—á–∞—Ç—å –º–æ–¥–µ–ª–∏ –Ω–∞ –≥–æ—Ä–∞–∑–¥–æ –±–æ–ª—å—à–∏—Ö –∫–æ—Ä–ø—É—Å–∞—Ö —Å –º–∏–ª–ª–∏–∞—Ä–¥–∞–º–∏ —Å–ª–æ–≤ –∑–∞ —Ä–∞–∑—É–º–Ω–æ–µ –≤—Ä–µ–º—è',
     '''–ò—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∞—Å—å –∞—Å–∏–Ω—Ö—Ä–æ–Ω–Ω–∞—è mini-batch SGD + AdaGrad
–î–æ 100 –∫–æ–ø–∏–π –º–æ–¥–µ–ª–µ–π –Ω–∞ –º–Ω–æ–∂–µ—Å—Ç–≤–µ CPU
Skip-gram 1000D –Ω–∞ 6B —Å–ª–æ–≤ –æ–±—É—á–∞–ª—Å—è –∑–∞ ~2.5 –¥–Ω—è –Ω–∞ 125 CPU-—è–¥–µ—Ä''',
     '''–°–Ω–∞—á–∞–ª–∞ –≤—Ä—É—á–Ω—É—é —Å–æ–∑–¥–∞–Ω—ã —Å–ø–∏—Å–∫–∏ –ø–∞—Ä —Å–ª–æ–≤.
–ó–∞—Ç–µ–º –∞–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–∏ —Å—Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω—ã —Ç—ã—Å—è—á–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –ø—É—Ç—ë–º —Å–ª—É—á–∞–π–Ω–æ–π –∫–æ–º–±–∏–Ω–∞—Ü–∏–∏ –¥–≤—É—Ö –ø–∞—Ä –∏–∑ –æ–¥–Ω–æ–≥–æ —Ç–∏–ø–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≥–æ—Ä–æ–¥–∞ –∏ —à—Ç–∞—Ç—ã).
–ò—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è —Ç–æ–ª—å–∫–æ –æ–¥–Ω–æ—Å–ª–æ–≤–Ω—ã–µ —Ç–µ—Ä–º–∏–Ω—ã''',
     '''–ü—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –≤–µ–∫—Ç–æ—Ä–Ω–∞—è –∞—Ä–∏—Ñ–º–µ—Ç–∏–∫–∞:
vec("king") ‚àí vec("man") + vec("woman") ‚âà vec("queen")
–û—Ç–≤–µ—Ç ‚Äî –±–ª–∏–∂–∞–π—à–µ–µ –ø–æ –∫–æ—Å–∏–Ω—É—Å–Ω–æ–º—É —Ä–∞—Å—Å—Ç–æ—è–Ω–∏—é —Å–ª–æ–≤–æ –∫ –ø–æ–ª—É—á–µ–Ω–Ω–æ–º—É –≤–µ–∫—Ç–æ—Ä—É''']

In [168]:
df = pd.DataFrame()

df['Questions'] = questions
df['gt'] = gt

In [170]:
df.to_csv('test_results/article_4_questions.csv', index=False)

### –°—Ç–∞—Ç—å—è LADDER: SELF-IMPROVING LLMS THROUGH RECURSIVE PROBLEM DECOMPOSITION

In [172]:
arxiv_url = "https://arxiv.org/pdf/2503.00735"
arxiv_id = extract_arxiv_id(arxiv_url)

In [173]:
questions = ['–í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?',
             '–ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?',
             '–ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?',
             '–ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?'
            ]

In [174]:
gt = ['''–ú–æ–¥–µ–ª—å —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç —Å–≤–æ–∏ —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏, –≥–µ–Ω–µ—Ä–∏—Ä—É—è –∏ —Ä–µ—à–∞—è –ø—Ä–æ—â–µ —Å—Ñ–æ—Ä–º—É–ª–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –≤–∞—Ä–∏–∞–Ω—Ç—ã —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á. –≠—Ç–æ —Å–æ–∑–¥–∞—ë—Ç –≥—Ä–∞–¥–∏–µ–Ω—Ç —Å–ª–æ–∂–Ω–æ—Å—Ç–∏, –ø–æ –∫–æ—Ç–æ—Ä–æ–º—É –æ–Ω–∞ —É—á–∏—Ç—Å—è —Ä–µ—à–∞—Ç—å –≤—Å—ë –±–æ–ª–µ–µ —Ç—Ä—É–¥–Ω—ã–µ –∑–∞–¥–∞—á–∏ –±–µ–∑ –≤–Ω–µ—à–Ω–µ–π —Ä–∞–∑–º–µ—Ç–∫–∏ –∏–ª–∏ –ø–æ–º–æ—â–∏ —á–µ–ª–æ–≤–µ–∫–∞''',
     '–ü—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è —á–∏—Å–ª–µ–Ω–Ω–∞—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏—è: —Å—Ä–∞–≤–Ω–µ–Ω–∏–µ –æ—Ç–≤–µ—Ç–∞ –º–æ–¥–µ–ª–∏ —Å —á–∏—Å–ª–µ–Ω–Ω—ã–º –∑–Ω–∞—á–µ–Ω–∏–µ–º –∏–Ω—Ç–µ–≥—Ä–∞–ª–∞ –Ω–∞ 5 —Å–ª—É—á–∞–π–Ω—ã—Ö –æ—Ç—Ä–µ–∑–∫–∞—Ö –≤ –¥–∏–∞–ø–∞–∑–æ–Ω–µ [-10, 10], —Å —É—á—ë—Ç–æ–º —Å–∏–Ω–≥—É–ª—è—Ä–Ω–æ—Å—Ç–µ–π –∏ –ø–æ—Ä–æ–≥–æ–º —Ç–æ—á–Ω–æ—Å—Ç–∏ 1e‚àí2',
     '''–í–∞—Ä–∏–∞–Ω—Ç—ã —Å–æ–∑–¥–∞—é—Ç—Å—è –ø–æ –¥–µ—Ä–µ–≤—É:
–ü—Ä–∏–º–µ–Ω—è—é—Ç—Å—è 3‚Äì5 –∑–∞—Ä–∞–Ω–µ–µ –æ–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω—ã—Ö –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏—Ö —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü–∏–π.
–ò—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è batch prompting, temperature cycling –∏ –ø–µ—Ä—Å–æ–Ω–∞–∂–∏ (–ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤—ã) (–Ω–∞–ø—Ä–∏–º–µ—Ä, "–¥—É–º–∞–π –∫–∞–∫ –≠–π–ª–µ—Ä").
–ì–µ–Ω–µ—Ä–∞—Ü–∏—è –∏–¥—ë—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ, –¥–æ 3-—Ö —É—Ä–æ–≤–Ω–µ–π –≥–ª—É–±–∏–Ω—ã''',
     '''TTRL (Test-Time RL) ‚Äî —ç—Ç–æ —Ç–µ—Å—Ç–æ–≤–æ–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ RL: –ø—Ä–∏ –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–µ –º–æ–¥–µ–ª—å –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –≤–∞—Ä–∏–∞–Ω—Ç—ã —Ç–µ–∫—É—â–µ–π –∑–∞–¥–∞—á–∏ –∏ –æ–±—É—á–∞–µ—Ç—Å—è –Ω–∞ –Ω–∏—Ö.
–†–∞–∑–Ω–∏—Ü–∞:
LADDER ‚Äî –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ train-–Ω–∞–±–æ—Ä–µ –∑–∞—Ä–∞–Ω–µ–µ.
TTRL ‚Äî –æ–±—É—á–µ–Ω–∏–µ –≤ –º–æ–º–µ–Ω—Ç —Ç–µ—Å—Ç–∞ –Ω–∞ —Ç–µ–∫—É—â–µ–π –∑–∞–¥–∞—á–µ''',
     '''LADDER: 73% (–≤—ã—à–µ–ª –Ω–∞ —É—Ä–æ–≤–µ–Ω—å –∫–≤–∞–ª–∏—Ñ–∏–∫–∞—Ü–∏–∏)
LADDER + TTRL: 90%, –ª—É—á—à–∏–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç —Å—Ä–µ–¥–∏ –º–æ–¥–µ–ª–µ–π, –≤–∫–ª—é—á–∞—è OpenAI o1 (80%)''',
     '''GRPO –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –¥–≤–∞ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–∞:
Accuracy reward: 1, –µ—Å–ª–∏ –∏–Ω—Ç–µ–≥—Ä–∞–ª —Ä–µ—à—ë–Ω –ø—Ä–∞–≤–∏–ª—å–Ω–æ (–ø–æ —á–∏—Å–ª–µ–Ω–Ω–æ–π –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏).
Format reward: 1, –µ—Å–ª–∏ –æ—Ç–≤–µ—Ç –æ—Ñ–æ—Ä–º–ª–µ–Ω –≤ <ANSWER>...</ANSWER>
–ü—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ GRPO: –±–µ–∑ –∫—Ä–∏—Ç–∏–∫–∞, —Å –≥—Ä—É–ø–ø–æ–≤–æ–π –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–µ–π —á–µ—Ä–µ–∑ advantage''',
     '–ë–µ–∑ –≥—Ä–∞–¥–∏–µ–Ω—Ç–∞ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ RL –±—ã—Å—Ç—Ä–æ –ø—Ä–æ–≤–∞–ª–∏–≤–∞–µ—Ç—Å—è: –∑–∞–¥–∞—á–∏ —Å–ª–∏—à–∫–æ–º —Å–ª–æ–∂–Ω—ã, –∏ –æ—Ç—Å—É—Ç—Å—Ç–≤–∏–µ –ø–æ–ª–æ–∂–∏—Ç–µ–ª—å–Ω—ã—Ö –ø—Ä–∏–º–µ—Ä–æ–≤ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –∫–∞—Ç–∞—Å—Ç—Ä–æ—Ñ–∏—á–µ—Å–∫–æ–π –¥–µ–≥—Ä–∞–¥–∞—Ü–∏–∏ ‚Äî –¥–æ 0% accuracy –∑–∞ 30 —à–∞–≥–æ–≤ –æ–±—É—á–µ–Ω–∏—è',
     '''Batch prompting (–ø–æ 10 –∑–∞ —Ä–∞–∑)
Temperature cycling (0.8‚Äì1.4)
Persona prompting (—Ä–∞–∑–Ω—ã–µ —Å—Ç–∏–ª–∏ –º—ã—à–ª–µ–Ω–∏—è)
–†–µ–∫—É—Ä—Å–∏–≤–Ω–∞—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –¥–æ –≥–ª—É–±–∏–Ω—ã 3''',
     '–ü—É—Ç—ë–º –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç—ã—Å—è—á –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∏ –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –Ω–∏—Ö. –ú–æ–¥–µ–ª—å (–≤—Å–µ–≥–æ 7B –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤) –æ–ø–µ—Ä–µ–∂–∞–µ—Ç GPT-4o –∏ –¥—Ä—É–≥–∏–µ –±–æ–ª–µ–µ –∫—Ä—É–ø–Ω—ã–µ –º–æ–¥–µ–ª–∏, –Ω–µ —É–≤–µ–ª–∏—á–∏–≤–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É, –∞ –ª–∏—à—å –ø—Ä–∞–≤–∏–ª—å–Ω–æ –Ω–∞–ø—Ä–∞–≤–ª—è—è –æ–±—É—á–µ–Ω–∏–µ',
     '''–ü—Ä–∏–º–µ—Ä–Ω–æ 8% –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –æ–∫–∞–∑–∞–ª–∏—Å—å –Ω–µ—Ä–µ—à–∞–µ–º—ã–º–∏
–ù–µ–∫–æ—Ç–æ—Ä—ã–µ ‚Äú–æ–±–ª–µ–≥—á—ë–Ω–Ω—ã–µ‚Äù –≤–∞—Ä–∏–∞–Ω—Ç—ã –Ω–∞ –¥–µ–ª–µ –æ–∫–∞–∑–∞–ª–∏—Å—å —Å–ª–æ–∂–Ω–µ–µ, –∏–∑-–∑–∞ —Å–ª—É—á–∞–π–Ω—ã—Ö —É—Å–ª–æ–∂–Ω—è—é—â–∏—Ö —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü–∏–π
–í–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏—è —Ç—Ä–µ–±–æ–≤–∞–ª–∞ –±–æ–ª—å—à–æ–≥–æ —á–∏—Å–ª–∞ –ø—Ä–æ–≤–µ—Ä–æ–∫ –∏ —Ç–∞–π–º–∞—É—Ç–æ–≤
–ë—ã–ª–∞ –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç—å –≤ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏–∏ ‚Äú–º—É—Å–æ—Ä–Ω—ã—Ö‚Äù –æ—Ç–≤–µ—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–∏–º–≤–æ–ª –∏–Ω—Ç–µ–≥—Ä–∞–ª–∞ –∫–∞–∫ –æ—Ç–≤–µ—Ç)''']

In [175]:
df = pd.DataFrame()

df['Questions'] = questions
df['gt'] = gt

In [176]:
df.to_csv('test_results/article_5_questions.csv', index=False)

## –ì–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤ –º–æ–∏–º–∏ LLM

In [3]:
def retrieve_and_rerank(question, vectordb, reranker, arxiv_id, top_k=5, top_n=2):
    """
    –ò–∑–≤–ª–µ–∫–∞–µ—Ç top_k –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –∏–∑ Chroma –∏ —Ä–µ—Ä–∞–Ω–∂–∏—Ä—É–µ—Ç –∏—Ö —Å –ø–æ–º–æ—â—å—é FlagReranker.
    –í–æ–∑–≤—Ä–∞—â–∞–µ—Ç top_n –Ω–∞–∏–±–æ–ª–µ–µ —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö —Ñ—Ä–∞–≥–º–µ–Ω—Ç–æ–≤.
    
    :param question: –í–æ–ø—Ä–æ—Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è
    :param vectordb: –û–±—ä–µ–∫—Ç Chroma
    :param top_k: –°–∫–æ–ª—å–∫–æ —Ñ—Ä–∞–≥–º–µ–Ω—Ç–æ–≤ –∏–∑–≤–ª–µ–∫–∞—Ç—å –∏–∑ Chroma
    :param top_n: –°–∫–æ–ª—å–∫–æ –ª—É—á—à–∏—Ö –≤–æ–∑–≤—Ä–∞—â–∞—Ç—å –ø–æ—Å–ª–µ rerank
    :return: –°–ø–∏—Å–æ–∫ –∫–æ—Ä—Ç–µ–∂–µ–π (Document, score)
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": top_k, "filter": {"arxiv_id": arxiv_id}})
    candidates = retriever.get_relevant_documents(question)

    pairs = [[question, doc.page_content] for doc in candidates]
    scores = reranker.compute_score(pairs, normalize=True)

    reranked = sorted(zip(candidates, scores), key=lambda x: -x[1])
    
    # print("üîç Rerank —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã:")
    # for i, (doc, score) in enumerate(reranked):
    #     print(f"#–ß–∞–Ω–∫{i+1} ‚Äî {doc.page_content}...")

    return reranked[:top_n]

### Tbank

In [7]:
model_name = "t-bank-ai/T-lite-instruct-0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map="auto")

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)
terminators = list({
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>") or tokenizer.eos_token_id,
    })

embedding_model = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  embedding_model = HuggingFaceBgeEmbeddings(
  vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)


In [38]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –¥–∞—Ç—å –∫–æ—Ä–æ—Ç–∫–∏–π, —Ç–æ—á–Ω—ã–π –∏ –æ–¥–Ω–æ–∑–Ω–∞—á–Ω—ã–π –æ—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å, "
    "–∏—Å–ø–æ–ª—å–∑—É—è —Ç–æ–ª—å–∫–æ –ø—Ä–∏–≤–µ–¥—ë–Ω–Ω—ã–π –∫–æ–Ω—Ç–µ–∫—Å—Ç. "
    "–ù–µ–ª—å–∑—è –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å –¥–∏–∞–ª–æ–≥, –∑–∞–¥–∞–≤–∞—Ç—å –≤—Å—Ç—Ä–µ—á–Ω—ã–µ –≤–æ–ø—Ä–æ—Å—ã –∏–ª–∏ –ø–æ–≤—Ç–æ—Ä—è—Ç—å—Å—è. –û—Ç–≤–µ—á–∞–π —Å—Ç—Ä–æ–≥–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ"
)


def build_messages(question: str, context: str) -> List[dict]:
    """–§–æ—Ä–º–∏—Ä—É–µ—Ç —Å–æ–æ–±—â–µ–Ω–∏—è –≤ –Ω—É–∂–Ω–æ–º –¥–ª—è chat_template —Ñ–æ—Ä–º–∞—Ç–µ."""
    return [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": f"–í–æ–ø—Ä–æ—Å:\n{question}"},
        {"role": "system", "content": f"–ö–æ–Ω—Ç–µ–∫—Å—Ç:\n{context}"},
    ]

#### Attention is all you need

In [19]:
questions = ['–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?',
            '–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?',
             '–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?',
             '–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?',
             '–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?',
             '–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?'
            ]

In [20]:
arxiv_url = "https://arxiv.org/abs/1706.03762"
arxiv_id = extract_arxiv_id(arxiv_url)

In [21]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:05<00:51,  5.74s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏: *Œ≤* 1 = 0.9, *Œ≤* 2 = 0.98 –∏ *œµ* = 10^(-9). –í –ø—Ä–æ—Ü–µ—Å—Å–µ –æ–±—É—á–µ–Ω–∏—è –ø—Ä–∏–º–µ–Ω—è–ª—Å—è –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–∏–π –ø–æ–¥—Ö–æ–¥ –∫ –∏–∑–º–µ–Ω–µ–Ω–∏—é —Å–∫–æ—Ä–æ—Å—Ç–∏ –æ–±—É—á–µ–Ω–∏—è.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:11<00:44,  5.61s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Ç–µ, —á—Ç–æ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –¥–ª—è –º–∞—à–∏–Ω–Ω–æ–≥–æ –ø–µ—Ä–µ–≤–æ–¥–∞, Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏ (RNN) –∏ —Å–ª–æ–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã—Ö —Å–µ—Ç–µ–π (CNN). –í–º–µ—Å—Ç–æ –Ω–∏—Ö –æ–Ω –ø—Ä–∏–º–µ–Ω—è–µ—Ç –º–µ—Ö–∞–Ω–∏–∑–º—ã –º–Ω–æ–≥–æ–≥–æ–ª–æ–≤–æ–≥–æ —Å–∞–º–æ-–≤–Ω–∏–º–∞–Ω–∏—è (multi-headed self-attention).




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:18<00:45,  6.56s/it]

–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –≤ Scaled Dot-Product Attention –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–∏ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ —Å–∫–∞–ª—è—Ä–Ω–æ–≥–æ –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∏—è –º–µ–∂–¥—É –∑–∞–ø—Ä–æ—Å–∞–º–∏ –∏ –∫–ª—é—á–∞–º–∏. –≠—Ç–æ –ø–æ–º–æ–≥–∞–µ—Ç —Å—Ç–∞–±–∏–ª–∏–∑–∏—Ä–æ–≤–∞—Ç—å –∑–Ω–∞—á–µ–Ω–∏—è –≤–µ—Å–æ–≤, –ø–æ–ª—É—á–µ–Ω–Ω—ã—Ö –∏–∑ softmax, –∏ –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—â–∞–µ—Ç –¥–æ–º–∏–Ω–∏—Ä–æ–≤–∞–Ω–∏–µ –æ–¥–Ω–æ–≥–æ –∫–ª—é—á–∞ –ø—Ä–∏ –±–æ–ª—å—à–∏—Ö —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—è—Ö –≤–µ–∫—Ç–æ—Ä–æ–≤. –¢–∞–∫–∏–º –æ–±—Ä–∞–∑–æ–º, –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –±–æ–ª–µ–µ —Ä–∞–≤–Ω–æ–º–µ—Ä–Ω–æ–µ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –≤–Ω–∏–º–∞–Ω–∏—è –º–µ–∂–¥—É –≤—Å–µ–º–∏ –∫–ª—é—á–∞–º–∏, —á—Ç–æ –≤–∞–∂–Ω–æ –¥–ª—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –∏ —É—Å—Ç–æ–π—á–∏–≤–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:28<00:46,  7.74s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –°—Ç—Ä—É–∫—Ç—É—Ä–∞ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ –¥–≤—É—Ö –æ—Å–Ω–æ–≤–Ω—ã—Ö —á–∞—Å—Ç–µ–π: encoder –∏ decoder. Encoder –ø—Ä–∏–Ω–∏–º–∞–µ—Ç –≤—Ö–æ–¥–Ω—É—é –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å —Å–∏–º–≤–æ–ª–æ–≤ –∏ –ø—Ä–µ–æ–±—Ä–∞–∑—É–µ—Ç –µ—ë –≤ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π. –ó–∞—Ç–µ–º decoder, –∏—Å–ø–æ–ª—å–∑—É—è —ç—Ç–∏ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è –∏ –ø—Ä–µ–¥—ã–¥—É—â–∏–µ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ —Å–∏–º–≤–æ–ª—ã, —Å–æ–∑–¥–∞–µ—Ç –≤—ã—Ö–æ–¥–Ω—É—é –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å. –û–±–µ —á–∞—Å—Ç–∏ –≤–∫–ª—é—á–∞—é—Ç –≤ —Å–µ–±—è —Å–ª–æ–∏ —Å–∞–º–æ–ø—Ä–∏—Å—É—Ç—Å—Ç–≤–∏—è (self-attention) –∏ –ª–∏–Ω–µ–π–Ω—ã–µ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è (point-wise fully connected layers). –í–∏–∑—É–∞–ª—å–Ω–æ —ç—Ç–æ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ –¥–≤—É–º—è –±–ª–æ–∫–∞–º–∏ –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ, –∏–∑–æ–±—Ä–∞–∂–µ–Ω–Ω–æ–π –Ω–∞ —Ä–∏—Å—É–Ω–∫–µ 1.


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:37<00:40,  8.08s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å, –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–ª—è—è –º–æ–¥–µ–ª–∏ –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç—å –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ –æ–±—Ä–∞—â–∞—Ç—å—Å—è –∫ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –∏–∑ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –ø–æ–¥–ø—Ä–æ—Å—Ç—Ä–∞–Ω—Å—Ç–≤ –ø—Ä–∏–∑–Ω–∞–∫–æ–≤. –≠—Ç–æ –¥–æ—Å—Ç–∏–≥–∞–µ—Ç—Å—è –∑–∞ —Å—á—ë—Ç —Ä–∞–∑–¥–µ–ª–µ–Ω–∏—è –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –Ω–∞ –Ω–µ—Å–∫–æ–ª—å–∫–æ –ø–æ–¥–∑–∞–¥–∞—á (–≥–æ–ª–æ–≤), –∫–∞–∂–¥–∞—è –∏–∑ –∫–æ—Ç–æ—Ä—ã—Ö —Ä–∞–±–æ—Ç–∞–µ—Ç –Ω–µ–∑–∞–≤–∏—Å–∏–º–æ. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç –æ–¥–Ω–æ–≥–æ –≤–Ω–∏–º–∞—Ç–µ–ª—å–Ω–æ–≥–æ –±–ª–æ–∫–∞, –∫–æ—Ç–æ—Ä—ã–π –≤—ã–Ω—É–∂–¥–µ–Ω —É—Å—Ä–µ–¥–Ω—è—Ç—å –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é, –º–Ω–æ–≥–æ–≥–æ–ª–æ–≤–æ–µ –≤–Ω–∏–º–∞–Ω–∏–µ –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ –≥–∏–±–∫–æ –∏ —Å–ø–µ—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å —Ä

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:56<00:47, 11.90s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ (PE) –≤ –Ω–µ–π—Ä–æ–Ω–Ω—ã—Ö —Å–µ—Ç—è—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä—ã, –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –¥–ª—è –≤–Ω–µ–¥—Ä–µ–Ω–∏—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ –ø–æ–∑–∏—Ü–∏—è—Ö –≤ —Ç–µ–∫—Å—Ç–µ. –°–∏–Ω—É—Å–æ–∏–¥–∞–ª—å–Ω—ã–µ —Ñ—É–Ω–∫—Ü–∏–∏ –ø—Ä–∏–º–µ–Ω—è—é—Ç—Å—è –¥–ª—è —ç—Ç–æ–≥–æ –ø–æ –Ω–µ—Å–∫–æ–ª—å–∫–∏–º –ø—Ä–∏—á–∏–Ω–∞–º:

1. **–õ–∏–Ω–µ–π–Ω–∞—è –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç—å –æ—Ç –ø–æ–∑–∏—Ü–∏–∏**: –°–∏–Ω—É—Å–æ–∏–¥—ã –ø–æ–∑–≤–æ–ª—è—é—Ç –ª–µ–≥–∫–æ –≤—ã—Ä–∞–∑–∏—Ç—å —Å–º–µ—â–µ–Ω–∏–µ –ø–æ–∑–∏—Ü–∏–π, —Ç–∞–∫ –∫–∞–∫ *PE* *pos* + *k* –º–æ–∂–Ω–æ –ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç—å –∫–∞–∫ –ª–∏–Ω–µ–π–Ω—É—é –∫–æ–º–±–∏–Ω–∞—Ü–∏—é *PE* *pos*.
2. **–ì–µ–æ–º–µ—Ç—Ä–∏—á–µ—Å–∫–∞—è –ø—Ä–æ–≥—Ä–µ—Å—Å–∏—è —á–∞—Å—Ç–æ—Ç**: –≠—Ç–æ –ø–æ–º–æ–≥–∞–µ—Ç –º–æ–¥–µ–ª–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —Ä–∞–∑–ª–∏—á–∞—Ç—å –ø–æ–∑–

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:13<00:40, 13.63s/it]

–í–æ–ø—Ä–æ—Å: –í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–µ—Ä–µ–¥ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –∑–∞–∫–ª—é—á–∞—é—Ç—Å—è –≤ —Å–ª–µ–¥—É—é—â–µ–º:
1. **–í—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–∞—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å**: –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö —Å–ª–æ–µ–≤, –∫–æ—Ç–æ—Ä—ã–µ —Ç—Ä–µ–±—É—é—Ç *O(n)* –æ–ø–µ—Ä–∞—Ü–∏–π –¥–ª—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –¥–ª–∏–Ω–æ–π *n*, self-attention –∏—Å–ø–æ–ª—å–∑—É–µ—Ç *O(d)* –æ–ø–µ—Ä–∞—Ü–∏–π, –≥–¥–µ *d* ‚Äî —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è. –≠—Ç–æ –æ—Å–æ–±–µ–Ω–Ω–æ –≤—ã–≥–æ–¥–Ω–æ –¥–ª—è –¥–ª–∏–Ω–Ω—ã—Ö –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–µ–π.
2. **–ü–∞—Ä–∞–ª–ª–µ–ª—å–Ω–∞—è –æ–±—Ä–∞–±–æ—Ç–∫–∞**:

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:24<00:25, 12.76s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –æ–±—É—á–µ–Ω–∏—è Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ª–µ–¥—É—é—â–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏:

1. **Dropout**: –ü—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –Ω–∞ –≤—Å–µ—Ö —Å–ª–æ—è—Ö, –∫—Ä–æ–º–µ –ø–æ—Å–ª–µ–¥–Ω–µ–≥–æ, —Å –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å—é 0.1.
2. **Weight Decay**: –†–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏—è L2, –∫–æ—Ç–æ—Ä–∞—è –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –∫ –≤–µ—Å–∞–º –º–æ–¥–µ–ª–∏, —á—Ç–æ–±—ã –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—Ç–∏—Ç—å –ø–µ—Ä–µ–æ–±—É—á–µ–Ω–∏–µ.
3. **Label Smoothing**: –í–≤–µ–¥–µ–Ω–∏–µ –Ω–µ–±–æ–ª—å—à–æ–π –æ—à–∏–±–∫–∏ –≤ –º–µ—Ç–∫–∏ —Ü–µ–ª–µ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, —á—Ç–æ –ø–æ–º–æ–≥–∞–µ—Ç –º–æ–¥–µ–ª–∏ –ª—É—á—à–µ –æ–±–æ–±—â–∞—Ç—å –∏ —É–º–µ–Ω—å—à–∞–µ—Ç –µ—ë —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å –≤ –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞—Ö.

–≠—Ç–∏ –º–µ—Ç–æ–¥—ã –ø–æ–º–æ–≥–∞—é—Ç —É–ª—É—á—à–∏—Ç—å —É—Å—Ç–æ–π—á–∏–≤–æ—Å—Ç—å –∏ –æ–±–æ–±—â–∞—é—â—É—é —Å–ø–æ—Å–æ–±–Ω–æ

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:33<00:11, 11.54s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ù–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ —Å –∞–Ω–≥–ª–∏–π—Å–∫–æ–≥–æ –Ω–∞ —Ñ—Ä–∞–Ω—Ü—É–∑—Å–∫–∏–π –∏ –Ω–µ–º–µ—Ü–∫–∏–π —è–∑—ã–∫–∏, –º–æ–¥–µ–ª—å Transformer –ø–æ–∫–∞–∑–∞–ª–∞ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–µ —É–ª—É—á—à–µ–Ω–∏–µ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ –ª–∏–¥–µ—Ä–∞–º–∏. –û–Ω–∞ –¥–æ—Å—Ç–∏–≥–ª–∞ –≤—ã—Å–æ–∫–æ–≥–æ BLEU-–æ—Ü–µ–Ω–∫–∏ (41.0) –∏ –ø—Ä–∏ —ç—Ç–æ–º –ø–æ—Ç—Ä–µ–±–æ–≤–∞–ª–∞ –º–µ–Ω—å—à–∏—Ö —Ä–µ—Å—É—Ä—Å–æ–≤ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è, —á–µ–º –ø—Ä–µ–¥—ã–¥—É—â–∏–µ –ª—É—á—à–∏–µ –º–æ–¥–µ–ª–∏. –í–∞–∂–Ω—ã–º —Ñ–∞–∫—Ç–æ—Ä–æ–º —Å—Ç–∞–ª–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ –±–æ–ª–µ–µ –≤—ã—Å–æ–∫–æ–π –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–∏ dropout (0.1 –≤–º–µ—Å—Ç–æ 0.3), —á—Ç–æ —Å–ø–æ—Å–æ–±—Å—Ç–≤–æ–≤–∞–ª–æ –ª—É—á—à–µ–π –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:42<00:00, 10.23s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—è –≤ Transformer –≤–æ–∑–º–æ–∂–Ω–∞ –∏–∑-–∑–∞ –æ—Ç—Å—É—Ç—Å—Ç–≤–∏—è —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö —Å–ª–æ–µ–≤, –∫–æ—Ç–æ—Ä—ã–µ —Ç—Ä–µ–±—É—é—Ç –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ –≤—ã–ø–æ–ª–Ω–µ–Ω–∏—è –∏ –æ–≥—Ä–∞–Ω–∏—á–∏–≤–∞—é—Ç –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç–∏ —Ä–∞—Å–ø–∞—Ä–∞–ª–ª–µ–ª–∏–≤–∞–Ω–∏—è. –í–º–µ—Å—Ç–æ —ç—Ç–æ–≥–æ Transformer –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –º–æ–∂–µ—Ç –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—Ç—å –¥–∞–Ω–Ω—ã–µ –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ, —Ç–∞–∫ –∫–∞–∫ –æ–Ω –Ω–µ —Ç—Ä–µ–±—É–µ—Ç –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ –æ–±—Ö–æ–¥–∞ –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–µ–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –º–Ω–æ–≥–æ—è–¥–µ—Ä–Ω—ã–µ –∏ –º–Ω–æ–≥–æ–ø—Ä–æ—Ü–µ—Å—Å–æ—Ä–Ω—ã–µ —Å–∏—Å—Ç–µ–º—ã, —á—Ç–æ —É—Å–∫–æ—Ä—è–µ—Ç –ø—Ä–æ—Ü–µ—Å—Å –æ–±—É—á–µ–Ω–∏—è –∏ —É–ª—É—á—à–∞–




In [29]:
times_1 = times

In [30]:
np.mean(times_1)

np.float64(10.112690758705138)

In [22]:
df = pd.read_csv('test_results/article_1_questions.csv')

In [31]:
df['Tbank'] = outputs

In [33]:
df.to_csv('test_results/article_1_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è TTRL

In [35]:
questions = ['–í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?',
             '–ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?',
             '–ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?',
             '–ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?',
             '–ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?',
             '–ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?',
             '–ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?'
            ]

In [36]:
arxiv_url = "https://arxiv.org/pdf/2504.16084"
arxiv_id = extract_arxiv_id(arxiv_url)

In [39]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:09<01:29,  9.93s/it]

–í–æ–ø—Ä–æ—Å: –í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL) –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º (RL) –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLMs) –Ω–∞ –æ—Å–Ω–æ–≤–µ –Ω–µ–æ—Ç–º–µ—á–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö –ø–æ–¥—Ö–æ–¥–æ–≤, TTRL –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –∏–∑ —Å–∞–º–æ–≥–æ –ø—Ä–æ—Ü–µ—Å—Å–∞ –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ –∏ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω—ã—Ö –º–æ–¥–µ–ª–µ–π –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ –≤ —Ä–µ–∂–∏–º–µ —Ä–µ–∞–ª—å–Ω–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ –∞–¥–∞–ø—Ç–∏—Ä–æ–≤–∞—Ç—å—Å—è –∏ —É–ª—É—á—à–∞—Ç—å—Å—è –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –≤—Ä—É—á–Ω—É—

 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:17<01:08,  8.58s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL (Test-Time Reinforcement Learning) –ø—Ä–æ–∏—Å—Ö–æ–¥–∏—Ç —á–µ—Ä–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ –º–µ—Ç–æ–¥–æ–≤, –∫–æ—Ç–æ—Ä—ã–µ –Ω–µ —Ç—Ä–µ–±—É—é—Ç —Ç–æ—á–Ω—ã—Ö –º–µ—Ç–æ–∫. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è Test-Time Scaling (TTS), –≥–¥–µ –¥–ª—è –ø–æ–ª—É—á–µ–Ω–∏—è –ø–æ–ª–µ–∑–Ω—ã—Ö –æ—Ü–µ–Ω–æ–∫ –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è, –Ω–∞–ø—Ä–∏–º–µ—Ä, –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —Å–æ–∑–¥–∞—Ç—å —Ñ—É–Ω–∫—Ü–∏—é –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è, –∫–æ—Ç–æ—Ä–∞—è –ø–æ–º–æ–≥–∞–µ—Ç –æ–±—É—á–µ–Ω–∏—é –º–æ–¥–µ–ª–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ–±—Ä–∞—Ç–Ω–æ–π —Å–≤—è–∑–∏, –ø–æ–ª—É—á–µ–Ω–Ω–æ–π –∏–∑ –Ω–µ–æ—Ç–º–µ—á–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:23<00:51,  7.41s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è —Å–æ–∑–¥–∞–Ω–∏—è –Ω–∞–≥—Ä–∞–¥–Ω–æ–π —Ñ—É–Ω–∫—Ü–∏–∏, –∫–æ—Ç–æ—Ä–∞—è –æ—Å–Ω–æ–≤—ã–≤–∞–µ—Ç—Å—è –Ω–∞ —Å–æ–≥–ª–∞—Å–æ–≤–∞–Ω–Ω–æ—Å—Ç–∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –æ–±—É—á–∞—Ç—å –º–æ–¥–µ–ª–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –±–µ–∑ –º–µ—Ç–æ–∫, —á—Ç–æ —è–≤–ª—è–µ—Ç—Å—è –≤–∞–∂–Ω—ã–º —à–∞–≥–æ–º –≤ —Ä–∞–∑–≤–∏—Ç–∏–∏ —Å–∞–º–æ–æ–±—É—á–∞—é—â–∏—Ö—Å—è —Å–∏—Å—Ç–µ–º –∏ RL —Å —Å–∞–º–æ–ø–æ–¥—Ç–≤–µ—Ä–∂–¥–µ–Ω–Ω—ã–º–∏ –Ω–∞–≥—Ä–∞–¥–∞–º–∏.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:33<00:49,  8.29s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ù–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024 –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ TTRL (—Ç–µ—Ö–Ω–∏–∫–∏ –æ–±—É—á–µ–Ω–∏—è –±–µ–∑ —É—á–∏—Ç–µ–ª—è) –∫ –º–æ–¥–µ–ª–∏ Qwen2.5-Math-7B –ø—Ä–∏–≤–µ–ª–æ –∫ —É–ª—É—á—à–µ–Ω–∏—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –Ω–∞ 159% (—Å 13.3 –¥–æ 43.3). –≠—Ç–æ –¥–æ—Å—Ç–∏–≥–∞–µ—Ç—Å—è –∑–∞ —Å—á—ë—Ç —Å–∞–º–æ–æ–±—É—á–µ–Ω–∏—è –∏ –Ω–µ —Ç—Ä–µ–±—É–µ—Ç –º–µ—Ç–æ–∫ –¥–ª—è —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∏, —á—Ç–æ –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –æ–±–æ–±—â–µ–Ω–∏–µ –Ω–∞ –¥—Ä—É–≥–∏–µ –∑–∞–¥–∞—á–∏. –¢–µ—Ö–Ω–∏–∫–∞ —Ç–∞–∫–∂–µ –ø–æ–ª–æ–∂–∏—Ç–µ–ª—å–Ω–æ –≤–ª–∏—è–µ—Ç –Ω–∞ –¥—Ä—É–≥–∏–µ –º–µ—Ç—Ä–∏–∫–∏ –∏ –º–æ–∂–µ—Ç –±—ã—Ç—å –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∞ —Å —Ä–∞–∑–Ω—ã–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏, –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É—è –≤—ã—Å–æ–∫–∏–π –ø–æ—Ç–æ–ª–æ–∫ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:39<00:38,  7.73s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ù–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, **TTRL** (Transferable Reinforcement Learning) –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å, —Å—Ä–∞–≤–Ω–∏–º—É—é —Å **RL** (Reinforcement Learning), —á—Ç–æ –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–∞–µ—Ç—Å—è –±–ª–∏–∑–æ—Å—Ç—å—é –∏—Ö –∫—Ä–∏–≤—ã—Ö –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ (—Å–º. –†–∏—Å. 7). –≠—Ç–æ —É–∫–∞–∑—ã–≤–∞–µ—Ç –Ω–∞ –≤—ã—Å–æ–∫—É—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å **TTRL** –≤ —É–ª—É—á—à–µ–Ω–∏–∏ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤, –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—è—â—É—é —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã–µ –º–µ—Ç–æ–¥—ã –æ–±—É—á–µ–Ω–∏—è –∏ –æ—Ü–µ–Ω–∫–∏.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:47<00:31,  7.77s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL (Temporal Reward-Driven Learning) –æ—Å—Ç–∞—ë—Ç—Å—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–º, –Ω–µ—Å–º–æ—Ç—Ä—è –Ω–∞ –Ω–µ—Ç–æ—á–Ω—ã–µ –æ—Ü–µ–Ω–∫–∏ –º–µ—Ç–æ–∫, –ø–æ—Ç–æ–º—É —á—Ç–æ —Å–∏—Å—Ç–µ–º–∞ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –ø—Ä–∞–≤–∏–ª–æ, —Å–æ–≥–ª–∞—Å–Ω–æ –∫–æ—Ç–æ—Ä–æ–º—É –Ω–∞–≥—Ä–∞–¥—ã –≤—ã—á–∏—Å–ª—è—é—Ç—Å—è –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏—è –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω–æ–≥–æ –æ—Ç–≤–µ—Ç–∞ –∏ "–º–µ—Ç–∫–∏". –î–∞–∂–µ –µ—Å–ª–∏ –æ—Ü–µ–Ω–∫–∞ –º–µ—Ç–∫–∏ –Ω–µ —Ç–æ—á–Ω–∞, –Ω–æ –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç –Ω–µ–≤–µ—Ä–Ω–æ–≥–æ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è, –æ–Ω–∞ –≤—Å—ë —Ä–∞–≤–Ω–æ –ø–æ–º–æ–≥–∞–µ—Ç –∫–æ—Ä—Ä–µ–∫—Ç–∏—Ä–æ–≤–∞—Ç—å –ø–æ–≤–µ–¥–µ–Ω–∏–µ –º–æ–¥–µ–ª–∏, –æ–±–µ—Å–ø–µ—á–∏–≤–∞—è –æ—Ç—Ä–∏—Ü–∞—Ç–µ–ª—å–Ω—É—é –Ω–∞–≥—Ä–∞–¥—É –∑–∞ –æ—à–∏–±–∫–∏.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [00:55<00:23,  7.80s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL (Test-Time Reinforcement Learning) –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ä–∞–∑–ª–∏—á–Ω—ã–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –ø—Ä–∏–º–µ–Ω—è–ª–∏—Å—å –±–∞–∑–æ–≤—ã–µ –º–æ–¥–µ–ª–∏ Qwen2.5-Math-1.5B –∏ Qwen2.5-Math-7B, –∞ —Ç–∞–∫–∂–µ –∏–Ω—Å—Ç—Ä—É–∫—Ü–∏–æ–Ω–Ω–∞—è –º–æ–¥–µ–ª—å LLaMA-3.1-8B-Instruct. –≠—Ç–∏ –º–æ–¥–µ–ª–∏ –ø—Ä–æ–≤–µ—Ä—è–ª–∏—Å—å –Ω–∞ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö, —á—Ç–æ–±—ã –ø–æ–∫–∞–∑–∞—Ç—å —É–Ω–∏–≤–µ—Ä—Å–∞–ª—å–Ω–æ—Å—Ç—å –ø–æ–¥—Ö–æ–¥–∞ –∏ –µ–≥–æ –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ–º–æ—Å—Ç—å.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:05<00:17,  8.54s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω—ã–µ –ø—Ä–∏—á–∏–Ω—ã –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ –∏–ª–∏ —Å–±–æ—è –≤ TTRL (Transfer and Training with Reinforcement Learning) —Å–≤—è–∑–∞–Ω—ã —Å –Ω–µ–¥–æ—Å—Ç–∞—Ç–∫–æ–º –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–Ω–∞–Ω–∏–π –æ —Ü–µ–ª–µ–≤–æ–π –∑–∞–¥–∞—á–µ. –≠—Ç–æ –æ—Å–æ–±–µ–Ω–Ω–æ –∫—Ä–∏—Ç–∏—á–Ω–æ, –∫–æ–≥–¥–∞ —Ç–µ—Å—Ç–æ–≤—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–ª–æ–∂–Ω–µ–µ –∏ –≤–∫–ª—é—á–∞—é—Ç –Ω–æ–≤—ã–µ —ç–ª–µ–º–µ–Ω—Ç—ã, –∞ –º–æ–¥–µ–ª—å –Ω–µ –∏–º–µ–µ—Ç –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö –º–µ—Ö–∞–Ω–∏–∑–º–æ–≤ –¥–ª—è –∞–¥–∞–ø—Ç–∞—Ü–∏–∏ –∏ –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–ª–æ–∂–Ω–æ–≥–æ –º–∞—Ç–µ—Ä–∏–∞–ª–∞. –í —Å–ª—É—á–∞–µ —Å Qwen2.5-Math-1.5B –∏ LLaMA-3.1-8B-Instruct, –æ—Ç—Å—É—Ç—Å—Ç–≤–∏–µ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–≥–æ —É–ª—É—á—à–µ–Ω–∏—è –º–æ–∂–µ—Ç –±—ã—Ç—å —Å–ª–µ–¥—Å—Ç–≤–∏–µ–º –Ω–µ–¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ–π –ø–æ–¥–≥–æ—Ç–æ–≤–ª–µ–Ω–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–µ–π –∫ —Ç–∞–∫–∏–º —Å–ª–æ–

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:15<00:08,  8.75s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL (Test-Time Reinforcement Learning) –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç —É–ª—É—á—à–µ–Ω–∏–µ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏. –í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ, –±–æ–ª—å—à–∏–µ –º–æ–¥–µ–ª–∏ (–Ω–∞–ø—Ä–∏–º–µ—Ä, 7B –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤) –ø–æ–∫–∞–∑—ã–≤–∞—é—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–µ —É–ª—É—á—à–µ–Ω–∏—è –≤ –∑–∞–¥–∞—á–∞—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ AIME 2024 –∏ AMC, –∑–∞ —Å—á–µ—Ç –±–æ–ª–µ–µ —Ç–æ—á–Ω–æ–≥–æ –ø—Ä–æ–≥–Ω–æ–∑–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –ª—É—á—à–µ–≥–æ –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –Ω–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –ú–æ–¥–µ–ª–∏ –º–µ–Ω—å—à–µ–≥–æ —Ä–∞–∑–º–µ—Ä–∞ (1.5B) –Ω–µ –¥–∞—é—Ç —Å—Ç–æ–ª—å –∑–∞–º–µ—Ç–Ω—ã—Ö —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤, —á—Ç–æ —Å–≤—è–∑–∞–Ω–æ —Å –∏—Ö –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω—ã–º–∏ –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç—è–º–∏.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:21<00:00,  8.11s/it]

–í–æ–ø—Ä–æ—Å: –° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –¢–µ—Ö–Ω–∏–∫–∞ TTRL (Temporal Training with Reinforcement Learning) —Å–æ–≤–º–µ—Å—Ç–∏–º–∞ —Å –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏, —Ç–∞–∫–∏–º–∏ –∫–∞–∫ PPO (Proximal Policy Optimization) –∏ GRPO (Generalized Reinforcement Policy Optimization). –í —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö, –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã—Ö –≤ –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ, –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è PPO, –∫–æ—Ç–æ—Ä—ã–π –ø–æ–∫–∞–∑–∞–ª —Å—Ç–∞–±–∏–ª—å–Ω—ã–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –∏ —Å–æ–ø–æ—Å—Ç–∞–≤–∏–º—É—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å —Å GRPO.







In [40]:
times_2 = times

In [41]:
np.mean(times_2)

np.float64(7.986197686195373)

In [42]:
df = pd.read_csv('test_results/article_2_questions.csv')

In [43]:
df['Tbank'] = outputs

In [45]:
df.to_csv('test_results/article_2_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

In [46]:
questions = ['–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?',
             '–ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?',
             '–ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?',
             '–ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?',
             '–ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?',
             '–ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?'
            ]

In [47]:
arxiv_url = "https://arxiv.org/pdf/2005.11401"
arxiv_id = extract_arxiv_id(arxiv_url)

In [48]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:09<01:28,  9.85s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í RAG (Retrieval-Augmented Generation) –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –º–æ–¥–µ–ª—å, —Å–æ—Å—Ç–æ—è—â–∞—è –∏–∑ –¥–≤—É—Ö –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–æ–≤: (i) "retriever" (–ø–æ–ª—É—á–∞—Ç–µ–ª—å), –∫–æ—Ç–æ—Ä—ã–π –Ω–∞ –æ—Å–Ω–æ–≤–µ –≤—Ö–æ–¥–Ω–æ–≥–æ –∑–∞–ø—Ä–æ—Å–∞ *x* –≤—ã–±–∏—Ä–∞–µ—Ç –∏ –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç –Ω–∞–∏–±–æ–ª–µ–µ —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã–µ —Ç–µ–∫—Å—Ç–æ–≤—ã–µ —Ñ—Ä–∞–≥–º–µ–Ω—Ç—ã *z*; –∏ (ii) "generator" (–≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä), –∫–æ—Ç–æ—Ä—ã–π, –∏—Å–ø–æ–ª—å–∑—É—è —ç—Ç–∏ —Ñ—Ä–∞–≥–º–µ–Ω—Ç—ã –∏ –∏—Å—Ö–æ–¥–Ω—ã–π –∑–∞–ø—Ä–æ—Å, –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç —Ü–µ–ª–µ–≤–æ–π —Ç–µ–∫—Å—Ç *y*. –í–∞–∂–Ω–æ –æ—Ç–º–µ—Ç–∏—Ç—å, —á—Ç–æ –æ–±–∞ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–∞ –∏–º–µ—é—Ç —Å–≤–æ–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏ —Ñ—É–Ω–∫—Ü–∏–∏ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–∏, –∫–æ—Ç–æ—Ä—ã–µ –æ–ø—Ä–µ–¥–µ–ª—è—é—Ç –∏—Ö —Ä–∞–±–æ—Ç—É.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:15<00:59,  7.39s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –º–æ–¥–µ–ª—å, –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω–∞—è –Ω–∞ –æ—Å–Ω–æ–≤–µ DPR (Dense Passage Retrieval). –î–ª—è —ç—Ç–æ–≥–æ –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ –æ—Å–Ω–æ–≤–µ Natural Questions –∏ TriviaQA, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ –∏–∑–≤–ª–µ–∫–∞—Ç—å –∏ —Ä–∞–Ω–∂–∏—Ä–æ–≤–∞—Ç—å —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã–µ –¥–æ–∫—É–º–µ–Ω—Ç—ã.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:25<01:00,  8.71s/it]

–í–æ–ø—Ä–æ—Å: –ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: **RAG-Token** –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –∞–≤—Ç–æ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–µ, –≥–¥–µ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å –ø–µ—Ä–µ—Ö–æ–¥–∞ *p* ( *y|x* ) –æ–ø—Ä–µ–¥–µ–ª—è–µ—Ç—Å—è —á–µ—Ä–µ–∑ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å *z* ( *¬∑|x* ) –∏ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å *y* ( *|x, z* ). –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –ø—Ä–∏–º–µ–Ω—è—Ç—å —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–µ –º–µ—Ç–æ–¥—ã –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏—è, —Ç–∞–∫–∏–µ –∫–∞–∫ beam search. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç —ç—Ç–æ–≥–æ, **RAG-Sequence** —Ç—Ä–µ–±—É–µ—Ç –∏–Ω–æ–≥–æ –ø–æ–¥—Ö–æ–¥–∞, —Ç–∞–∫ –∫–∞–∫ –µ–≥–æ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –Ω–µ —è–≤–ª—è–µ—Ç—Å—è –∞–≤—Ç–æ–∫–æ–¥–∏—Ä–æ–≤–æ—á–Ω–æ–π –∏ —Ç—Ä–µ–±—É–µ—Ç —Å–ø–µ—Ü–∏—Ñ–∏—á–µ—Å–∫–∏—Ö –º–µ—Ç–æ–¥–æ–≤ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–µ–π –∏ –ø–æ—Å–ª–µ–¥—É—é—â–µ–≥–æ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏—è.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:27<00:36,  6.13s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è 21 –º–∏–ª–ª–∏–æ–Ω –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤, –∫–∞–∂–¥—ã–π –∏–∑ –∫–æ—Ç–æ—Ä—ã—Ö —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ 100 —Å–ª–æ–≤.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:36<00:34,  6.89s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –∑–∞–¥–∞—á–∏, —Ç—Ä–µ–±—É—é—â–∏–µ –≥–ª—É–±–æ–∫–æ–≥–æ –ø–æ–Ω–∏–º–∞–Ω–∏—è –∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –∑–Ω–∞–Ω–∏–π –∏–∑ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∏—Å—Ç–æ—á–Ω–∏–∫–æ–≤, —Ç–∞–∫–∏–µ –∫–∞–∫ –æ—Ç–∫—Ä—ã—Ç—ã–µ –≤–æ–ø—Ä–æ—Å—ã –∏ –æ—Ç–≤–µ—Ç—ã (open domain QA), –∞ —Ç–∞–∫–∂–µ –¥—Ä—É–≥–∏–µ –∑–∞–¥–∞—á–∏, —Å–≤—è–∑–∞–Ω–Ω—ã–µ —Å –æ–±—Ä–∞–±–æ—Ç–∫–æ–π –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ–≥–æ —è–∑—ã–∫–∞, –≥–¥–µ –≤–∞–∂–Ω—ã –∫–æ–Ω—Ç–µ–∫—Å—Ç –∏ –¥–æ—Å—Ç—É–ø –∫ –±–æ–ª—å—à–∏–º –æ–±—ä–µ–º–∞–º –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –º–æ–¥–µ–ª–∏ –ø–æ–∫–∞–∑–∞–ª–∏ –≤—ã—Å–æ–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ —Ç–∞–∫–∏—Ö —Ç–µ—Å—Ç–∞—Ö, –∫–∞–∫ SQuAD, Natural Questions –∏ TriviaQA.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:42<00:27,  6.75s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π, –ø–æ—Ç–æ–º—É —á—Ç–æ –æ–Ω–∞ –ø–æ–∑–≤–æ–ª—è–µ—Ç —Å–æ–∑–¥–∞–≤–∞—Ç—å –æ—Ç–≤–µ—Ç—ã –Ω–∞ –æ—Å–Ω–æ–≤–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏, –∫–æ—Ç–æ—Ä–∞—è –Ω–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∞ –≤ –∏—Å—Ö–æ–¥–Ω–æ–º —Ç–µ–∫—Å—Ç–µ –Ω–∞–ø—Ä—è–º—É—é, –Ω–æ –º–æ–∂–µ—Ç –±—ã—Ç—å –≤—ã–≤–µ–¥–µ–Ω–∞ –∏–∑ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞. –≠—Ç–æ –æ—Å–æ–±–µ–Ω–Ω–æ –ø–æ–ª–µ–∑–Ω–æ –¥–ª—è –∑–∞–¥–∞—á, –≥–¥–µ –æ—Ç–≤–µ—Ç—ã –Ω–µ –≤—Å–µ–≥–¥–∞ —è–≤–Ω–æ —Å—Ñ–æ—Ä–º—É–ª–∏—Ä–æ–≤–∞–Ω—ã –≤ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ö, —á—Ç–æ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ –¥–ª—è –º–µ—Ç–æ–¥–æ–≤ –∏–∑–≤–ª–µ—á–µ–Ω–∏—è.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [00:49<00:20,  6.82s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: RAG (Reinforced Attention Generative Adversarial Networks) –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ª—É—á—à–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–µ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –¥–ª—è –∏–≥—Ä—ã Jeopardy –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART (Bidirectional and AutoRegressive Transformers). –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, RAG –≤—ã–∏–≥—Ä—ã–≤–∞–µ—Ç –ø–æ –º–µ—Ç—Ä–∏–∫–µ Q-BLEU-1 –∏ –ø–æ–ª—É—á–∞–µ—Ç –±–æ–ª–µ–µ –≤—ã—Å–æ–∫–∏–µ –æ—Ü–µ–Ω–∫–∏ –∑–∞ —Ñ–∞–∫—Ç–æ–ª–æ–≥–∏—á–µ—Å–∫—É—é —Ç–æ—á–Ω–æ—Å—Ç—å –∏ —Å–ø–µ—Ü–∏—Ñ–∏—á–Ω–æ—Å—Ç—å –≤ —á–µ–ª–æ–≤–µ—á–µ—Å–∫–æ–π –æ—Ü–µ–Ω–∫–µ.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:58<00:14,  7.37s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í RAG (Retrieval-Augmented Generation) –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –ø—Ä–æ–∏—Å—Ö–æ–¥–∏—Ç —Å–æ–≤–º–µ—Å—Ç–Ω–æ. –ì–µ–Ω–µ—Ä–∞—Ç–æ—Ä (–Ω–∞–ø—Ä–∏–º–µ—Ä, –º–æ–¥–µ–ª—å –Ω–∞ –æ—Å–Ω–æ–≤–µ T5 –∏–ª–∏ BART) –∏ —Å–∏—Å—Ç–µ–º–∞ –ø–æ–∏—Å–∫–∞ (—Ä–µ—Ç—Ä–∏–≤–µ—Ä) —Ä–∞–±–æ—Ç–∞—é—Ç –≤–º–µ—Å—Ç–µ, —á—Ç–æ–±—ã —É–ª—É—á—à–∏—Ç—å –∫–∞—á–µ—Å—Ç–≤–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–µ–∫—Å—Ç–∞. –û–±–∞ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–∞ –ø—Ä–æ—Ö–æ–¥—è—Ç –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ –æ–¥–Ω–æ–º –∏ —Ç–æ–º –∂–µ –Ω–∞–±–æ—Ä–µ –¥–∞–Ω–Ω—ã—Ö, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏–º –∞–¥–∞–ø—Ç–∏—Ä–æ–≤–∞—Ç—å—Å—è –∏ –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–æ–≤–∞—Ç—å –¥–ª—è —Å–æ–∑–¥–∞–Ω–∏—è –±–æ–ª–µ–µ —Ç–æ—á–Ω—ã—Ö –∏ —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö –æ—Ç–≤–µ—Ç–æ–≤.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:04<00:06,  6.92s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: "Hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –≤ RAG (Retrieval-Augmented Generation) –ø–æ–∑–≤–æ–ª—è–µ—Ç –æ–±–Ω–æ–≤–ª—è—Ç—å –∏ –∑–∞–º–µ–Ω—è—Ç—å –±–∞–∑—É –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –ø–æ–∏—Å–∫–∞ –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –ø–æ–≤—Ç–æ—Ä–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è –≤—Å–µ–π –º–æ–¥–µ–ª–∏. –≠—Ç–æ –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –≥–∏–±–∫–æ—Å—Ç—å –∏ –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç—å –±—ã—Å—Ç—Ä–æ–≥–æ –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏, —á—Ç–æ –≤–∞–∂–Ω–æ –¥–ª—è –ø–æ–¥–¥–µ—Ä–∂–∞–Ω–∏—è –∞–∫—Ç—É–∞–ª—å–Ω–æ—Å—Ç–∏ –∏ —Ç–æ—á–Ω–æ—Å—Ç–∏ –æ—Ç–≤–µ—Ç–æ–≤ –Ω–∞ –∑–∞–ø—Ä–æ—Å—ã.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:14<00:00,  7.40s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏—è –≤ RAG-Sequence –∏ RAG-Token –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è —Ä–∞–∑–ª–∏—á–Ω—ã–µ –º–µ—Ç–æ–¥—ã –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏. –í RAG-Token –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–π –∞–≤—Ç–æ–∫–æ–¥–∏—Ä–æ–≤–æ—á–Ω—ã–π –ø–æ–¥—Ö–æ–¥, –≥–¥–µ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å –ø–µ—Ä–µ—Ö–æ–¥–∞ *p* ( *y|x* ) –∞–ø–ø—Ä–æ–∫—Å–∏–º–∏—Ä—É–µ—Ç—Å—è —á–µ—Ä–µ–∑ –≤—ã–±–æ—Ä–∫—É –∏–∑ –≤–µ—Ä—Ö–Ω–∏—Ö *k* –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–µ–π *p* ( *¬∑|x* ) –∏ –ø–æ—Å–ª–µ–¥—É—é—â–µ–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ —ç—Ç–∏—Ö –∑–Ω–∞—á–µ–Ω–∏–π –≤ —Ä–∞–º–∫–∞—Ö —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω–æ–≥–æ –±–∏–º –¥–µ–∫–æ–¥–µ—Ä–∞. –í RAG-Sequence, –≤–µ—Ä–æ—è—Ç–Ω–æ, –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –¥—Ä—É–≥–æ–π –º–µ—Ç–æ–¥, –Ω–æ –∫–æ–Ω–∫—Ä–µ—Ç–Ω—ã–µ –¥–µ—Ç–∞–ª–∏ –Ω–µ —É–∫–∞–∑–∞–Ω—ã –≤ –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ.







In [49]:
times_3 = times

In [50]:
np.mean(times_3)

np.float64(7.293178129196167)

In [51]:
df = pd.read_csv('test_results/article_3_questions.csv')

In [52]:
df['Tbank'] = outputs

In [54]:
df.to_csv('test_results/article_3_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Efficient Estimation of Word Representations in Vector Space

In [55]:
questions = ['–ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?',
             '–í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?',
             '–ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?',
             '–ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?',
             '–ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?',
             '–ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?',
             '–ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?',
             '–ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?'
            ]

In [56]:
arxiv_url = "https://arxiv.org/pdf/1301.3781"
arxiv_id = extract_arxiv_id(arxiv_url)

In [57]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:10<01:34, 10.48s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–æ–∂–∏–ª–∏ –¥–≤–µ –º–æ–¥–µ–ª–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤: 
1. **–ú–æ–¥–µ–ª—å –Ω–∞ –æ—Å–Ω–æ–≤–µ –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ —Å –ª–∏–Ω–µ–π–Ω—ã–º —Å–ª–æ–µ–º –∏ –Ω–µ–ª–∏–Ω–µ–π–Ω—ã–º —Å–∫—Ä—ã—Ç—ã–º —Å–ª–æ–µ–º** (feedforward neural network with a linear projection layer and a non-linear hidden layer). 
2. **–ú–æ–¥–µ–ª—å, –∏—Å–ø–æ–ª—å–∑—É—é—â–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—É –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ –¥–ª—è —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (NNLM)**, –∫–æ—Ç–æ—Ä–∞—è —Ç–∞–∫–∂–µ –ø—Ä–∏–º–µ–Ω—è–ª–∞—Å—å –≤ –ø—Ä–µ–¥—ã–¥—É—â–∏—Ö –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏—è—Ö, –Ω–æ –±—ã–ª–∞ —É—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–∞ –∏ –∞–¥–∞–ø—Ç–∏—Ä–æ–≤–∞–Ω–∞ –¥–ª—è –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –

 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:24<01:38, 12.36s/it]

–í–æ–ø—Ä–æ—Å: –í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW (Continuous Bag of Words) –∏ Skip-gram –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –∏—Ö –ø–æ–¥—Ö–æ–¥–µ –∫ –æ–±—É—á–µ–Ω–∏—é. CBOW –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç —Ç–µ–∫—É—â–µ–µ —Å–ª–æ–≤–æ, –∏—Å–ø–æ–ª—å–∑—É—è –∫–æ–Ω—Ç–µ–∫—Å—Ç (–æ–∫—Ä—É–∂–µ–Ω–∏–µ) –∏–∑ –Ω–µ—Å–∫–æ–ª—å–∫–∏—Ö —Å–ª–æ–≤, —Ç–æ–≥–¥–∞ –∫–∞–∫ Skip-gram –¥–µ–ª–∞–µ—Ç –æ–±—Ä–∞—Ç–Ω–æ–µ: –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç –±–ª–∏–∂–∞–π—à–∏–µ —Å–ª–æ–≤–∞ –≤–æ–∫—Ä—É–≥ –∑–∞–¥–∞–Ω–Ω–æ–≥–æ —Å–ª–æ–≤–∞. –≠—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ —Ä–∞–∑–Ω—ã–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º –Ω–∞ –∑–∞–¥–∞—á–∞—Ö, —Ç—Ä–µ–±—É—é—â–∏—Ö –∞–Ω–∞–ª–∏–∑–∞ —Å–µ–º–∞–Ω—Ç–∏–∫–∏ –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—Å–∞ —Ç–µ–∫—Å—Ç–∞. Skip-gram –ª—É—á—à–µ —Å–ø—Ä–∞–≤–ª—è–µ—Ç—Å—è —Å –≤—ã—è–≤–ª–µ–Ω–∏–µ–º —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö —Å–≤—è–∑–µ–π, –∞ CBOW —Å–∏–ª—å–Ω–µ–µ –≤ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á–∞—Ö.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:33<01:17, 11.13s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í Semantic-Syntactic Word Relationship test set –≤–∫–ª—é—á–µ–Ω—ã –ø—è—Ç—å —Ç–∏–ø–æ–≤ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –≤–æ–ø—Ä–æ—Å–æ–≤ –∏ –¥–µ–≤—è—Ç—å —Ç–∏–ø–æ–≤ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –≤–æ–ø—Ä–æ—Å–æ–≤. –°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–µ –≤–æ–ø—Ä–æ—Å—ã –∫–∞—Å–∞—é—Ç—Å—è, –Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–≤—è–∑–µ–π –º–µ–∂–¥—É —Å–ª–æ–≤–∞–º–∏, –∫–∞–∫ "—Å—Ç—Ä–∞–Ω–∞-–≥–æ—Ä–æ–¥" (–Ω–∞–ø—Ä–∏–º–µ—Ä, –§—Ä–∞–Ω—Ü–∏—è –∏ –ü–∞—Ä–∏–∂), –∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏–µ –≤–æ–ø—Ä–æ—Å—ã –º–æ–≥—É—Ç –ø—Ä–æ–≤–µ—Ä—è—Ç—å –≥—Ä–∞–º–º–∞—Ç–∏—á–µ—Å–∫–∏–µ —Å–≤—è–∑–∏ –∏ –æ—Ç–Ω–æ—à–µ–Ω–∏—è, —Ç–∞–∫–∏–µ –∫–∞–∫ "–∫—Ç–æ" –∏ "—á—Ç–æ" –≤ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–∏. –í–æ–ø—Ä–æ—Å—ã —Å–æ–∑–¥–∞—é—Ç—Å—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –ø–∞—Ä —Å–ª–æ–≤, –∫–æ—Ç–æ—Ä—ã–µ –≤—ã–±–∏—Ä–∞—é—Ç—Å—è –∏–∑ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∫–∞—Ç–µ–≥–æ—Ä–∏–π, —Ç–∞–∫–∏—Ö –∫–∞–∫ –≥–æ—Ä–æ–¥–∞ –∏ –∏—Ö —à—Ç–∞—Ç—ã, –∏ –∑–∞—Ç–µ–º —Å–æ–µ–¥–∏–Ω—è—é—Ç—Å—è –

 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:36<00:47,  7.94s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–æ–¥–µ–ª—å Skip-gram –ø–æ–∫–∞–∑–∞–ª–∞ —Ç–æ—á–Ω–æ—Å—Ç—å 24% –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á–∞—Ö –∏ 30.1% –Ω–∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –∑–∞–¥–∞—á–∞—Ö –≤ –ø—Ä–∏–≤–µ–¥—ë–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:46<00:42,  8.56s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í –º–æ–¥–µ–ª–∏ Skip-gram –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –º–µ—Ç–æ–¥, –Ω–∞–∑—ã–≤–∞–µ–º—ã–π "skip-gram", –∫–æ—Ç–æ—Ä—ã–π –ø—Ä–µ–¥–ø–æ–ª–∞–≥–∞–µ—Ç, —á—Ç–æ –∫–∞–∂–¥–æ–µ —Å–ª–æ–≤–æ –≤ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–∏ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞–µ—Ç—Å—è –∫–∞–∫ –∫–æ–Ω—Ç–µ–∫—Å—Ç –¥–ª—è –≤—Å–µ—Ö –æ—Å—Ç–∞–ª—å–Ω—ã—Ö —Å–ª–æ–≤. –í –ø—Ä–æ—Ü–µ—Å—Å–µ –æ–±—É—á–µ–Ω–∏—è –º–æ–¥–µ–ª—å —É—á–∏—Ç—Å—è –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞—Ç—å –±–ª–∏–∂–∞–π—à–∏–µ —Å–ª–æ–≤–∞ –≤–æ–∫—Ä—É–≥ —Ç–µ–∫—É—â–µ–≥–æ —Å–ª–æ–≤–∞, —á—Ç–æ –ø–æ–º–æ–≥–∞–µ—Ç —Å–æ–∑–¥–∞—Ç—å –≤–µ–∫—Ç–æ—Ä–Ω—ã–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è —Å–ª–æ–≤, –æ—Ç—Ä–∞–∂–∞—é—â–∏–µ –∏—Ö —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫—É—é –±–ª–∏–∑–æ—Å—Ç—å. –í–∞–∂–Ω–æ, —á—Ç–æ –ø—Ä–∏ —ç—Ç–æ–º —É—á–∏—Ç—ã–≤–∞—é—Ç—Å—è –∫–∞–∫ —Å–ª–æ–≤–∞, –Ω–µ–ø–æ—Å—Ä–µ–¥—Å—Ç–≤–µ–Ω–Ω–æ –æ–∫—Ä—É–∂–∞—é—â–∏–µ —Ç–µ–∫—É—â–µ–µ —Å–ª–æ–≤

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:56<00:35,  8.89s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –§–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –¥–ª—è –º–æ–¥–µ–ª–∏ CBOW (Continuous Bag of Words) –≤—ã–≥–ª—è–¥–∏—Ç —Å–ª–µ–¥—É—é—â–∏–º –æ–±—Ä–∞–∑–æ–º:

*Q* = *C √ó* ( *D* + *D √ó log* 2 ( *V* )) (5)

–ó–¥–µ—Å—å:
- *C* ‚Äî –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–µ —Ä–∞—Å—Å—Ç–æ—è–Ω–∏–µ –º–µ–∂–¥—É —Å–ª–æ–≤–∞–º–∏ (context window size).
- *D* ‚Äî —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –≤–µ–∫—Ç–æ—Ä–æ–≤ —Å–ª–æ–≤.
- *V* ‚Äî —Ä–∞–∑–º–µ—Ä —Å–ª–æ–≤–∞—Ä—è.

–≠—Ç–∞ —Ñ–æ—Ä–º—É–ª–∞ –æ–ø–∏—Å—ã–≤–∞–µ—Ç –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–ø–µ—Ä–∞—Ü–∏–π, –Ω–µ–æ–±—Ö–æ–¥–∏–º—ã—Ö –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –º–æ–¥–µ–ª–∏, –∏ —É—á–∏—Ç—ã–≤–∞–µ—Ç –∫–∞–∫ —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –≤–µ–∫—Ç–æ—Ä–æ–≤, —Ç–∞–∫ –∏ –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —Å–ª–æ–≤ –≤ —Å–ª–æ–≤–∞—Ä–µ.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:04<00:26,  8.72s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö, —á—Ç–æ–±—ã —É–º–µ–Ω—å—à–∏—Ç—å –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—É—é —Å–ª–æ–∂–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–µ–π. –≠—Ç–æ –ø–æ–∑–≤–æ–ª–∏–ª–æ –∏–º –∏—Å—Å–ª–µ–¥–æ–≤–∞—Ç—å –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ –º–æ–¥–µ–ª–∏, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –±—ã—Ç—å –æ–±—É—á–µ–Ω—ã –Ω–∞ –±–æ–ª—å—à–∏—Ö –æ–±—ä–µ–º–∞—Ö –¥–∞–Ω–Ω—ã—Ö, —á—Ç–æ –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª—å–Ω–æ —É–ª—É—á—à–∞–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –æ–±—É—á–µ–Ω–∏—è –∏ –æ–±—Ä–∞–±–æ—Ç–∫–∏ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏. –í–º–µ—Å—Ç–æ —ç—Ç–æ–≥–æ –æ–Ω–∏ –ø—Ä–∏–º–µ–Ω—è—é—Ç –º–µ—Ç–æ–¥—ã, —Ç–∞–∫–∏–µ –∫–∞–∫ "bag-of-words" –∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ –Ω–µ—Å–∫–æ–ª—å–∫–∏—Ö —Å–ª–æ–≤ –∏–∑ –∏—Å—Ç–æ—Ä–∏–∏ –∏ –±—É–¥—É—â–µ–≥–æ –¥–ª—è –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏–∏ —Ç–µ–∫—É—â–µ–≥–æ —Å–ª–æ

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:14<00:18,  9.23s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏–µ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∏ –º–æ–¥–µ–ª–µ–π –≤ —Å–∏—Å—Ç–µ–º–µ DistBelief –ø—Ä–æ–∏—Å—Ö–æ–¥–∏–ª–æ –∑–∞ —Å—á–µ—Ç —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–∞–¥–∞—á –º–µ–∂–¥—É –º–Ω–æ–∂–µ—Å—Ç–≤–æ–º —Ä–µ–ø–ª–∏–∫ (–∫–æ–ø–∏–π) –º–æ–¥–µ–ª–∏. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∞—Å—å –∞—Å–∏–Ω—Ö—Ä–æ–Ω–Ω–∞—è –≥—Ä–∞–¥–∏–µ–Ω—Ç–Ω–∞—è —Å—Ö–µ–º–∞ —Å –º–∏–Ω–∏-–ø–∞–∫–µ—Ç–∞–º–∏ –∏ –∞–¥–∞–ø—Ç–∏–≤–Ω—ã–º –º–µ—Ç–æ–¥–æ–º –æ–±—É—á–µ–Ω–∏—è Adagrad. –ö–∞–∂–¥–∞—è —Ä–µ–ø–ª–∏–∫–∞ —Ä–∞–±–æ—Ç–∞–ª–∞ –Ω–∞ –º–Ω–æ–∂–µ—Å—Ç–≤–µ CPU-—è–¥–µ—Ä, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–ª–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å —Ä–µ—Å—É—Ä—Å—ã –∫–ª–∞—Å—Ç–µ—Ä–∞. –í —Å—Ä–µ–¥–Ω–µ–º, –ø—Ä–∏–º–µ–Ω—è–ª–æ—Å—å –æ—Ç 50 –¥–æ 100 —Ä–µ–ø–ª–∏–∫, —á—Ç–æ –æ–±–µ—Å–ø–µ—á–∏–≤–∞–ª–æ –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ –±–æ–

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:24<00:09,  9.25s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í–æ–ø—Ä–æ—Å—ã –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è word vectors –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤ –¥–≤–∞ —ç—Ç–∞–ø–∞. –°–Ω–∞—á–∞–ª–∞ –≤—Ä—É—á–Ω—É—é —Å–æ—Å—Ç–∞–≤–ª—è–ª–∏—Å—å —Å–ø–∏—Å–∫–∏ –ø–æ—Ö–æ–∂–∏—Ö –ø–∞—Ä —Å–ª–æ–≤. –ó–∞—Ç–µ–º, –Ω–∞ –æ—Å–Ω–æ–≤–µ —ç—Ç–∏—Ö —Å–ø–∏—Å–∫–æ–≤, —Å–æ–∑–¥–∞–≤–∞–ª–∏—Å—å –≤–æ–ø—Ä–æ—Å—ã, —Å–æ–µ–¥–∏–Ω—è—è —Å–ª—É—á–∞–π–Ω—ã–µ –ø–∞—Ä—ã —Å–ª–æ–≤. –ù–∞–ø—Ä–∏–º–µ—Ä, –¥–ª—è —Å–æ–∑–¥–∞–Ω–∏—è –≤–æ–ø—Ä–æ—Å–æ–≤ –æ –≥–æ—Ä–æ–¥–∞—Ö –∏ —à—Ç–∞—Ç–∞—Ö –°–®–ê, –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ø–∏—Å–∫–∏ –≥–æ—Ä–æ–¥–æ–≤ –∏ –∏—Ö —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É—é—â–∏—Ö —à—Ç–∞—Ç–æ–≤. –í –∏—Ç–æ–≥–µ, —Ç–µ—Å—Ç–æ–≤—ã–π –Ω–∞–±–æ—Ä –≤–∫–ª—é—á–∞–µ—Ç –≤–æ–ø—Ä–æ—Å—ã, –∫–æ—Ç–æ—Ä—ã–µ –ø—Ä–æ–≤–µ—Ä—è—é—Ç –∫–∞–∫ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–µ, —Ç–∞–∫ –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏–µ —Å–≤—è–∑–∏ —Å–ª–æ–≤, –∏ –æ—Ö–≤–∞—Ç—ã–≤–∞–µ—Ç —Ç–æ–ª—å–∫–æ –æ–¥–Ω–æ—Å

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:34<00:00,  9.44s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è —Ä–µ—à–µ–Ω–∏—è –∞–Ω–∞–ª–æ–≥–∏–π, —Ç–∞–∫–∏—Ö –∫–∞–∫ "king - man + woman =?", –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –º–µ—Ç–æ–¥, –æ—Å–Ω–æ–≤–∞–Ω–Ω—ã–π –Ω–∞ –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è—Ö —Å–ª–æ–≤. –í —ç—Ç–æ–º –º–µ—Ç–æ–¥–µ –∏–∑ –≤–µ–∫—Ç–æ—Ä–Ω–æ–≥–æ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è —Å–ª–æ–≤–∞ "King" –≤—ã—á–∏—Ç–∞–µ—Ç—Å—è –≤–µ–∫—Ç–æ—Ä —Å–ª–æ–≤–∞ "Man", –∞ –∑–∞—Ç–µ–º –ø—Ä–∏–±–∞–≤–ª—è–µ—Ç—Å—è –≤–µ–∫—Ç–æ—Ä —Å–ª–æ–≤–∞ "Woman". –ü–æ–ª—É—á–µ–Ω–Ω—ã–π –≤–µ–∫—Ç–æ—Ä —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å –≤–µ–∫—Ç–æ—Ä–∞–º–∏ –¥—Ä—É–≥–∏—Ö —Å–ª–æ–≤, –∏ –±–ª–∏–∂–∞–π—à–∏–π –ø–æ —Å–º—ã—Å–ª—É —Å—á–∏—Ç–∞–µ—Ç—Å—è –æ—Ç–≤–µ—Ç–æ–º. –í –¥–∞–Ω–Ω–æ–º —Å–ª—É—á–∞–µ, —ç—Ç–æ –º–æ–∂–µ—Ç –±—ã—Ç—å —Å–ª–æ–≤–æ "Queen". 

–¢–∞–∫–∏–º –æ–±—Ä–∞–∑–æ–º, –∞–ª–≥–µ–±—Ä–∞–∏—á–µ—Å–∫–∏–µ –æ–ø–µ—Ä–∞—Ü–∏–∏ –Ω–∞–¥ –≤–µ–∫—Ç–æ—Ä–∞–º–∏ —Å–ª–æ–≤ –ø–æ–∑–≤–æ–ª—è—é—Ç –≤—ã—è–≤–∏—Ç—å —




In [59]:
times_4 = times

In [60]:
np.mean(times_4)

np.float64(9.327714204788208)

In [61]:
df = pd.read_csv('test_results/article_4_questions.csv')

In [62]:
df['Tbank'] = outputs

In [64]:
df.to_csv('test_results/article_4_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è LADDER: SELF-IMPROVING LLMS THROUGH RECURSIVE PROBLEM DECOMPOSITION

In [65]:
questions = ['–í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?',
             '–ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?',
             '–ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?',
             '–ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?'
            ]

In [66]:
arxiv_url = "https://arxiv.org/pdf/2503.00735"
arxiv_id = extract_arxiv_id(arxiv_url)

In [67]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, eos_token_id=terminators)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:07<01:05,  7.30s/it]

–í–æ–ø—Ä–æ—Å: –í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ö–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–∏ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ –ø–æ–¥—Ö–æ–¥–∞ –∫ –æ–±—É—á–µ–Ω–∏—é, –≥–¥–µ –º–æ–¥–µ–ª—å —Å–∞–º–∞ –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –∏ —Ä–µ—à–∞–µ—Ç —É–ø—Ä–æ—â–µ–Ω–Ω—ã–µ –≤–µ—Ä—Å–∏–∏ —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —Å–æ–∑–¥–∞—Ç—å –ø–æ—Å—Ç–µ–ø–µ–Ω–Ω—ã–π –≥—Ä–∞–¥–∏–µ–Ω—Ç —Å–ª–æ–∂–Ω–æ—Å—Ç–∏, —á—Ç–æ —Å–ø–æ—Å–æ–±—Å—Ç–≤—É–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–º—É –∏ —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ–º—É –ø—Ä–æ—Ü–µ—Å—Å—É –æ–±—É—á–µ–Ω–∏—è, –Ω–µ —Ç—Ä–µ–±—É—é—â–µ–º—É –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã—Ö –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—ã—Ö —Ä–µ—Å—É—Ä—Å–æ–≤ –∏–ª–∏ –≤–Ω–µ—à–Ω–µ–≥–æ –∫–æ–Ω—Ç—Ä–æ–ª—è.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:11<00:46,  5.76s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —á–∏—Å–ª–µ–Ω–Ω—ã–π –º–µ—Ç–æ–¥ –ø—Ä–æ–≤–µ—Ä–∫–∏, –∫–æ—Ç–æ—Ä—ã–π —Å–æ—á–µ—Ç–∞–µ—Ç —Ç–æ—á–Ω–æ—Å—Ç—å —Å —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å—é –≤—ã—á–∏—Å–ª–µ–Ω–∏–π. –≠—Ç–æ—Ç –ø—Ä–æ—Ü–µ—Å—Å –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –Ω–∞–¥–µ–∂–Ω—É—é –ø—Ä–æ–≤–µ—Ä–∫—É –Ω–∞ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö —Ç–∏–ø–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤, —É—á–∏—Ç—ã–≤–∞—è –æ—Å–æ–±–µ–Ω–Ω–æ—Å—Ç–∏ –∏ –≤–æ–∑–º–æ–∂–Ω—ã–µ —á–∏—Å–ª–µ–Ω–Ω—ã–µ –ø—Ä–æ–±–ª–µ–º—ã.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:21<00:51,  7.38s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í LADDER –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø—Ä–æ–∏—Å—Ö–æ–¥–∏—Ç –≤ –¥–≤–∞ —ç—Ç–∞–ø–∞. –ù–∞ –ø–µ—Ä–≤–æ–º —ç—Ç–∞–ø–µ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∏–Ω—Ç–µ–≥—Ä–∞–ª–∞ –∏–∑ –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏ —Å–æ–∑–¥–∞—ë—Ç—Å—è –¥–µ—Ä–µ–≤–æ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ (—Å–º. —Ä–∞–∑–¥–µ–ª 3.1.2). –ù–∞ –≤—Ç–æ—Ä–æ–º —ç—Ç–∞–ø–µ –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –ø—Ä–æ—Ç–æ–∫–æ–ª –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º (—Å–º. —Ä–∞–∑–¥–µ–ª 3.1.4), –≥–¥–µ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –Ω–∞–±–æ—Ä –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∫–∞–∫ –æ–±—É—á–∞—é—â–∏–π –º–∞—Ç–µ—Ä–∏–∞–ª. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —É–ª—É—á—à–∏—Ç—å –∏ –æ–±–æ–±—â–∏—Ç—å —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ –∫ –∏–Ω—Ç–µ–≥—Ä–∞—Ü–∏–∏. –í –∏—Ç–æ–≥–µ, –º–æ–¥–µ–ª—å –º–æ–∂–µ—Ç —Ä–µ—à–∞—Ç—å –Ω–æ–≤—ã–µ –∑–∞–¥–∞—á–∏.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:30<00:49,  8.19s/it]

–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL (Test-Time Recursive Learning) ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥, –∫–æ—Ç–æ—Ä—ã–π —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –ø—Ä–∏–º–µ–Ω—è—è —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –∑–∞–¥–∞—á. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç LADDER, –∫–æ—Ç–æ—Ä—ã–π –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Å—Ç—Ä—É–∫—Ç—É—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∏ —Ä–µ—à–µ–Ω–∏—è –∑–∞–¥–∞—á, TTRL –Ω–µ–ø–æ—Å—Ä–µ–¥—Å—Ç–≤–µ–Ω–Ω–æ –ø—Ä–∏–º–µ–Ω—è–µ—Ç —ç—Ç–æ—Ç –º–µ—Ç–æ–¥ –∫ —Å–∞–º–∏–º —Ç–µ—Å—Ç–æ–≤—ã–º –≤–æ–ø—Ä–æ—Å–∞–º, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ –ª—É—á—à–µ —Å–ø—Ä–∞–≤–ª—è—Ç—å—Å—è —Å —Ç—Ä—É–¥–Ω—ã–º–∏ —Å–ª—É—á–∞—è–º–∏. –í —Ä–µ–∑—É–ª—å—Ç–∞—Ç–µ, TTRL –ø–æ–º–æ–≥–∞–µ—Ç –º–æ–¥–µ–ª–∏, –æ–±—É—á–µ–Ω–Ω–æ–π –≤ —Ä–∞–º–∫–∞—Ö LADDER, –ø–æ–∫–∞–∑—ã–≤–∞—Ç—å –ª—É—á—à–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã, —á–µ–º –±–æ–ª–µ–µ –∫—Ä—É–ø–Ω—ã–µ –º–æ–¥–µ–ª–∏, —Ç–∞–∫–∏–µ –∫–∞–∫ o1

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:35<00:34,  6.86s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ù–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –º–æ–¥–µ–ª—å Qwen2.5 7B, –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL, –¥–æ—Å—Ç–∏–≥–ª–∞ 90% —Ç–æ—á–Ω–æ—Å—Ç–∏, —á—Ç–æ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –ø—Ä–µ–≤—ã—Å–∏–ª–æ —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–π –ø–æ—Ä–æ–≥ –≤ 73% –∏ —É—Å—Ç–∞–Ω–æ–≤–∏–ª–æ –Ω–æ–≤—ã–π —Ä–µ–∫–æ—Ä–¥ –¥–ª—è –º–æ–¥–µ–ª–µ–π —Ç–∞–∫–æ–≥–æ —Ä–∞–∑–º–µ—Ä–∞.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:41<00:26,  6.60s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í —Ä–∞–º–∫–∞—Ö LADDER –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è, –∫–æ—Ç–æ—Ä–∞—è –æ—Å–Ω–æ–≤–∞–Ω–∞ –Ω–∞ —Å—Ä–∞–≤–Ω–µ–Ω–∏–∏ —Ç–µ–∫—É—â–∏—Ö –∏ –ø—Ä–µ–¥—ã–¥—É—â–∏—Ö —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –º–æ–¥–µ–ª–∏. –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –≤ GRPO –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞, –∑–∞–≤–∏—Å—è—â–∞—è –æ—Ç —Ä–∞–∑–Ω–∏—Ü—ã –º–µ–∂–¥—É –Ω–æ–≤—ã–º–∏ –∏ —Å—Ç–∞—Ä—ã–º–∏ –æ—Ç–≤–µ—Ç–∞–º–∏ –≥—Ä—É–ø–ø—ã, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ –æ—Ü–µ–Ω–∏–≤–∞—Ç—å —Å–≤–æ—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –∏ –∫–æ—Ä—Ä–µ–∫—Ç–∏—Ä–æ–≤–∞—Ç—å —Å–µ–±—è –Ω–∞ –æ—Å–Ω–æ–≤–µ —ç—Ç–æ–π –æ–±—Ä–∞—Ç–Ω–æ–π —Å–≤—è–∑–∏.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [00:49<00:21,  7.03s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ò—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ RL –±–µ–∑ —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã—Ö –º–µ—Ç–æ–¥–æ–≤ –∏ –ø–æ–¥—Ö–æ–¥–æ–≤ –∫ –∑–∞–¥–∞—á–∞–º –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è, —Ç–∞–∫–∏–º –∫–∞–∫ LADDER, –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –Ω–∏–∑–∫–∏–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º –∏–∑-–∑–∞ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ —Å–ø–µ—Ü–∏—Ñ–∏—á–Ω–æ—Å—Ç–∏ —ç—Ç–∏—Ö –∑–∞–¥–∞—á. –í–∞–∂–Ω–æ, —á—Ç–æ —É—Å–ø–µ—Ö –¥–æ—Å—Ç–∏–≥–∞–µ—Ç—Å—è –∏–º–µ–Ω–Ω–æ –±–ª–∞–≥–æ–¥–∞—Ä—è —Ç—â–∞—Ç–µ–ª—å–Ω–æ —Ä–∞–∑—Ä–∞–±–æ—Ç–∞–Ω–Ω–æ–π —Å—Ç—Ä—É–∫—Ç—É—Ä–µ –∑–∞–¥–∞—á, –∞ –Ω–µ —Ç–æ–ª—å–∫–æ –∞–ª–≥–æ—Ä–∏—Ç–º—É RL. –≠—Ç–æ —É–∫–∞–∑—ã–≤–∞–µ—Ç –Ω–∞ –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤—ã –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è –ø–æ–¥–æ–±–Ω—ã—Ö –º–µ—Ç–æ–¥–æ–≤ –≤ –¥—Ä—É–≥–∏—Ö —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö, –≥–¥–µ –ø—Ä—è–º–æ–µ –æ–±—É—á–µ–Ω–∏–µ –æ–∫–∞–∑—ã–≤–∞–µ—Ç—Å—è –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–º.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:04<00:19,  9.71s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –∑–∞–¥–∞—á –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–≤–µ –∫–ª—é—á–µ–≤—ã–µ —Ç–µ—Ö–Ω–∏–∫–∏:

1. **–¢–µ–º–ø–µ—Ä–∞—Ç—É—Ä–Ω–æ–µ –∏–∑–º–µ–Ω–µ–Ω–∏–µ (Temperature Cycling):** –í –ø—Ä–æ—Ü–µ—Å—Å–µ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –ø—Ä–∏–º–µ–Ω—è–ª—Å—è –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–∏–π –¥–∏–∞–ø–∞–∑–æ–Ω —Ç–µ–º–ø–µ—Ä–∞—Ç—É—Ä –æ—Ç 0.8 –¥–æ 1.4. –≠—Ç–æ –ø–æ–∑–≤–æ–ª–∏–ª–æ —É–ø—Ä–∞–≤–ª—è—Ç—å –±–∞–ª–∞–Ω—Å–æ–º –º–µ–∂–¥—É –Ω–æ–≤–∏–∑–Ω–æ–π –∏ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–æ–π –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ—Å—Ç—å—é.

2. **–ü–µ—Ä—Å–æ–Ω–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –∑–∞–ø—Ä–æ—Å—ã (Persona-based Prompting):** –ú–æ–¥–µ–ª—å –ø–æ–ª—É—á–∞–ª–∞ –∏–Ω—Å—Ç—Ä—É–∫—Ü–∏–∏, –∫–æ—Ç–æ—Ä—ã–µ —Ç—Ä–µ–±–æ–≤–∞–ª–∏ –æ—Ç –Ω–µ—ë —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—Ç—å –∑–∞–¥–∞—á–∏ —Å —Ä–∞–∑–Ω—ã—Ö —Ç

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:12<00:09,  9.20s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ, –∏—Å–ø–æ–ª—å–∑—É—è –º–µ—Ç–æ–¥—ã —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏—è –∑–∞–¥–∞—á –∏ –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏—è. –ú–æ–¥–µ–ª—å —Ä–∞–∑–±–∏–≤–∞–µ—Ç —Å–ª–æ–∂–Ω—ã–µ –ø—Ä–æ–±–ª–µ–º—ã –Ω–∞ –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ —á–∞—Å—Ç–∏, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –µ–π –∞–¥–∞–ø—Ç–∏—Ä–æ–≤–∞—Ç—å—Å—è –∏ —Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞—Ç—å—Å—è –≤ –ø—Ä–æ—Ü–µ—Å—Å–µ –≤—ã–ø–æ–ª–Ω–µ–Ω–∏—è –∑–∞–¥–∞—á, –Ω–µ —Ç—Ä–µ–±—É—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ —Å–∞–º–æ–π –º–æ–¥–µ–ª–∏ –∏–ª–∏ –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –≠—Ç–æ —Å–ø–æ—Å–æ–±—Å—Ç–≤—É–µ—Ç –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–æ–º—É —Ä–∞–∑–≤–∏—Ç–∏—é –Ω–∞–≤—ã–∫–æ–≤ –∏ –ø–æ–≤—ã—à–µ–Ω–∏—é —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –≤ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ–º —Ä–æ—Å—Ç–µ —Ä–µ—Å—É—Ä—Å–æ–≤.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:20<00:00,  8.02s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω—ã–µ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∑–∞–¥–∞—á —Å–≤—è–∑–∞–Ω—ã —Å —Ç–µ–º, —á—Ç–æ –Ω–µ–±–æ–ª—å—à–∏–µ –∏–∑–º–µ–Ω–µ–Ω–∏—è –≤ –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç–∞—Ö –∏–ª–∏ —Å—Ç—Ä—É–∫—Ç—É—Ä–µ —Ñ—É–Ω–∫—Ü–∏–π –º–æ–≥—É—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –ø–æ–≤—ã—Å–∏—Ç—å —Å–ª–æ–∂–Ω–æ—Å—Ç—å –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤. –ù–∞–ø—Ä–∏–º–µ—Ä, –∏–∑–º–µ–Ω–µ–Ω–∏–µ –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç–∞ –≤ —Ä–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π —Ñ—É–Ω–∫—Ü–∏–∏ –º–æ–∂–µ—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ –ø–æ—è–≤–ª–µ–Ω–∏—é –∫–æ—Ä–Ω–µ–π, –∫–æ—Ç–æ—Ä—ã–µ —É—Å–ª–æ–∂–Ω—è—é—Ç –≤—ã—á–∏—Å–ª–µ–Ω–∏—è. –≠—Ç–æ –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç, —á—Ç–æ –¥–∞–∂–µ –Ω–µ–∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—ã–µ –º–æ–¥–∏—Ñ–∏–∫–∞—Ü–∏–∏ –º–æ–≥—É—Ç —Å–∏–ª—å–Ω–æ –≤–ª–∏—è—Ç—å –Ω–∞ —É—Ä–æ–≤–µ–Ω—å —Ç—Ä—É–¥–Ω–æ—Å—Ç–∏ –∑–∞–¥–∞—á–∏.







In [68]:
times_5 = times

In [69]:
np.mean(times_5)

np.float64(7.925498390197754)

In [70]:
df = pd.read_csv('test_results/article_5_questions.csv')

In [71]:
df['Tbank'] = outputs

In [73]:
df.to_csv('test_results/article_5_questions.csv', index=False)

In [15]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Tbank: 8.5 —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Tbank: 8.5 —Å–µ–∫


### YandexGPT 5

In [5]:
model_name = "yandex/YandexGPT-5-Lite-8B-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map="auto")

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)

embedding_model = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  embedding_model = HuggingFaceBgeEmbeddings(
  vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)


In [6]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –¥–∞—Ç—å –∫–æ—Ä–æ—Ç–∫–∏–π, —Ç–æ—á–Ω—ã–π –∏ –æ–¥–Ω–æ–∑–Ω–∞—á–Ω—ã–π –æ—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å, "
    "–∏—Å–ø–æ–ª—å–∑—É—è —Ç–æ–ª—å–∫–æ –ø—Ä–∏–≤–µ–¥—ë–Ω–Ω—ã–π –∫–æ–Ω—Ç–µ–∫—Å—Ç. "
    "–ù–µ–ª—å–∑—è –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å –¥–∏–∞–ª–æ–≥, –∑–∞–¥–∞–≤–∞—Ç—å –≤—Å—Ç—Ä–µ—á–Ω—ã–µ –≤–æ–ø—Ä–æ—Å—ã –∏–ª–∏ –ø–æ–≤—Ç–æ—Ä—è—Ç—å—Å—è. –û—Ç–≤–µ—á–∞–π —Å—Ç—Ä–æ–≥–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ"
)


def build_messages(question: str, context: str) -> List[dict]:
    """–§–æ—Ä–º–∏—Ä—É–µ—Ç —Å–æ–æ–±—â–µ–Ω–∏—è –≤ –Ω—É–∂–Ω–æ–º –¥–ª—è chat_template —Ñ–æ—Ä–º–∞—Ç–µ."""
    return [
        {"role": "user", "content": SYSTEM_MSG},
        {"role": "user", "content": f"–í–æ–ø—Ä–æ—Å:\n{question}"},
        {"role": "user", "content": f"–ö–æ–Ω—Ç–µ–∫—Å—Ç:\n{context}"},
    ]

#### Attention is all you need

In [9]:
questions = ['–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?',
            '–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?',
             '–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?',
             '–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?',
             '–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?',
             '–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?'
            ]

In [10]:
arxiv_url = "https://arxiv.org/abs/1706.03762"
arxiv_id = extract_arxiv_id(arxiv_url)

In [13]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                             | 0/10 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 1/10 [00:03<00:32,  3.66s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ú—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏ Œ≤1 = 0,9, Œ≤2 = 0,98 –∏ œµ = 10^[-9]^.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 2/10 [00:06<00:24,  3.05s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏, –Ω–∞–∏–±–æ–ª–µ–µ —á–∞—Å—Ç–æ –∏—Å–ø–æ–ª—å–∑—É–µ–º—ã–µ –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö encoder-decoder, –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º multi-headed self-attention.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 3/10 [00:15<00:40,  5.78s/it]


–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–Ω–∏–º–∞–Ω–∏—è, –ø—Ä–∏ –∫–æ—Ç–æ—Ä–æ–º –≤—Ö–æ–¥–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–æ—Å—Ç–æ—è—Ç –∏–∑ –∑–∞–ø—Ä–æ—Å–æ–≤ –∏ –∫–ª—é—á–µ–π —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç–∏ \(d_k\), –∏ –∑–Ω–∞—á–µ–Ω–∏–π —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç–∏ \(d_v\). –°–Ω–∞—á–∞–ª–∞ –≤—ã—á–∏—Å–ª—è—é—Ç—Å—è —Å–∫–∞–ª—è—Ä–Ω—ã–µ –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∏—è –∑–∞–ø—Ä–æ—Å–∞ —Å–æ –≤—Å–µ–º–∏ –∫–ª—é—á–∞–º–∏, –∑–∞—Ç–µ–º –∫–∞–∂–¥–æ–µ –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∏–µ –¥–µ–ª–∏—Ç—Å—è –Ω–∞ \(\sqrt{d_k}\), –ø–æ—Å–ª–µ —á–µ–≥–æ –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è —Ñ—É–Ω–∫—Ü–∏—è softmax –¥–ª—è –ø–æ–ª—É—á–µ–Ω–∏—è –≤–µ—Å–æ–≤ –∑–Ω–∞—á–µ–Ω–∏–π. –î–µ–ª–µ–Ω–∏–µ –Ω–∞ \(\sqrt{d_k}\) –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–∏ –∏ —Å—Ç–∞–±–∏–ª–∏–∑–∞—Ü–∏–∏ –æ–±—É—á–µ–Ω–∏—è.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 4/10 [00:21<00:36,  6.04s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –°—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ stacked self-attention –∏ point-wise, fully connected layers. Encoder –ø—Ä–µ–æ–±—Ä–∞–∑—É–µ—Ç –≤—Ö–æ–¥–Ω—É—é –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å —Å–∏–º–≤–æ–ª—å–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π –≤ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π. Decoder –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –≤—ã—Ö–æ–¥–Ω—É—é –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å —Å–∏–º–≤–æ–ª–æ–≤, –∏—Å–ø–æ–ª—å–∑—É—è –ø–æ–ª—É—á–µ–Ω–Ω—ã–µ –æ—Ç encoder –¥–∞–Ω–Ω—ã–µ –∏ –ø–æ—Ç—Ä–µ–±–ª—è—è —Ä–∞–Ω–µ–µ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ —Å–∏–º–≤–æ–ª—ã –∫–∞–∫ –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–π –≤—Ö–æ–¥ –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Å–ª–µ–¥—É—é—â–µ–≥–æ —ç–ª–µ–º–µ–Ω—Ç–∞.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 5/10 [00:24<00:24,  4.89s/it]


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  Multi-head attention –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–æ–≤–º–µ—Å—Ç–Ω–æ —É—á–∏—Ç—ã–≤–∞—Ç—å –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –∏–∑ —Ä–∞–∑–Ω—ã—Ö –ø–æ–¥–ø—Ä–æ—Å—Ç—Ä–∞–Ω—Å—Ç–≤ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π –≤ —Ä–∞–∑–Ω—ã—Ö –ø–æ–∑–∏—Ü–∏—è—Ö. –° –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è —É—Å—Ä–µ–¥–Ω–µ–Ω–∏–µ —ç—Ç–æ –∏–Ω–≥–∏–±–∏—Ä—É–µ—Ç.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 6/10 [00:31<00:22,  5.56s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç —Ñ—É–Ω–∫—Ü–∏—é –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è –ø–æ–∑–∏—Ü–∏–∏ –≤ –≤–∏–¥–µ —Å–∏–Ω—É—Å–æ–∏–¥. –û–Ω–∏ –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ –ª–µ–≥–∫–æ –Ω–∞—É—á–∏—Ç—å—Å—è —É—á–∏—Ç—ã–≤–∞—Ç—å –æ—Ç–Ω–æ—Å–∏—Ç–µ–ª—å–Ω—ã–µ –ø–æ–∑–∏—Ü–∏–∏, –ø–æ—Å–∫–æ–ª—å–∫—É –¥–ª—è –ª—é–±–æ–≥–æ —Ñ–∏–∫—Å–∏—Ä–æ–≤–∞–Ω–Ω–æ–≥–æ —Å–º–µ—â–µ–Ω–∏—è k, PE(pos + k) –º–æ–∂–Ω–æ –ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç—å –∫–∞–∫ –ª–∏–Ω–µ–π–Ω—É—é —Ñ—É–Ω–∫—Ü–∏—é –æ—Ç PE(pos). –°–∏–Ω—É—Å–æ–∏–¥—ã –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã –∏–∑-–∑–∞ –≥–∏–ø–æ—Ç–µ–∑—ã, —á—Ç–æ –æ–Ω–∏ –ø–æ–º–æ–≥—É—Ç –º–æ–¥–µ–ª–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —Ä–∞–±–æ—Ç–∞—Ç—å —Å –æ—Ç–Ω–æ—Å–∏—Ç–µ–ª—å–Ω—ã–º–∏ –ø–æ–∑–∏—Ü–∏—è–º–∏.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 7/10 [00:37<00:17,  5.72s/it]


–í–æ–ø—Ä–æ—Å: –í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ —Å–ª–æ–π self-attention –±—ã—Å—Ç—Ä–µ–µ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö —Å–ª–æ—ë–≤, –∫–æ–≥–¥–∞ –¥–ª–∏–Ω–∞ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ $n$ –º–µ–Ω—å—à–µ —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç–∏ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è $d$. –°–∞–º–æ–≤–Ω–∏–º–∞–Ω–∏–µ —Å–æ–µ–¥–∏–Ω—è–µ—Ç –≤—Å–µ –ø–æ–∑–∏—Ü–∏–∏ —Å –ø–æ—Å—Ç–æ—è–Ω–Ω—ã–º —á–∏—Å–ª–æ–º –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ –≤—ã–ø–æ–ª–Ω—è–µ–º—ã—Ö –æ–ø–µ—Ä–∞—Ü–∏–π, –≤ —Ç–æ –≤—Ä–µ–º—è –∫–∞–∫ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–π —Å–ª–æ–π —Ç—Ä–µ–±—É–µ—Ç $O(n)$ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω—ã—Ö –æ–ø–µ—Ä–∞—Ü–∏–π.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:38<00:08,  4.25s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç–∏–ø–∞ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [00:43<00:04,  4.52s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ù–∞ –∑–∞–¥–∞—á–µ –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíFR –ø–æ WMT 2014 –º–æ–¥–µ–ª—å Transformer (big) –¥–æ—Å—Ç–∏–≥–ª–∞ –ø–æ–∫–∞–∑–∞—Ç–µ–ª—è BLEU 41,0, –ø—Ä–µ–≤–∑–æ–π–¥—è –≤—Å–µ —Ä–∞–Ω–µ–µ –æ–ø—É–±–ª–∏–∫–æ–≤–∞–Ω–Ω—ã–µ –æ–¥–∏–Ω–æ—á–Ω—ã–µ –º–æ–¥–µ–ª–∏ –ø—Ä–∏ –º–µ–Ω—å—à–µ–π –≤ —á–µ—Ç—ã—Ä–µ —Ä–∞–∑–∞ —Å—Ç–æ–∏–º–æ—Å—Ç–∏ –æ–±—É—á–µ–Ω–∏—è –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–µ–π –ø–µ—Ä–µ–¥–æ–≤–æ–π –º–æ–¥–µ–ª—å—é.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:47<00:00,  4.74s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –±–æ–ª—å—à–µ –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏–∏, –ø–æ—Å–∫–æ–ª—å–∫—É –ø–æ–ª–Ω–æ—Å—Ç—å—é –ø–æ–ª–∞–≥–∞–µ—Ç—Å—è –Ω–∞ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è –¥–ª—è –≤—ã—è–≤–ª–µ–Ω–∏—è –≥–ª–æ–±–∞–ª—å–Ω—ã—Ö –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π –º–µ–∂–¥—É –≤—Ö–æ–¥–Ω—ã–º–∏ –∏ –≤—ã—Ö–æ–¥–Ω—ã–º–∏ –¥–∞–Ω–Ω—ã–º–∏, –∏–∑–±–µ–≥–∞—è –ø—Ä–∏ —ç—Ç–æ–º —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã—Ö —Å–≤—è–∑–µ–π.







In [16]:
times_1 = times

In [17]:
np.mean(times_1)

np.float64(4.636782193183899)

In [18]:
df = pd.read_csv('test_results/article_1_questions.csv')

In [20]:
df['YandexGPT'] = outputs

In [22]:
df.to_csv('test_results/article_1_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è TTRL

In [23]:
questions = ['–í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?',
             '–ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?',
             '–ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?',
             '–ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?',
             '–ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?',
             '–ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?',
             '–ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?'
            ]

In [24]:
arxiv_url = "https://arxiv.org/pdf/2504.16084"
arxiv_id = extract_arxiv_id(arxiv_url)

In [25]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                             | 0/10 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 1/10 [00:06<00:55,  6.20s/it]


–í–æ–ø—Ä–æ—Å: –í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –û—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL) –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–µ –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLMs) —Å –ø–æ–º–æ—â—å—é RL –Ω–∞ –Ω–µ–º–∞—Ä–∫–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. TTRL –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª—è–º ¬´—Å–∞–º–æ—ç–≤–æ–ª—é—Ü–∏–æ–Ω–∏—Ä–æ–≤–∞—Ç—å¬ª, –∏—Å–ø–æ–ª—å–∑—É—è –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã–µ –∑–Ω–∞–Ω–∏—è, –∑–∞–ª–æ–∂–µ–Ω–Ω—ã–µ –≤ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—É—á–µ–Ω–Ω—ã—Ö –º–æ–¥–µ–ª—è—Ö.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 2/10 [00:10<00:39,  4.97s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –û—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å –ø–æ–º–æ—â—å—é —Ä–∞—Å–ø—Ä–æ—Å—Ç—Ä–∞–Ω—ë–Ω–Ω—ã—Ö –ø—Ä–∞–∫—Ç–∏–∫ –≤ Test-Time Scaling (TTS), —Ç–∞–∫–∏—Ö –∫–∞–∫ majority voting (–º–∞–∂–æ—Ä–∏—Ç–∞—Ä–Ω–æ–µ –≥–æ–ª–æ—Å–æ–≤–∞–Ω–∏–µ).




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 3/10 [00:13<00:30,  4.36s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ú–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL –∏–≥—Ä–∞–µ—Ç –∫–ª—é—á–µ–≤—É—é —Ä–æ–ª—å, –ø–æ—Å–∫–æ–ª—å–∫—É –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç –æ—Å–Ω–æ–≤–∞–Ω–Ω—ã–µ –Ω–∞ –ø—Ä–∞–≤–∏–ª–∞—Ö –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ–Ω—Å–µ–Ω—Å—É—Å–∞ —Å—Ä–µ–¥–∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 4/10 [00:20<00:31,  5.31s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü—Ä–∏–º–µ–Ω–µ–Ω–∏–µ TTRL –∫ Qwen2.5-Math-7B –ø—Ä–∏–≤–µ–ª–æ –∫ —É–ª—É—á—à–µ–Ω–∏—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –Ω–∞ AIME 2024 –Ω–∞ 159% (—Å 13,3 –¥–æ 43,3), —Å–æ —Å—Ä–µ–¥–Ω–∏–º –ø—Ä–∏—Ä–æ—Å—Ç–æ–º –≤ 84% –ø–æ AMC, AIME –∏ MATH-500.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 5/10 [00:23<00:21,  4.34s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü–æ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º –æ—Ü–µ–Ω–∫–∏ –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –∫—Ä–∏–≤–∞—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ TTRL –±–ª–∏–∑–∫–æ –ø—Ä–∏–±–ª–∏–∂–∞–µ—Ç—Å—è –∫ –∫—Ä–∏–≤–æ–π RL (leakage).




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 6/10 [00:28<00:18,  4.67s/it]


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  TTRL –æ—Å—Ç–∞—ë—Ç—Å—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–º –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫, –ø–æ—Ç–æ–º—É —á—Ç–æ –Ω–∞–≥—Ä–∞–¥—ã –≤ RL –Ω–∞–∑–Ω–∞—á–∞—é—Ç—Å—è –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏—è –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω–æ–≥–æ –æ—Ç–≤–µ—Ç–∞ ¬´–º–µ—Ç–∫–µ¬ª. –°–∏—Å—Ç–µ–º–∞ –º–æ–∂–µ—Ç –Ω–∞–∑–Ω–∞—á–∏—Ç—å –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ–µ ¬´–æ—Ç—Ä–∏—Ü–∞—Ç–µ–ª—å–Ω–æ–µ¬ª –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–µ, –µ—Å–ª–∏ –ø—Ä–µ–¥–ø–æ–ª–∞–≥–∞–µ–º–∞—è –º–µ—Ç–∫–∞ –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω–æ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω–æ–≥–æ –æ—Ç–≤–µ—Ç–∞.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 7/10 [00:32<00:13,  4.56s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –î–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –º–æ–¥–µ–ª–∏ Qwen2.5-Math-1.5B, Qwen2.5-Math-7B –∏ LLaMA-3.1-8B-Instruct.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:36<00:08,  4.12s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –§–∞–∫—Ç–æ—Ä–æ–º, –∫–æ—Ç–æ—Ä—ã–π –º–æ–∂–µ—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL, —è–≤–ª—è–µ—Ç—Å—è –Ω–µ–¥–æ—Å—Ç–∞—Ç–æ–∫ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–Ω–∞–Ω–∏–π –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–ø–æ–ª–Ω–µ–Ω–∏—è —Ü–µ–ª–µ–≤–æ–π –∑–∞–¥–∞—á–∏ –∏–∑-–∑–∞ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –¥–∞–Ω–Ω—ã—Ö.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [00:42<00:04,  4.72s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  TTRL –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è: —Å —É–≤–µ–ª–∏—á–µ–Ω–∏–µ–º —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏ (–æ—Ç 1,5B –¥–æ 7B) –Ω–∞–±–ª—é–¥–∞–µ—Ç—Å—è –ø—Ä–∏—Ä–æ—Å—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –Ω–∞ AIME 2024 –∏ AMC. –ë–æ–ª—å—à–∏–µ –º–æ–¥–µ–ª–∏ –º–æ–≥—É—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç—å –±–æ–ª–µ–µ —Ç–æ—á–Ω—ã–µ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∑–∞ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–æ –≥–æ–ª–æ—Å–æ–≤ –≤–æ –≤—Ä–µ–º—è —Å–∞–º–æ—Å–æ–≤–µ—Ä—à–µ–Ω—Å—Ç–≤–æ–≤–∞–Ω–∏—è, —á—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–º—É –æ–±—É—á–µ–Ω–∏—é –Ω–∞ –Ω–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:45<00:00,  4.53s/it]

–í–æ–ø—Ä–æ—Å: –° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  TTRL —Å–æ–≤–º–µ—Å—Ç–∏–º —Å —Ä–∞–∑–Ω—ã–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º. –í —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –∞–ª–≥–æ—Ä–∏—Ç–º PPO (Schulman et al., 2017).







In [26]:
times_2 = times

In [27]:
np.mean(times_2)

np.float64(4.416370105743408)

In [28]:
df = pd.read_csv('test_results/article_2_questions.csv')

In [29]:
df['YandexGPT'] = outputs

In [31]:
df.to_csv('test_results/article_2_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

In [32]:
questions = ['–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?',
             '–ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?',
             '–ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?',
             '–ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?',
             '–ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?',
             '–ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?'
            ]

In [33]:
arxiv_url = "https://arxiv.org/pdf/2005.11401"
arxiv_id = extract_arxiv_id(arxiv_url)

In [34]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                             | 0/10 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 1/10 [00:03<00:29,  3.32s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í RAG –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä $p_{\theta} (y_i|x, z, y_{1:i‚àí1})$.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 2/10 [00:06<00:25,  3.15s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í RAG –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —Ä–µ—Ç—Ä–∏–≤–µ—Ä –∏–∑ DPR, –∫–æ—Ç–æ—Ä—ã–π –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è —Å –ø–æ–º–æ—â—å—é –∫–æ–Ω—Ç—Ä–æ–ª—è –∏–∑–≤–ª–µ—á–µ–Ω–∏—è –Ω–∞ Natural Questions –∏ TriviaQA.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 3/10 [00:13<00:35,  5.13s/it]


–í–æ–ø—Ä–æ—Å: –ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  RAG-Token –º–æ–∂–Ω–æ —Ä–∞—Å—Å–º–∞—Ç—Ä–∏–≤–∞—Ç—å –∫–∞–∫ —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–π –∞–≤—Ç–æ—Ä–µ–≥—Ä–µ—Å—Å–∏–æ–Ω–Ω—ã–π seq2seq –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä. –í —Ç–æ –≤—Ä–µ–º—è –∫–∞–∫ –¥–ª—è RAG-Sequence –∏ RAG-Token —Ç—Ä–µ–±—É—é—Ç—Å—è —Ä–∞–∑–Ω—ã–µ —Å–ø–æ—Å–æ–±—ã –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ arg max y p(y|x), –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è —Ä–∞–∑–ª–∏—á–Ω—ã–µ –ø–æ–¥—Ö–æ–¥—ã. RAG-Token –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –ø–µ—Ä–µ—Ö–æ–¥–Ω—É—é –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç—å —Å —É—á—ë—Ç–æ–º top-k (p(¬∑|x)) –∏ —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω–æ–≥–æ –ª—É—á–µ–≤–æ–≥–æ –¥–µ–∫–æ–¥–µ—Ä–∞.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 4/10 [00:14<00:20,  3.41s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  21 –º–ª–Ω –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 5/10 [00:17<00:15,  3.12s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ú–æ–¥–µ–ª–∏ –æ—Ü–µ–Ω–∏–≤–∞–ª–∏—Å—å –Ω–∞ —à–∏—Ä–æ–∫–æ–º —Å–ø–µ–∫—Ç—Ä–µ –∑–∞–¥–∞—á, —Ç—Ä–µ–±—É—é—â–∏—Ö –≥–ª—É–±–æ–∫–∏—Ö –∑–Ω–∞–Ω–∏–π –≤ –æ–±–ª–∞—Å—Ç–∏ NLP, –∏ —É—Å—Ç–∞–Ω–æ–≤–∏–ª–∏ –Ω–æ–≤—ã–π —Å—Ç–∞–Ω–¥–∞—Ä—Ç –≤ —Ç—Ä—ë—Ö –∑–∞–¥–∞—á–∞—Ö –æ—Ç–∫—Ä—ã—Ç–æ–≥–æ –¥–æ–º–µ–Ω–∞ QA.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 6/10 [00:21<00:13,  3.49s/it]


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ê–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º, –ø–æ—Ç–æ–º—É —á—Ç–æ –¥–æ–∫—É–º–µ–Ω—Ç—ã, —Å–æ–¥–µ—Ä–∂–∞—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–± –æ—Ç–≤–µ—Ç–µ, –Ω–æ –Ω–µ —Å–æ–¥–µ—Ä–∂–∞—â–∏–µ –µ–≥–æ –¥–æ—Å–ª–æ–≤–Ω–æ, –≤—Å—ë —Ä–∞–≤–Ω–æ –º–æ–≥—É—Ç —Å–ø–æ—Å–æ–±—Å—Ç–≤–æ–≤–∞—Ç—å —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏—é –ø—Ä–∞–≤–∏–ª—å–Ω–æ–≥–æ –æ—Ç–≤–µ—Ç–∞. –≠—Ç–æ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ –ø—Ä–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–∏ —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã—Ö —ç–∫—Å—Ç—Ä–∞–∫—Ç–∏–≤–Ω—ã—Ö –ø–æ–¥—Ö–æ–¥–æ–≤.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 7/10 [00:27<00:12,  4.24s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  RAG-Token –ø–æ–∫–∞–∑–∞–ª –ª—É—á—à–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã, —á–µ–º RAG-Sequence, –∏ –æ–±–µ –º–æ–¥–µ–ª–∏ –ø—Ä–µ–≤–∑–æ—à–ª–∏ BART –ø–æ Q-BLEU-1. –¢–∞–∫–∂–µ –æ—Ü–µ–Ω—â–∏–∫–∏ –æ—Ç–º–µ—Ç–∏–ª–∏, —á—Ç–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ RAG –±–æ–ª–µ–µ —Ñ–∞–∫—Ç—É—Ä–Ω—ã –≤ 42,7% —Å–ª—É—á–∞–µ–≤ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å 7,1% —É BART.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:29<00:07,  3.57s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –û–±–∞ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –æ–±—É—á–∞—é—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ –ø—É—Ç—ë–º —Ç–æ–Ω–∫–æ–π –Ω–∞—Å—Ç—Ä–æ–π–∫–∏ –Ω–∞ –ª—é–±–æ–π seq2seq –∑–∞–¥–∞—á–µ.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [00:31<00:03,  3.07s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  ¬´Hot-swapping¬ª –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG –ø–æ–∑–≤–æ–ª—è–µ—Ç –æ–±–Ω–æ–≤–ª—è—Ç—å –º–æ–¥–µ–ª—å –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –ø–µ—Ä–µ–æ–±—É—á–µ–Ω–∏—è.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:37<00:00,  3.79s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è —Ä–∞–∑–ª–∏—á–Ω—ã–µ —Å–ø–æ—Å–æ–±—ã –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ arg max $y$ $p$ ($y|x$). –í —á–∞—Å—Ç–Ω–æ—Å—Ç–∏, –¥–ª—è RAG-Token –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω–æ–≥–æ beam decoder –∏ –ø–µ—Ä–µ—Ö–æ–¥–Ω—ã—Ö –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–µ–π —Å —É—á—ë—Ç–æ–º top-$k$ ($p$ ($¬∑|x$)).







In [35]:
times_3 = times

In [36]:
np.mean(times_3)

np.float64(3.6867867946624755)

In [37]:
df = pd.read_csv('test_results/article_3_questions.csv')

In [38]:
df['YandexGPT'] = outputs

In [39]:
df.to_csv('test_results/article_3_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Efficient Estimation of Word Representations in Vector Space

In [40]:
questions = ['–ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?',
             '–í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?',
             '–ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?',
             '–ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?',
             '–ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?',
             '–ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?',
             '–ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?',
             '–ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?'
            ]

In [41]:
arxiv_url = "https://arxiv.org/pdf/1301.3781"
arxiv_id = extract_arxiv_id(arxiv_url)

In [42]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                             | 0/10 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 1/10 [00:04<00:42,  4.76s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–æ–∂–∏–ª–∏ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä: 
1. –ú–æ–¥–µ–ª—å –Ω–∞ –æ—Å–Ω–æ–≤–µ –ø–æ–ª–Ω–æ—Å–≤—è–∑–Ω–æ–π –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ —Å –ª–∏–Ω–µ–π–Ω—ã–º –ø—Ä–æ–µ–∫—Ü–∏–æ–Ω–Ω—ã–º —Å–ª–æ–µ–º –∏ –Ω–µ–ª–∏–Ω–µ–π–Ω—ã–º —Å–∫—Ä—ã—Ç—ã–º —Å–ª–æ–µ–º.
2. –î—Ä—É–≥–∞—è –º–æ–¥–µ–ª—å–Ω–∞—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ –¥–ª—è –≤—ã—á–∏—Å–ª–µ–Ω–∏—è –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã—Ö –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 2/10 [00:07<00:26,  3.33s/it]


–í–æ–ø—Ä–æ—Å: –í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  CBOW –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç —Ç–µ–∫—É—â–µ–µ —Å–ª–æ–≤–æ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞, –∞ Skip-gram –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç –æ–∫—Ä—É–∂–∞—é—â–∏–µ —Å–ª–æ–≤–∞ –ø–æ –∑–∞–¥–∞–Ω–Ω–æ–º—É —Ç–µ–∫—É—â–µ–º—É —Å–ª–æ–≤—É.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 3/10 [00:11<00:25,  3.62s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í Semantic-Syntactic Word Relationship test set –≤–∫–ª—é—á–µ–Ω—ã –ø—è—Ç—å —Ç–∏–ø–æ–≤ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –æ—Ç–Ω–æ—à–µ–Ω–∏–π, –Ω–∞–ø—Ä–∏–º–µ—Ä, –æ—Ç–Ω–æ—à–µ–Ω–∏–µ –º–µ–∂–¥—É —Å—Ç—Ä–∞–Ω–æ–π –∏ –≥–æ—Ä–æ–¥–æ–º, –∫–æ—Ç–æ—Ä—ã–π –µ–π –ø—Ä–∏–Ω–∞–¥–ª–µ–∂–∏—Ç (–∫–∞–∫ –§—Ä–∞–Ω—Ü–∏—è –æ—Ç–Ω–æ—Å–∏—Ç—Å—è –∫ –ü–∞—Ä–∏–∂—É, —Ç–∞–∫ –ì–µ—Ä–º–∞–Ω–∏—è –æ—Ç–Ω–æ—Å–∏—Ç—Å—è –∫ –ë–µ—Ä–ª–∏–Ω—É).




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 4/10 [00:13<00:19,  3.28s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ú–æ–¥–µ–ª—å Skip-gram –ø–æ–∫–∞–∑–∞–ª–∞ —Ç–æ—á–Ω–æ—Å—Ç—å 55% –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö –∏ 59% –Ω–∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 5/10 [00:19<00:20,  4.18s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í Skip-gram –º–æ–¥–µ–ª–∏ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –ø–æ–¥—Ö–æ–¥, –ø—Ä–∏ –∫–æ—Ç–æ—Ä–æ–º –∫–∞–∂–¥–æ–µ —Ç–µ–∫—É—â–µ–µ —Å–ª–æ–≤–æ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –∫–∞–∫ –≤—Ö–æ–¥ –¥–ª—è –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–æ–≥–æ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞ —Å –Ω–µ–ø—Ä–µ—Ä—ã–≤–Ω—ã–º –ø—Ä–æ–µ–∫—Ü–∏–æ–Ω–Ω—ã–º —Å–ª–æ–µ–º, –∏ –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞—é—Ç—Å—è —Å–ª–æ–≤–∞ –≤ –æ–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–º –¥–∏–∞–ø–∞–∑–æ–Ω–µ –¥–æ –∏ –ø–æ—Å–ª–µ —Ç–µ–∫—É—â–µ–≥–æ —Å–ª–æ–≤–∞. –ü—Ä–∏ —ç—Ç–æ–º –±–æ–ª–µ–µ –æ—Ç–¥–∞–ª—ë–Ω–Ω—ã–º —Å–ª–æ–≤–∞–º –¥–∞—ë—Ç—Å—è –º–µ–Ω—å—à–µ –≤–µ—Å–∞ –∑–∞ —Å—á—ë—Ç —É–º–µ–Ω—å—à–µ–Ω–∏—è –∏—Ö –≤—ã–±–æ—Ä–∫–∏ –≤ –æ–±—É—á–∞—é—â–∏—Ö –ø—Ä–∏–º–µ—Ä–∞—Ö.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 6/10 [00:21<00:13,  3.35s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  $Q = C √ó (D + D √ó log_2(V))$




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 7/10 [00:27<00:12,  4.20s/it]


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ê–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö, –ø–æ—Ç–æ–º—É —á—Ç–æ –ø–æ—Å—á–∏—Ç–∞–ª–∏, —á—Ç–æ –Ω–µ–ª–∏–Ω–µ–π–Ω—ã–π —Å–∫—Ä—ã—Ç—ã–π —Å–ª–æ–π —è–≤–ª—è–µ—Ç—Å—è –∏—Å—Ç–æ—á–Ω–∏–∫–æ–º –±–æ–ª—å—à–µ–π —á–∞—Å—Ç–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –≤—ã—á–∏—Å–ª–µ–Ω–∏–π. –û–Ω–∏ —Ä–µ—à–∏–ª–∏ –∏—Å—Å–ª–µ–¥–æ–≤–∞—Ç—å –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ –º–æ–¥–µ–ª–∏, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –±—ã—Ç—å –æ–±—É—á–µ–Ω—ã –Ω–∞ –≥–æ—Ä–∞–∑–¥–æ –±–æ–ª—å—à–µ–º –æ–±—ä—ë–º–µ –¥–∞–Ω–Ω—ã—Ö –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ, –¥–∞–∂–µ –µ—Å–ª–∏ –æ–Ω–∏ –Ω–µ —Å–º–æ–≥—É—Ç –ø—Ä–µ–¥—Å—Ç–∞–≤–ª—è—Ç—å –¥–∞–Ω–Ω—ã–µ —Ç–∞–∫ –∂–µ —Ç–æ—á–Ω–æ, –∫–∞–∫ –Ω–µ–π—Ä–æ–Ω–Ω—ã–µ —Å–µ—Ç–∏.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:32<00:09,  4.60s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í —Å–∏—Å—Ç–µ–º–µ DistBelief —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å –∑–∞ —Å—á—ë—Ç –∑–∞–ø—É—Å–∫–∞ –º–Ω–æ–∂–µ—Å—Ç–≤–∞ —Ä–µ–ø–ª–∏–∫ –æ–¥–Ω–æ–π –∏ —Ç–æ–π –∂–µ –º–æ–¥–µ–ª–∏ –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ. –ò—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–æ—Å—å –æ—Ç 50 –¥–æ 100 —Ä–µ–ø–ª–∏–∫ –º–æ–¥–µ–ª–∏ –≤–æ –≤—Ä–µ–º—è —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∏. –ö–∞–∂–¥–∞—è —Ä–µ–ø–ª–∏–∫–∞ —Å–∏–Ω—Ö—Ä–æ–Ω–∏–∑–∏—Ä—É–µ—Ç —Å–≤–æ–∏ –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –≥—Ä–∞–¥–∏–µ–Ω—Ç–∞ —á–µ—Ä–µ–∑ —Ü–µ–Ω—Ç—Ä–∞–ª–∏–∑–æ–≤–∞–Ω–Ω—ã–π —Å–µ—Ä–≤–µ—Ä, –∫–æ—Ç–æ—Ä—ã–π —Ö—Ä–∞–Ω–∏—Ç –≤—Å–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [00:36<00:04,  4.33s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors –±—ã–ª–∏ —Å–æ–∑–¥–∞–Ω—ã –≤ –¥–≤–∞ —ç—Ç–∞–ø–∞: —Å–Ω–∞—á–∞–ª–∞ –≤—Ä—É—á–Ω—É—é –±—ã–ª —Å–æ—Å—Ç–∞–≤–ª–µ–Ω —Å–ø–∏—Å–æ–∫ –ø–æ—Ö–æ–∂–∏—Ö –ø–∞—Ä —Å–ª–æ–≤, –∑–∞—Ç–µ–º –Ω–∞ –∏—Ö –æ—Å–Ω–æ–≤–µ –±—ã–ª —Å—Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω –±–æ–ª—å—à–æ–π —Å–ø–∏—Å–æ–∫ –≤–æ–ø—Ä–æ—Å–æ–≤ –ø—É—Ç—ë–º —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è –¥–≤—É—Ö –ø–∞—Ä —Å–ª–æ–≤.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:38<00:00,  3.81s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ú–µ—Ç–æ–¥ word offset, –≥–¥–µ –≤—ã–ø–æ–ª–Ω—è—é—Ç—Å—è –ø—Ä–æ—Å—Ç—ã–µ –∞–ª–≥–µ–±—Ä–∞–∏—á–µ—Å–∫–∏–µ –æ–ø–µ—Ä–∞—Ü–∏–∏ –Ω–∞–¥ –≤–µ–∫—Ç–æ—Ä–∞–º–∏ —Å–ª–æ–≤.







In [43]:
times_4 = times

In [44]:
np.mean(times_4)

np.float64(3.7225289344787598)

In [45]:
df = pd.read_csv('test_results/article_4_questions.csv')

In [46]:
df['YandexGPT'] = outputs

In [47]:
df.to_csv('test_results/article_4_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è LADDER: SELF-IMPROVING LLMS THROUGH RECURSIVE PROBLEM DECOMPOSITION

In [48]:
questions = ['–í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?',
             '–ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?',
             '–ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?',
             '–ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?'
            ]

In [49]:
arxiv_url = "https://arxiv.org/pdf/2503.00735"
arxiv_id = extract_arxiv_id(arxiv_url)

In [50]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                             | 0/10 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 1/10 [00:04<00:37,  4.14s/it]


–í–æ–ø—Ä–æ—Å: –í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ö–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ —Ç–æ–º, —á—Ç–æ –º–æ–¥–µ–ª–∏ –º–æ–≥—É—Ç —Å–∞–º–æ—Å—Ç–æ—è—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞—Ç—å —Å–≤–æ–∏ —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ –∫ —Ä–µ—à–µ–Ω–∏—é –∑–∞–¥–∞—á –ø—É—Ç—ë–º —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–∏—è –∏ —Ä–µ—à–µ–Ω–∏—è –ø–æ—Å—Ç–µ–ø–µ–Ω–Ω–æ —É–ø—Ä–æ—â–∞—é—â–∏—Ö—Å—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 2/10 [00:05<00:21,  2.75s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –î–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —á–∏—Å–ª–µ–Ω–Ω—ã–π –º–µ—Ç–æ–¥ –∏–Ω—Ç–µ–≥—Ä–∞—Ü–∏–∏.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 3/10 [00:10<00:23,  3.41s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ì–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≤ –¥–≤–∞ —ç—Ç–∞–ø–∞: —Å–Ω–∞—á–∞–ª–∞ —Å–æ–±–∏—Ä–∞–µ—Ç—Å—è —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã–π –Ω–∞–±–æ—Ä –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤, –∑–∞—Ç–µ–º –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∏–Ω—Ç–µ–≥—Ä–∞–ª–∞ –≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç—Å—è –¥–µ—Ä–µ–≤–æ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤. –≠—Ç–æ—Ç –ø—Ä–æ—Ü–µ—Å—Å –æ–ø–∏—Å–∞–Ω –≤ —Ä–∞–∑–¥–µ–ª–µ 3.1.2.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 4/10 [00:20<00:36,  6.02s/it]


–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  TTRL ‚Äî —ç—Ç–æ –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–π –º–µ—Ç–æ–¥ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏—è –≤–æ –≤—Ä–µ–º—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è, —á–µ–º –º–Ω–æ–≥–æ–∫—Ä–∞—Ç–Ω—ã–µ –ø–æ–ø—ã—Ç–∫–∏ —Ä–µ—à–∏—Ç—å —Ç–µ—Å—Ç–æ–≤—ã–π –≤–æ–ø—Ä–æ—Å. –í –æ—Ç–ª–∏—á–∏–µ –æ—Ç LADDER, TTRL –ø—Ä–∏–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –∑–∞–¥–∞—á–∏ –Ω–µ–ø–æ—Å—Ä–µ–¥—Å—Ç–≤–µ–Ω–Ω–æ –∫ —Ç–µ—Å—Ç–æ–≤—ã–º –≤–æ–ø—Ä–æ—Å–∞–º, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏—Å—Ö–æ–¥–Ω–æ–π –º–æ–¥–µ–ª–∏ LADDER –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ —Ä–µ—à–∞—Ç—å –Ω–µ–∫–æ—Ç–æ—Ä—ã–µ –∏–∑ —Å–∞–º—ã—Ö —Å–ª–æ–∂–Ω—ã—Ö –≤–æ–ø—Ä–æ—Å–æ–≤. LADDER –≤–∫–ª—é—á–∞–µ—Ç –≤ —Å–µ–±—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤, –ø—Ä–æ–≤–µ—Ä–∫—É —Ä–µ—à–µ–Ω–∏–π –∏ –æ–±—É—á–µ–Ω–∏–µ —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º, –≤ —Ç–æ –≤—Ä–µ–º—è –∫–∞–∫ TTRL –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä—É–µ—Ç —É–ª—É—á—à–µ–Ω–∏–µ –ø–æ–∫–∞–∑–∞—Ç–µ–ª–µ–π –∑–∞ —Å—á—ë—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ –ø–æ–¥—Ö–æ–¥–∞ –

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 5/10 [00:23<00:24,  4.98s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL –º–æ–¥–µ–ª—å Qwen2.5 7B –¥–æ—Å—Ç–∏–≥–ª–∞ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞ –≤ 90% –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 6/10 [00:25<00:16,  4.10s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –í GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –Ω–∞ –≥—Ä—É–ø–ø–æ–≤—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö (baseline from group scores).




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                     | 7/10 [00:29<00:12,  4.17s/it]


–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –£—Å–ø–µ—Ö RL —Å –≤–∞—Ä–∏–∞–Ω—Ç–∞–º–∏ –∑–∞–¥–∞—á –æ–±—É—Å–ª–æ–≤–ª–µ–Ω —Ç—â–∞—Ç–µ–ª—å–Ω–æ —Å–∫–æ–Ω—Å—Ç—Ä—É–∏—Ä–æ–≤–∞–Ω–Ω—ã–º –≥—Ä–∞–¥–∏–µ–Ω—Ç–æ–º —Å–ª–æ–∂–Ω–æ—Å—Ç–∏, –∞ –Ω–µ —Å–∞–º–∏–º –∞–ª–≥–æ—Ä–∏—Ç–º–æ–º RL. RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è –∏–∑-–∑–∞ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ –ø—Ä—è–º–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è –≤ —Ç–∞–∫–∏—Ö –∑–∞–¥–∞—á–∞—Ö.




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [00:38<00:11,  5.63s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –î–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ª–µ–¥—É—é—â–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏:
1. –¢–µ–º–ø–µ—Ä–∞—Ç—É—Ä–Ω—ã–π —Ü–∏–∫–ª (Temperature cycling) ‚Äî –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–æ–µ –∏–∑–º–µ–Ω–µ–Ω–∏–µ —Ç–µ–º–ø–µ—Ä–∞—Ç—É—Ä—ã –≤—ã–±–æ—Ä–∫–∏ –º–µ–∂–¥—É 0,8 –∏ 1,4 –¥–ª—è —Ä–∞–∑–Ω—ã—Ö –∑–∞–ø—Ä–æ—Å–æ–≤.
2. –ü–µ—Ä—Å–æ–Ω–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ (Persona-based prompting) ‚Äî –º–æ–¥–µ–ª—å –ø–æ–ª—É—á–∞–ª–∞ –∑–∞–¥–∞–Ω–∏–µ –ø—Ä–∏–Ω—è—Ç—å —Ä–∞–∑–Ω—ã–µ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–µ –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤—ã (–Ω–∞–ø—Ä–∏–º–µ—Ä, ¬´–¥—É–º–∞–π –∫–∞–∫ –≠–π–ª–µ—Ä, —Ñ–æ–∫—É—Å–∏—Ä—É—è—Å—å –Ω–∞ —Ä—è–¥–∞—Ö¬ª, ¬´–ø–æ–¥—Ö–æ–¥–∏ –∫–∞–∫ –ì–∞—É—Å—Å, –∏—â–∞ –∑–∞–∫–æ–Ω–æ–º–µ—Ä–Ω–æ—Å—Ç–∏¬ª).




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [00:42<00:04,  4.91s/it]


–í–æ–ø—Ä–æ—Å: –ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏ –∑–∞ —Å—á—ë—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–≥–æ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏—è –∑–∞–¥–∞—á –Ω–∞ –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ –ø–æ–¥–∑–∞–¥–∞—á–∏ –∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å –ø—Ä–æ–≤–µ—Ä—è–µ–º—ã–º–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è–º–∏.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:46<00:00,  4.67s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å:  –ü—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ —Å –∫–æ–Ω—Ç—Ä–æ–ª–µ–º –∫–∞—á–µ—Å—Ç–≤–∞. –ú–æ–¥–µ–ª—å –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–ª–∞ –∏–Ω—Ç–µ–≥—Ä–∞–ª—ã, —Å–ª–æ–∂–Ω–æ—Å—Ç—å –∫–æ—Ç–æ—Ä—ã—Ö —Å—É—â–µ—Å—Ç–≤–µ–Ω–Ω–æ –æ—Ç–ª–∏—á–∞–ª–∞—Å—å –æ—Ç –∑–∞–¥–∞–Ω–Ω–æ–≥–æ —É—Ä–æ–≤–Ω—è. –ù–µ–±–æ–ª—å—à–∏–µ –∏–∑–º–µ–Ω–µ–Ω–∏—è –≤ –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç–∞—Ö –∏–ª–∏ —Å–æ—Å—Ç–∞–≤–µ —Ñ—É–Ω–∫—Ü–∏–π –º–æ–≥–ª–∏ –ø—Ä–µ–≤—Ä–∞—Ç–∏—Ç—å –∫–∞–∂—É—â–∏–µ—Å—è –ø—Ä–æ—Å—Ç—ã–º–∏ –∏–Ω—Ç–µ–≥—Ä–∞–ª—ã –≤ –≥–æ—Ä–∞–∑–¥–æ –±–æ–ª–µ–µ —Å–ª–æ–∂–Ω—ã–µ.







In [51]:
times_5 = times

In [52]:
np.mean(times_5)

np.float64(4.579136824607849)

In [53]:
df = pd.read_csv('test_results/article_5_questions.csv')

In [54]:
df['YandexGPT'] = outputs

In [55]:
df.to_csv('test_results/article_5_questions.csv', index=False)

In [56]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Yandex: {np.mean(times_1 + times_2 + times_3 + times_4 + times_5):.1f} —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Yandex: 4.2 —Å–µ–∫


### QWEN 3

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #quantization_config=quant_config,
    torch_dtype="auto",
    device_map="cuda"
)

llm = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False)

embedding_model = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

reranker = FlagReranker("BAAI/bge-reranker-v2-m3", use_fp16=True)

vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Device set to use cuda
  embedding_model = HuggingFaceBgeEmbeddings(
  vectorstore = Chroma(persist_directory="test_chroma_db", embedding_function=embedding_model)


In [5]:
SYSTEM_MSG = (
    "–¢—ã ‚Äî –ø–æ–º–æ—â–Ω–∏–∫ –ø–æ –Ω–∞—É—á–Ω—ã–º —Å—Ç–∞—Ç—å—è–º. "
    "–¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –¥–∞—Ç—å –∫–æ—Ä–æ—Ç–∫–∏–π, —Ç–æ—á–Ω—ã–π –∏ –æ–¥–Ω–æ–∑–Ω–∞—á–Ω—ã–π –æ—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å, –∏—Å–ø–æ–ª—å–∑—É—è —Ç–æ–ª—å–∫–æ –ø—Ä–∏–≤–µ–¥—ë–Ω–Ω—ã–π –∫–æ–Ω—Ç–µ–∫—Å—Ç. "
    "–ù–µ–ª—å–∑—è –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å –¥–∏–∞–ª–æ–≥, –∑–∞–¥–∞–≤–∞—Ç—å –≤—Å—Ç—Ä–µ—á–Ω—ã–µ –≤–æ–ø—Ä–æ—Å—ã –∏–ª–∏ –ø–æ–≤—Ç–æ—Ä—è—Ç—å—Å—è. –û—Ç–≤–µ—á–∞–π —Å—Ç—Ä–æ–≥–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º —è–∑—ã–∫–µ"
)


def build_messages(question: str, context: str) -> List[dict]:
    """–§–æ—Ä–º–∏—Ä—É–µ—Ç —Å–æ–æ–±—â–µ–Ω–∏—è –≤ –Ω—É–∂–Ω–æ–º –¥–ª—è chat_template —Ñ–æ—Ä–º–∞—Ç–µ."""
    return [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": f"–í–æ–ø—Ä–æ—Å:\n{question}"},
        {"role": "assistant", "content": f"–ö–æ–Ω—Ç–µ–∫—Å—Ç:\n{context}"}
    ]

#### Attention is all you need

In [6]:
questions = ['–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?',
            '–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?',
             '–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?',
             '–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?',
             '–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?',
             '–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?'
            ]

In [7]:
arxiv_url = "https://arxiv.org/abs/1706.03762"
arxiv_id = extract_arxiv_id(arxiv_url)

In [8]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()
    
    answer = llm(messages, max_new_tokens=256, do_sample=False, temperature=None, top_p=None, top_k=None)[0]['generated_text']
    
    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

  candidates = retriever.get_relevant_documents(question)
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:11<01:40, 11.18s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û–ø—Ç–∏–º–∞–π–∑–µ—Ä ‚Äî Adam, –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã: Œ≤‚ÇÅ = 0.9, Œ≤‚ÇÇ = 0.98, Œµ = 10‚Åª‚Åπ.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:16<01:00,  7.52s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –†–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:29<01:10, 10.13s/it]

–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –≤ –∫–æ—Ç–æ—Ä–æ–º –≤—ã—á–∏—Å–ª—è–µ—Ç—Å—è —Å–∫–∞–ª—è—Ä–Ω–æ–µ –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∏–µ –º–µ–∂–¥—É –≤–µ–∫—Ç–æ—Ä–∞–º–∏ –∑–∞–ø—Ä–æ—Å–æ–≤ –∏ –∫–ª—é—á–µ–π, –∑–∞—Ç–µ–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç –¥–µ–ª–∏—Ç—Å—è –Ω–∞ ‚àödk, —á—Ç–æ–±—ã —Å—Ç–∞–±–∏–ª–∏–∑–∏—Ä–æ–≤–∞—Ç—å –∑–Ω–∞—á–µ–Ω–∏—è –ø–µ—Ä–µ–¥ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ–º softmax. –î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—â–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö –∑–Ω–∞—á–µ–Ω–∏–π —Å–∫–∞–ª—è—Ä–Ω–æ–≥–æ –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∏—è, —á—Ç–æ –º–æ–∂–µ—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ –Ω–µ—Å—Ç–∞–±–∏–ª—å–Ω–æ—Å—Ç–∏ –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:44<01:13, 12.23s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Encoder —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤, –∫–∞–∂–¥—ã–π –∏–∑ –∫–æ—Ç–æ—Ä—ã—Ö –≤–∫–ª—é—á–∞–µ—Ç –¥–≤–∞ –ø–æ–¥—Å–ª–æ—è: —Å–∞–º–æ–ø—Ä–æ–∏–∑–≤–æ–ª—å–Ω–æ–µ –≤–Ω–∏–º–∞–Ω–∏–µ –∏ –ø–æ–ª–Ω–æ—Å—Ç—å—é —Å–≤—è–∑–∞–Ω–Ω—ã–π —Å–ª–æ–π. Decoder —Ç–∞–∫–∂–µ —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤, –∫–∞–∂–¥—ã–π –∏–∑ –∫–æ—Ç–æ—Ä—ã—Ö –≤–∫–ª—é—á–∞–µ—Ç —Ç—Ä–∏ –ø–æ–¥—Å–ª–æ—è: —Å–∞–º–æ–ø—Ä–æ–∏–∑–≤–æ–ª—å–Ω–æ–µ –≤–Ω–∏–º–∞–Ω–∏–µ, –ø–æ–ª–Ω–æ—Å—Ç—å—é —Å–≤—è–∑–∞–Ω–Ω—ã–π —Å–ª–æ–π –∏ –≤–Ω–∏–º–∞–Ω–∏–µ –∫ encoder'—É. –í decoder'–µ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –º–∞—Å–∫–∏—Ä–æ–≤–∫–∞, —á—Ç–æ–±—ã –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—Ç–∏—Ç—å –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç—å –æ—Ç –ø–æ—Å–ª–µ–¥—É—é—â–∏—Ö –ø–æ–∑–∏—Ü–∏–π.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:56<00:59, 11.86s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –æ–¥–Ω–æ–π –≥–æ–ª–æ–≤–æ–π –≤–Ω–∏–º–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å, —Ç–∞–∫ –∫–∞–∫ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ –æ–±—Ä–∞—â–∞—Ç—å –≤–Ω–∏–º–∞–Ω–∏–µ –∫ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –∏–∑ —Ä–∞–∑–Ω—ã—Ö –ø–æ–¥–ø—Ä–æ—Å—Ç—Ä–∞–Ω—Å—Ç–≤ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π, —á—Ç–æ –ø–æ–≤—ã—à–∞–µ—Ç —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å –∫ –∑–∞—Ö–≤–∞—Ç—É —Ä–∞–∑–ª–∏—á–Ω—ã—Ö –∞—Å–ø–µ–∫—Ç–æ–≤ –¥–∞–Ω–Ω—ã—Ö. –û–¥–Ω–∞ –≥–æ–ª–æ–≤–∞ –≤–Ω–∏–º–∞–Ω–∏—è –æ–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ—Ç —ç—Ç—É —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç—å, —Ç–∞–∫ –∫–∞–∫ –µ—ë –≤—ã—Ö–æ–¥—ã —É—Å—Ä–µ–¥–Ω—è—é—Ç—Å—è.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [01:08<00:47, 11.91s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –∏ –ø–æ—á–µ–º—É –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã —Å–∏–Ω—É—Å–æ–∏–¥—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ —É—á–∏—Ç—ã–≤–∞—Ç—å –æ—Ç–Ω–æ—Å–∏—Ç–µ–ª—å–Ω—ã–µ –ø–æ–∑–∏—Ü–∏–∏ —Å–ª–æ–≤ –≤ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏. –°–∏–Ω—É—Å–æ–∏–¥—ã –±—ã–ª–∏ –≤—ã–±—Ä–∞–Ω—ã, –ø–æ—Ç–æ–º—É —á—Ç–æ –æ–Ω–∏ –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ –ª–µ–≥–∫–æ –≤—ã—á–∏—Å–ª—è—Ç—å –æ—Ç–Ω–æ—Å–∏—Ç–µ–ª—å–Ω—ã–µ –ø–æ–∑–∏—Ü–∏–∏ –∏ —ç–∫—Å—Ç—Ä–∞–ø–æ–ª–∏—Ä–æ–≤–∞—Ç—å –Ω–∞ –±–æ–ª–µ–µ –¥–ª–∏–Ω–Ω—ã–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:25<00:41, 13.81s/it]

–í–æ–ø—Ä–æ—Å: –í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏ —Å–≤–µ—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–ª–æ—è–º–∏ –≤ –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –∏ –¥–ª–∏–Ω—ã –ø—É—Ç–∏ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Self-attention –∏–º–µ–µ—Ç –ø–æ—Å—Ç–æ—è–Ω–Ω–æ–µ —á–∏—Å–ª–æ –æ–ø–µ—Ä–∞—Ü–∏–π –Ω–µ–∑–∞–≤–∏—Å–∏–º–æ –æ—Ç –¥–ª–∏–Ω—ã –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏, –≤ —Ç–æ –≤—Ä–µ–º—è –∫–∞–∫ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏ —Ç—Ä–µ–±—É—é—Ç *O(n)* –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω—ã—Ö –æ–ø–µ—Ä–∞—Ü–∏–π. –≠—Ç–æ –¥–µ–ª–∞–µ—Ç self-attention –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã–º –ø–æ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –ø—Ä–∏ –¥–ª–∏–Ω–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ *n*, –º–µ–Ω—å—à–µ–π, —á–µ–º —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏—è *d*. Self-attention —Ç–∞–∫–∂–µ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —É—á–∏—Ç—ã–≤–∞—Ç—å –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –º–µ–∂–¥—É –≤—Å–µ–º–∏ –ø–æ–∑–∏—Ü–∏—è–º–∏ –ø–æ—Å–ª–µ–¥–æ–≤–

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:33<00:23, 11.91s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç–∏–ø–∞ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:40<00:10, 10.37s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíDE –∏ EN‚ÜíFR –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –ø—Ä–µ–¥—ã–¥—É—â–∏–º–∏ SOTA?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: Transformer –ø–æ–∫–∞–∑–∞–ª BLEU-—Å—á–µ—Ç 41.8 –Ω–∞ –∑–∞–¥–∞—á–µ EN‚ÜíFR, —á—Ç–æ –ø—Ä–µ–≤–∑–æ—à–ª–æ –ø—Ä–µ–¥—ã–¥—É—â–∏–µ SOTA –º–æ–¥–µ–ª–∏.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:53<00:00, 11.33s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—é –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–æ—Ç–æ–º—É —á—Ç–æ Transformer –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏—Ä–æ–≤–∞—Ç—å –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –º–µ–∂–¥—É –ª—é–±—ã–º–∏ –ø–æ–∑–∏—Ü–∏—è–º–∏ –≤—Ö–æ–¥–∞ –∏ –≤—ã—Ö–æ–¥–∞ –∑–∞ –ø–æ—Å—Ç–æ—è–Ω–Ω–æ–µ —á–∏—Å–ª–æ –æ–ø–µ—Ä–∞—Ü–∏–π, –≤ –æ—Ç–ª–∏—á–∏–µ –æ—Ç –º–æ–¥–µ–ª–µ–π —Å —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º–∏ –∏–ª–∏ —Å–≤—ë—Ä—Ç–æ—á–Ω—ã–º–∏ —Å–µ—Ç—è–º–∏, –≥–¥–µ —á–∏—Å–ª–æ –æ–ø–µ—Ä–∞—Ü–∏–π —Ä–∞—Å—Ç—ë—Ç —Å —Ä–∞—Å—Å—Ç–æ—è–Ω–∏–µ–º –º–µ–∂–¥—É –ø–æ–∑–∏—Ü–∏—è–º–∏. –≠—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω—ã–µ –≤—ã—á–∏—Å–ª–µ–Ω–∏—è.







In [9]:
times_1 = times

In [10]:
np.mean(times_1)

np.float64(8.321885013580323)

In [11]:
df = pd.read_csv('test_results/article_1_questions.csv')

In [13]:
df['Qwen3'] = outputs

In [14]:
df.to_csv('test_results/article_1_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è TTRL

In [15]:
questions = ['–í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?',
             '–ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?',
             '–ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?',
             '–ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?',
             '–ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?',
             '–ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?',
             '–ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?',
             '–ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?'
            ]

In [16]:
arxiv_url = "https://arxiv.org/pdf/2504.16084"
arxiv_id = extract_arxiv_id(arxiv_url)

In [18]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None, top_k=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:12<01:51, 12.36s/it]

–í–æ–ø—Ä–æ—Å: –í —á–µ–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –æ—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL)?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω–∞—è –∏–¥–µ—è –º–µ—Ç–æ–¥–∞ Test-Time Reinforcement Learning (TTRL) –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –æ–±—É—á–µ–Ω–∏–∏ –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —É—Å–∏–ª–∏—Ç–µ–ª—å–Ω–æ–≥–æ –æ–±—É—á–µ–Ω–∏—è (RL) –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –º–µ—Ç–∫–∞–º, –≥–¥–µ –Ω–∞–≥—Ä–∞–¥—ã –æ—Ü–µ–Ω–∏–≤–∞—é—Ç—Å—è –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–≥–ª–∞—Å–æ–≤–∞–Ω–Ω–æ—Å—Ç–∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏ (–º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞).




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:22<01:27, 10.96s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–º –±—Ä–∞–∑–æ–º –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –æ—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Ü–µ–Ω–∫–∞ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –≤ TTRL –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º –º–µ—Ç–æ–¥–æ–≤, —Ç–∞–∫–∏—Ö –∫–∞–∫ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–æ –≥–æ–ª–æ—Å–æ–≤, –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –º–µ—Ç–æ–∫ –∏ –≤—ã—á–∏—Å–ª–µ–Ω–∏—è —Ä—É–ª–µ—Ç–æ—á–Ω—ã—Ö –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏–π –Ω–∞ –æ—Å–Ω–æ–≤–µ –ø—Ä–∞–≤–∏–ª, –±–µ–∑ –¥–æ—Å—Ç—É–ø–∞ –∫ –∏—Å—Ç–∏–Ω–Ω—ã–º –º–µ—Ç–∫–∞–º.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:31<01:11, 10.27s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é —Ä–æ–ª—å –∏–≥—Ä–∞–µ—Ç –º–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–µ—Ç–æ–¥ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–∞ –≥–æ–ª–æ—Å–æ–≤ (majority voting) –≤ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–µ TTRL –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–≥–ª–∞—Å–∏—è —Å—Ä–µ–¥–∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –º–æ–¥–µ–ª–∏, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞—Ç—å –Ω–∞–≥—Ä–∞–¥—ã –≤ —É—Å–ª–æ–≤–∏—è—Ö –æ—Ç—Å—É—Ç—Å—Ç–≤–∏—è –º–µ—Ç–æ–∫.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:39<00:55,  9.27s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —É–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –£–ª—É—á—à–µ–Ω–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏ —Å –ø–æ–º–æ—â—å—é TTRL –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–µ AIME 2024 —Å–æ—Å—Ç–∞–≤–∏–ª–∏ 159.3%.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:46<00:41,  8.33s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL —Å—Ä–∞–≤–Ω–∏–≤–∞–µ—Ç—Å—è —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å, —Å—Ä–∞–≤–Ω–∏–º—É—é —Å RL –Ω–∞ —Ä–∞–∑–º–µ—á–µ–Ω–Ω—ã—Ö —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –∏ –≤ –Ω–µ–∫–æ—Ç–æ—Ä—ã—Ö —Å–ª—É—á–∞—è—Ö –ø—Ä–µ–≤–æ—Å—Ö–æ–¥–∏—Ç –∏—Ö.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:53<00:31,  7.99s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É TTRL —Å–ø–æ—Å–æ–±–µ–Ω —Ä–∞–±–æ—Ç–∞—Ç—å –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–æ—Ç–æ–º—É —á—Ç–æ –Ω–∞–≥—Ä–∞–¥—ã –±–æ–ª–µ–µ –ø–ª–æ—Ç–Ω—ã–µ, —á–µ–º –º–µ—Ç–∫–∏, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –ø–æ–ª—É—á–∞—Ç—å –ø–æ–ª–µ–∑–Ω—ã–µ —Å–∏–≥–Ω–∞–ª—ã –æ–±—É—á–µ–Ω–∏—è –¥–∞–∂–µ –ø—Ä–∏ –Ω–µ—Ç–æ—á–Ω—ã—Ö –æ—Ü–µ–Ω–∫–∞—Ö –º–µ—Ç–æ–∫.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:03<00:25,  8.60s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –º–æ–¥–µ–ª–∏ –∏ –±–µ–Ω—á–º–∞—Ä–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è –æ—Ü–µ–Ω–∫–∏ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–æ–¥–µ–ª–∏: Qwen2.5-Math-7B, DeepSeek-R1. –ë–µ–Ω—á–º–∞—Ä–∫–∏: AIME 2024, AMC, MATH-500.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:17<00:20, 10.38s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ñ–∞–∫—Ç–æ—Ä—ã –º–æ–≥—É—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Å–±–æ—é –∏–ª–∏ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ù–µ–¥–æ—Å—Ç–∞—Ç–æ–∫ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω—ã—Ö –∑–Ω–∞–Ω–∏–π –æ —Ü–µ–ª–µ–≤–æ–π –∑–∞–¥–∞—á–µ, —Å–ª–æ–∂–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç—å –æ—Ç –ø—Ä–∏–æ—Ä–æ–≤, —Ä–∏—Å–∫–∏ –∫–æ–ª–ª–∞–ø—Å–∞, –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ—Å—Ç—å –º–µ—Ç–æ–¥–∞ –æ—Ü–µ–Ω–∫–∏ –º–µ—Ç–æ–∫ —á–µ—Ä–µ–∑ –≥–æ–ª–æ—Å–æ–≤–∞–Ω–∏–µ –∏ —Ä–µ–¥–∫–æ—Å—Ç—å –∏ –Ω–µ–∏–∑–≤–µ—Å—Ç–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:28<00:10, 10.51s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ TTRL –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è –ø—Ä–∏ —É–≤–µ–ª–∏—á–µ–Ω–∏–∏ —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç—Å—è —Å —É–≤–µ–ª–∏—á–µ–Ω–∏–µ–º —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏, —Ç–∞–∫ –∫–∞–∫ –±–æ–ª–µ–µ –∫—Ä—É–ø–Ω—ã–µ –º–æ–¥–µ–ª–∏ —Å–ø–æ—Å–æ–±–Ω—ã –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –±–æ–ª–µ–µ —Ç–æ—á–Ω—ã–µ –Ω–∞–≥—Ä–∞–¥—ã –ø—Ä–∏ –≥–æ–ª–æ—Å–æ–≤–∞–Ω–∏–∏ –±–æ–ª—å—à–∏–Ω—Å—Ç–≤–æ–º, —á—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–º—É –æ–±—É—á–µ–Ω–∏—é –Ω–∞ –Ω–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:35<00:00,  9.51s/it]

–í–æ–ø—Ä–æ—Å: –° –∫–∞–∫–∏–º–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ –æ–±—É—á–µ–Ω–∏—è —Å –ø–æ–¥–∫—Ä–µ–ø–ª–µ–Ω–∏–µ–º —Å–æ–≤–º–µ—Å—Ç–∏–º TTRL –∏ –∫–∞–∫–∏–µ –∏–∑ –Ω–∏—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL —Å–æ–≤–º–µ—Å—Ç–∏–º —Å –∞–ª–≥–æ—Ä–∏—Ç–º–∞–º–∏ PPO –∏ GRPO, –∏ –≤ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è PPO.







In [19]:
times_2 = times

In [20]:
np.mean(times_2)

np.float64(6.568811774253845)

In [21]:
df = pd.read_csv('test_results/article_2_questions.csv')

In [22]:
df['Qwen3'] = outputs

In [23]:
df.to_csv('test_results/article_2_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

In [24]:
questions = ['–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?',
             '–ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?',
             '–ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?',
             '–ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?',
             '–ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?',
             '–ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?',
             '–ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?'
            ]

In [25]:
arxiv_url = "https://arxiv.org/pdf/2005.11401"
arxiv_id = extract_arxiv_id(arxiv_url)

In [26]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None, top_k=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:05<00:51,  5.73s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: RAG-Sequence –º–æ–¥–µ–ª—å.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:15<01:05,  8.24s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –∏ –∫–∞–∫ –æ–Ω–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ—Ç—Å—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í RAG –≤ –∫–∞—á–µ—Å—Ç–≤–µ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è Dense Passage Retriever (DPR), –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è –Ω–∞ –¥–∞–Ω–Ω—ã—Ö Natural Questions –∏ TriviaQA.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:27<01:08,  9.73s/it]

–í–æ–ø—Ä–æ—Å: –ß–µ–º –æ—Ç–ª–∏—á–∞—é—Ç—Å—è –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã RAG-Token –∏ RAG-Sequence?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞ RAG-Token –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—é –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞, –∞ RAG-Sequence ‚Äî –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω—É—é –≥–µ–Ω–µ—Ä–∞—Ü–∏—é, –≥–¥–µ –∫–∞–∂–¥—ã–π —à–∞–≥ –∑–∞–≤–∏—Å–∏—Ç –æ—Ç –ø—Ä–µ–¥—ã–¥—É—â–∏—Ö.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:34<00:52,  8.70s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û–±—ä–µ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º –∏–Ω–¥–µ–∫—Å–µ Wikipedia —Å–æ—Å—Ç–∞–≤–ª—è–µ—Ç 21 –º–∏–ª–ª–∏–æ–Ω.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:44<00:46,  9.34s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–∞–¥–∞—á–∏ –±—ã–ª–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω—ã –¥–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–ª—è –æ—Ü–µ–Ω–∫–∏ RAG-–º–æ–¥–µ–ª–µ–π –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –∑–∞–¥–∞—á–∏, —Å–≤—è–∑–∞–Ω–Ω—ã–µ —Å –æ—Ç–∫—Ä—ã—Ç—ã–º –¥–æ–º–µ–Ω–æ–º, –≤–∫–ª—é—á–∞—è QA (question answering) –Ω–∞ –∑–∞–¥–∞—á–∞—Ö, —Ç–∞–∫–∏—Ö –∫–∞–∫ TQA, Natural Questions –∏ TriviaQA.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [00:57<00:42, 10.52s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º –ø—Ä–∏ –æ—Ç–≤–µ—Ç–µ –Ω–∞ –≤–æ–ø—Ä–æ—Å—ã?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê–≤—Ç–æ—Ä—ã —Å—á–∏—Ç–∞—é—Ç –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –ø—Ä–µ–¥–ø–æ—á—Ç–∏—Ç–µ–ª—å–Ω–æ–π –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ–º, –ø–æ—Ç–æ–º—É —á—Ç–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –ø–æ–∑–≤–æ–ª—è–µ—Ç –ø–æ–ª—É—á–∞—Ç—å –±–æ–ª–µ–µ —Ç–æ—á–Ω—ã–µ –∏ –∫–æ–Ω–∫—Ä–µ—Ç–Ω—ã–µ –æ—Ç–≤–µ—Ç—ã, –¥–∞–∂–µ –µ—Å–ª–∏ –¥–æ–∫—É–º–µ–Ω—Ç—ã —Å–æ–¥–µ—Ä–∂–∞—Ç –ø–æ–¥—Å–∫–∞–∑–∫–∏, –Ω–æ –Ω–µ —Å–æ–¥–µ—Ä–∂–∞—Ç –æ—Ç–≤–µ—Ç–∞ –Ω–∞–ø—Ä—è–º—É—é, —á—Ç–æ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ —Å –ø–æ–º–æ—â—å—é —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã—Ö –∏–∑–≤–ª–µ–∫–∞—é—â–∏—Ö –º–µ—Ç–æ–¥–æ–≤.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:05<00:28,  9.65s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –ø–æ–∫–∞–∑–∞–ª RAG –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å BART?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: RAG –ø–æ–∫–∞–∑–∞–ª –ª—É—á—à—É—é –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å, —á–µ–º BART, –Ω–∞ –∑–∞–¥–∞—á–µ Jeopardy Question Generation –ø–æ –º–µ—Ç—Ä–∏–∫–µ Q-BLEU-1.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:21<00:23, 11.54s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –°–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —á–µ—Ä–µ–∑ –º–∏–Ω–∏–º–∏–∑–∞—Ü–∏—é –æ—Ç—Ä–∏—Ü–∞—Ç–µ–ª—å–Ω–æ–≥–æ –º–∞—Ä–≥–∏–Ω–∞–ª—å–Ω–æ–≥–æ –ª–æ–≥–∞—Ä–∏—Ñ–º–∏—á–µ—Å–∫–æ–≥–æ –ø—Ä–∞–≤–¥–æ–ø–æ–¥–æ–±–∏—è —Ü–µ–ª–µ–≤—ã—Ö –≤—ã—Ö–æ–¥–æ–≤ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —Å—Ç–æ—Ö–∞—Å—Ç–∏—á–µ—Å–∫–æ–≥–æ –≥—Ä–∞–¥–∏–µ–Ω—Ç–Ω–æ–≥–æ —Å–ø—É—Å–∫–∞, –ø—Ä–∏ —ç—Ç–æ–º –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤—ã–π —ç–Ω–∫–æ–¥–µ—Ä —Ñ–∏–∫—Å–∏—Ä—É–µ—Ç—Å—è, –∞ —Ç–æ–ª—å–∫–æ query —ç–Ω–∫–æ–¥–µ—Ä –∏ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä –ø–æ–¥–≤–µ—Ä–≥–∞—é—Ç—Å—è —Ç–æ–Ω–∫–æ–π –Ω–∞—Å—Ç—Ä–æ–π–∫–µ.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:31<00:11, 11.20s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π —ç—Ñ—Ñ–µ–∫—Ç –¥–∞—ë—Ç "hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: "Hot-swapping" –∏–Ω–¥–µ–∫—Å–∞ –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤ –≤ RAG –ø–æ–∑–≤–æ–ª—è–µ—Ç –æ–±–Ω–æ–≤–ª—è—Ç—å –∑–Ω–∞–Ω–∏—è –º–æ–¥–µ–ª–∏ –≤ —Ä–µ–∂–∏–º–µ —Ä–µ–∞–ª—å–Ω–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏ –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –ø–µ—Ä–µ–æ–±—É—á–µ–Ω–∏—è, —á—Ç–æ –æ–±–µ—Å–ø–µ—á–∏–≤–∞–µ—Ç –∞–∫—Ç—É–∞–ª—å–Ω–æ—Å—Ç—å –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –ø—Ä–∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–∏ —Å—Ç—Ä—É–∫—Ç—É—Ä—ã –∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ –º–æ–¥–µ–ª–∏.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:44<00:00, 10.40s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –ø—Ä–∏ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–∏ –≤ RAG-Sequence –∏ RAG-Token?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í RAG-Sequence –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏—è top-K –¥–ª—è –º–∞—Ä–≥–∏–Ω–∞–ª–∏–∑–∞—Ü–∏–∏ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–∏, –∞ –≤ RAG-Token ‚Äî –∞–ø–ø—Ä–æ–∫—Å–∏–º–∞—Ü–∏—è —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º top-K –∏ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ –¥–µ–∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏—è —Å —É—á–µ—Ç–æ–º —Ä–µ—Ç—Ä–∏–≤–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –¥–æ–∫—É–º–µ–Ω—Ç–æ–≤.







In [27]:
times_3 = times

In [28]:
np.mean(times_3)

np.float64(7.817001175880432)

In [29]:
df = pd.read_csv('test_results/article_3_questions.csv')

In [30]:
df['Qwen3'] = outputs

In [31]:
df.to_csv('test_results/article_3_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è Efficient Estimation of Word Representations in Vector Space

In [32]:
questions = ['–ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?',
             '–í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?',
             '–ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?',
             '–ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?',
             '–ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?',
             '–ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?',
             '–ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?',
             '–ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?',
             '–ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?'
            ]

In [33]:
arxiv_url = "https://arxiv.org/pdf/1301.3781"
arxiv_id = extract_arxiv_id(arxiv_url)

In [34]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None, top_k=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:10<01:30, 10.07s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –¥–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä –±—ã–ª–∏ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω—ã –∞–≤—Ç–æ—Ä–∞–º–∏ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–π —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –î–≤–µ –º–æ–¥–µ–ª–∏ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä, –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–Ω—ã–µ –∞–≤—Ç–æ—Ä–∞–º–∏, ‚Äî —ç—Ç–æ –º–æ–¥–µ–ª—å —Å –ª–∏–Ω–µ–π–Ω—ã–º –ø—Ä–æ–µ–∫—Ü–∏–æ–Ω–Ω—ã–º —Å–ª–æ–µ–º –∏ –Ω–µ–ª–∏–Ω–µ–π–Ω—ã–º —Å–∫—Ä—ã—Ç—ã–º —Å–ª–æ–µ–º, –∞ —Ç–∞–∫–∂–µ –º–æ–¥–µ–ª—å, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –Ω–∞ –¥—Ä—É–≥–∏—Ö —Ç–∏–ø–∞—Ö –Ω–µ–π—Ä–æ–Ω–Ω—ã—Ö —Å–µ—Ç–µ–π.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:28<01:57, 14.72s/it]

–í–æ–ø—Ä–æ—Å: –í —á—ë–º –æ—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –û—Å–Ω–æ–≤–Ω–æ–µ —Ä–∞–∑–ª–∏—á–∏–µ –º–µ–∂–¥—É –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞–º–∏ CBOW –∏ Skip-gram –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ —Ç–æ–º, —á—Ç–æ CBOW –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç —Ç–µ–∫—É—â–µ–µ —Å–ª–æ–≤–æ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞, –∞ Skip-gram –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç –æ–∫—Ä—É–∂–∞—é—â–∏–µ —Å–ª–æ–≤–∞ –Ω–∞ –æ—Å–Ω–æ–≤–µ —Ç–µ–∫—É—â–µ–≥–æ —Å–ª–æ–≤–∞.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:38<01:29, 12.84s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–∏–ø—ã –æ—Ç–Ω–æ—à–µ–Ω–∏–π –≤–∫–ª—é—á–µ–Ω—ã –≤ Semantic-Syntactic Word Relationship test set?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í Semantic-Syntactic Word Relationship test set –≤–∫–ª—é—á–µ–Ω—ã –ø—è—Ç—å —Ç–∏–ø–æ–≤ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ –¥–µ–≤—è—Ç—å —Ç–∏–ø–æ–≤ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –æ—Ç–Ω–æ—à–µ–Ω–∏–π.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:50<01:14, 12.41s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ç–æ—á–Ω–æ—Å—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∞ –º–æ–¥–µ–ª—å Skip-gram –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏—Ö –∏ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–∏—Ö –ø–æ–¥–∑–∞–¥–∞—á–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–æ–¥–µ–ª—å Skip-gram –ø–æ–∫–∞–∑–∞–ª–∞ —Ç–æ—á–Ω–æ—Å—Ç—å 50.0% –Ω–∞ —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–æ–π –ø–æ–¥–∑–∞–¥–∞—á–µ –∏ 55.9% –Ω–∞ —Å–∏–Ω—Ç–∞–∫—Å–∏—á–µ—Å–∫–æ–π –ø–æ–¥–∑–∞–¥–∞—á–µ.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:56<00:51, 10.28s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ Skip-gram –º–æ–¥–µ–ª–∏ –¥–ª—è –≤—ã–±–æ—Ä–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω—ã—Ö —Å–ª–æ–≤?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –°—ç–º–ø–ª–∏—Ä–æ–≤–∞–Ω–∏–µ —Å –≤–µ—Å–∞–º–∏, –≥–¥–µ –±–æ–ª–µ–µ –±–ª–∏–∑–∫–∏–µ —Å–ª–æ–≤–∞ –∏–º–µ—é—Ç –±–æ–ª—å—à–∏–π –≤–µ—Å.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [01:05<00:38,  9.55s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–≤–∞ —Ñ–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –§–æ—Ä–º—É–ª–∞ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ CBOW: $ Q = C \times (D + D \times \log_2(V)) $.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:12<00:26,  8.96s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É –∞–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è –≤ –Ω–æ–≤—ã—Ö –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–∞—Ö?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ê–≤—Ç–æ—Ä—ã –æ—Ç–∫–∞–∑–∞–ª–∏—Å—å –æ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–∫—Ä—ã—Ç–æ–≥–æ —Å–ª–æ—è, —á—Ç–æ–±—ã —É–º–µ–Ω—å—à–∏—Ç—å –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω—É—é —Å–ª–æ–∂–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ –∏ –ø–æ–≤—ã—Å–∏—Ç—å —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –±–æ–ª—å—à–∏—Ö –æ–±—ä–µ–º–∞—Ö –¥–∞–Ω–Ω—ã—Ö.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:23<00:18,  9.44s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∞—Å—å —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∞ –º–æ–¥–µ–ª–µ–π –≤ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª—ë–Ω–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ DistBelief?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–æ–¥–µ–ª–∏ –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∏—Å—å —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º 50 –¥–æ 100 —Ä–µ–ø–ª–∏–∫ –º–æ–¥–µ–ª–∏, –∫–∞–∂–¥–∞—è –∏–∑ –∫–æ—Ç–æ—Ä—ã—Ö –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∞ –Ω–µ—Å–∫–æ–ª—å–∫–æ CPU-—è–¥–µ—Ä –Ω–∞ —Ä–∞–∑–Ω—ã—Ö –º–∞—à–∏–Ω–∞—Ö –≤ —Ü–µ–Ω—Ç—Ä–µ –¥–∞–Ω–Ω—ã—Ö.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:34<00:10, 10.01s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ word vectors?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í–æ–ø—Ä–æ—Å—ã –≤ —Ç–µ—Å—Ç–æ–≤–æ–º –Ω–∞–±–æ—Ä–µ –±—ã–ª–∏ —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω—ã –≤ –¥–≤–∞ —ç—Ç–∞–ø–∞: —Å–Ω–∞—á–∞–ª–∞ —Å–æ–∑–¥–∞–≤–∞–ª–∞—Å—å —Ä—É—á–Ω–∞—è —Å–ø–∏—Å–æ–∫ –ø–∞—Ä —Å—Ö–æ–∂–∏—Ö —Å–ª–æ–≤, –∞ –∑–∞—Ç–µ–º —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–ª—Å—è –±–æ–ª—å—à–æ–π —Å–ø–∏—Å–æ–∫ –≤–æ–ø—Ä–æ—Å–æ–≤ –ø—É—Ç—ë–º —Å–ª—É—á–∞–π–Ω–æ–≥–æ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è –¥–≤—É—Ö –ø–∞—Ä —Å–ª–æ–≤.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:45<00:00, 10.53s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman = ?‚Äù?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–µ—Ç–æ–¥, –∏—Å–ø–æ–ª—å–∑—É–µ–º—ã–π –¥–ª—è –æ—Ç–≤–µ—Ç–∞ –Ω–∞ –∞–Ω–∞–ª–æ–≥–∏–∏ –≤—Ä–æ–¥–µ ‚Äúking - man + woman =?‚Äù, ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö –æ–ø–µ—Ä–∞—Ü–∏–π, –æ—Å–Ω–æ–≤–∞–Ω–Ω—ã–π –Ω–∞ –∞–ª–≥–µ–±—Ä–∞–∏—á–µ—Å–∫–∏—Ö –¥–µ–π—Å—Ç–≤–∏—è—Ö —Å –≤–µ–∫—Ç–æ—Ä–∞–º–∏ —Å–ª–æ–≤.







In [35]:
times_4 = times

In [36]:
np.mean(times_4)

np.float64(7.517814183235169)

In [37]:
df = pd.read_csv('test_results/article_4_questions.csv')

In [38]:
df['Qwen3'] = outputs

In [39]:
df.to_csv('test_results/article_4_questions.csv', index=False)

#### –°—Ç–∞—Ç—å—è LADDER: SELF-IMPROVING LLMS THROUGH RECURSIVE PROBLEM DECOMPOSITION

In [40]:
questions = ['–í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?',
             '–ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?',
             '–ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?',
             '–ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?',
             '–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?',
             '–ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?',
             '–ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?',
             '–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?',
             '–ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?',
             '–ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?'
            ]

In [41]:
arxiv_url = "https://arxiv.org/pdf/2503.00735"
arxiv_id = extract_arxiv_id(arxiv_url)

In [42]:
outputs = []
times = []

for question in tqdm(questions):
    reranked = retrieve_and_rerank(question, vectorstore, reranker, arxiv_id, top_k=10, top_n=2)
    top_chunks = [doc.page_content for doc, score in reranked]
    context = "\n\n".join(top_chunks)

    messages = build_messages(question, context)
    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
    start = time.time()

    answer = llm(messages, max_new_tokens=256, do_sample=False, top_p=None, temperature=None, top_k=None)[0]['generated_text']

    end = time.time()
    inf_time = end - start
    
    times.append(inf_time)
    print(f'–í–æ–ø—Ä–æ—Å: {question}\n')
    print(f'–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: {answer}\n\n')
    outputs.append(answer)

 10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                                                                          | 1/10 [00:09<01:25,  9.55s/it]

–í–æ–ø—Ä–æ—Å: –í —á—ë–º –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –∫–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ö–ª—é—á–µ–≤–∞—è –∏–¥–µ—è —Ñ—Ä–µ–π–º–≤–æ—Ä–∫–∞ LADDER –∑–∞–∫–ª—é—á–∞–µ—Ç—Å—è –≤ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–º —É–ª—É—á—à–µ–Ω–∏–∏ —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–µ–π —Ä–µ—à–∞—Ç—å –∑–∞–¥–∞—á–∏ —á–µ—Ä–µ–∑ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ —Å–ª–æ–∂–Ω—ã—Ö –∑–∞–¥–∞—á –Ω–∞ –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ –≤–∞—Ä–∏–∞–Ω—Ç—ã –∏ –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ –∏—Ö —Ä–µ—à–µ–Ω–∏—è—Ö.




 20%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                                                  | 2/10 [00:16<01:05,  8.18s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –º–µ—Ç–æ–¥ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–µ—Ç–æ–¥ —á–∏—Å–ª–µ–Ω–Ω–æ–π –∏–Ω—Ç–µ–≥—Ä–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç–∏ —Ä–µ—à–µ–Ω–∏–π –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤ –≤ LADDER.




 30%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                                                          | 3/10 [00:26<01:01,  8.78s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è –≥–µ–Ω–µ—Ä–∞—Ü–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –í–∞—Ä–∏–∞–Ω—Ç—ã –∑–∞–¥–∞—á –≤ LADDER –≥–µ–Ω–µ—Ä–∏—Ä—É—é—Ç—Å—è —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º —Ç—Ä—ë—Ö—ç—Ç–∞–ø–Ω–æ–≥–æ –º–µ—Ç–æ–¥–∞, –æ–ø–∏—Å–∞–Ω–Ω–æ–≥–æ –≤ —Ä–∞–∑–¥–µ–ª–µ 3.1.2, –≥–¥–µ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∏–Ω—Ç–µ–≥—Ä–∞–ª–∞ –∏–∑ –æ–±—É—á–∞—é—â–µ–≥–æ –Ω–∞–±–æ—Ä–∞ —Å–æ–∑–¥–∞—ë—Ç—Å—è –¥–µ—Ä–µ–≤–æ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤.




 40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                 | 4/10 [00:43<01:13, 12.25s/it]

–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ TTRL –∏ —á–µ–º –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è –æ—Ç LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: TTRL ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ —Ç–µ—Å—Ç–æ–≤–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏, –∫–æ—Ç–æ—Ä—ã–π –∏—Å–ø–æ–ª—å–∑—É–µ—Ç —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–µ —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –∑–∞–¥–∞—á –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è —Å–ø–æ—Å–æ–±–Ω–æ—Å—Ç–∏ –º–æ–¥–µ–ª–∏ —Ä–µ—à–∞—Ç—å —Å–ª–æ–∂–Ω—ã–µ –∑–∞–¥–∞—á–∏. LADDER ‚Äî —ç—Ç–æ —Å—Ç—Ä—É–∫—Ç—É—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–π —Ñ—Ä–µ–π–º–≤–æ—Ä–∫, –æ—Å–Ω–æ–≤–∞–Ω–Ω—ã–π –Ω–∞ —Ä–µ–∫—É—Ä—Å–∏–≤–Ω–æ–º —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–∏ –∑–∞–¥–∞—á –∏ —É—Å–∏–ª–µ–Ω–∏–∏ –æ–±—É—á–µ–Ω–∏—è, –≤–∫–ª—é—á–∞—é—â–∏–π –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤, –ø—Ä–æ–≤–µ—Ä–∫—É —Ä–µ—à–µ–Ω–∏–π –∏ –æ–±—É—á–µ–Ω–∏–µ —Å —É—Å–∏–ª–µ–Ω–∏–µ–º. TTRL —É–ª—É—á—à–∞–µ—Ç LADDER, –ø—Ä–∏–º–µ–Ω—è—è —Ä–∞–∑–ª–æ–∂–µ–Ω–∏–µ –∑–∞–¥–∞—á –Ω–µ–ø–æ—Å—Ä–µ–¥—Å—Ç–≤–µ–Ω–Ω–æ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –≤–æ–ø—Ä–æ—Å–∞—Ö, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Ä–µ—à–∞—Ç—å –±–æ–ª–µ–µ —Å–ª–æ–∂–Ω—ã–µ –∑–∞–¥–∞—á–∏.




 50%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                                         | 5/10 [00:53<00:56, 11.25s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã –º–æ–¥–µ–ª—å—é Qwen2.5 7B –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ú–æ–¥–µ–ª—å Qwen2.5 7B –¥–æ—Å—Ç–∏–≥–ª–∞ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞ 90% –Ω–∞ —ç–∫–∑–∞–º–µ–Ω–µ MIT Integration Bee –ø–æ—Å–ª–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏—è LADDER –∏ TTRL.




 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                                 | 6/10 [01:01<00:41, 10.37s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∞—è —Ñ—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –§—É–Ω–∫—Ü–∏—è –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è, –∏—Å–ø–æ–ª—å–∑—É–µ–º–∞—è –≤ GRPO –≤ —Ä–∞–º–∫–∞—Ö –æ–±—É—á–µ–Ω–∏—è –≤ LADDER, ‚Äî —ç—Ç–æ —Ñ—É–Ω–∫—Ü–∏—è, –æ—Å–Ω–æ–≤–∞–Ω–Ω–∞—è –Ω–∞ –æ—Ü–µ–Ω–∫–µ –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏ —Ä–µ—à–µ–Ω–∏–π, –∫–æ—Ç–æ—Ä–∞—è –ø—Ä–æ–≤–µ—Ä—è–µ—Ç –ø—Ä–∞–≤–∏–ª—å–Ω–æ—Å—Ç—å –æ—Ç–≤–µ—Ç–æ–≤ –º–æ–¥–µ–ª–∏.




 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                         | 7/10 [01:12<00:31, 10.58s/it]

–í–æ–ø—Ä–æ—Å: –ü–æ—á–µ–º—É RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –ø–æ–∫–∞–∑—ã–≤–∞–µ—Ç –ø–ª–æ—Ö–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –Ω–∞ –∑–∞–¥–∞—á–∞—Ö –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏—è?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü–æ—Ç–æ–º—É —á—Ç–æ RL –±–µ–∑ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –Ω–µ —Å–ø–æ—Å–æ–±–µ–Ω —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ –æ–±—É—á–∞—Ç—å—Å—è —Å–ª–æ–∂–Ω—ã–º –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–º –Ω–∞–≤—ã–∫–∞–º, —Ç–∞–∫–∏–º –∫–∞–∫ –∏–Ω—Ç–µ–≥—Ä–∏—Ä–æ–≤–∞–Ω–∏–µ, –∏–∑-–∑–∞ –æ—Ç—Å—É—Ç—Å—Ç–≤–∏—è —Å—Ç—Ä—É–∫—Ç—É—Ä–∏—Ä–æ–≤–∞–Ω–Ω–æ–≥–æ –≥—Ä–∞–¥–∏–µ–Ω—Ç–∞ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏, —á—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –ø–ª–æ—Ö–∏–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º.




 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                | 8/10 [01:22<00:20, 10.36s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –¥–ª—è —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏—è –≥–µ–Ω–µ—Ä–∏—Ä—É–µ–º—ã—Ö –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ò—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ª–µ–¥—É—é—â–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏: —Å–ª—É—á–∞–π–Ω–æ–µ –≤—ã–±–æ—Ä–∫–∞ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–π, —Ü–∏–∫–ª–∏—á–µ—Å–∫–æ–µ –∏–∑–º–µ–Ω–µ–Ω–∏–µ —Ç–µ–º–ø–µ—Ä–∞—Ç—É—Ä—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∏ –ø–µ—Ä—Å–æ–Ω–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –ø—Ä–æ–º–ø—Ç—ã —Å —Ä–∞–∑–Ω—ã–º–∏ –º–∞—Ç–µ–º–∞—Ç–∏—á–µ—Å–∫–∏–º–∏ –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤–∞–º–∏.




 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã        | 9/10 [01:32<00:10, 10.25s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: LADDER –º–∞—Å—à—Ç–∞–±–∏—Ä—É–µ—Ç –æ–±—É—á–µ–Ω–∏–µ, –ø–æ–∑–≤–æ–ª—è—è –º–æ–¥–µ–ª—è–º –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–∏ —Ä–∞–∑–±–∏–≤–∞—Ç—å —Å–ª–æ–∂–Ω—ã–µ –∑–∞–¥–∞—á–∏ –Ω–∞ –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç—ã–µ –∏ –æ–±—É—á–∞—Ç—å—Å—è –Ω–∞ –Ω–∏—Ö –≤–æ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–ª—É—á—à–∞—Ç—å –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –±–µ–∑ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –º–æ–¥–µ–ª–∏ –∏–ª–∏ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è.




100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:42<00:00, 10.27s/it]

–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏—è –∏–ª–∏ —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –±—ã–ª–∏ –∑–∞–º–µ—á–µ–Ω—ã –ø—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á?

–û—Ç–≤–µ—Ç –Ω–∞ –≤–æ–ø—Ä–æ—Å: –ü—Ä–∏ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∑–∞–¥–∞—á –Ω–∞–±–ª—é–¥–∞–ª–∏—Å—å —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ —Å –∫–æ–Ω—Ç—Ä–æ–ª–µ–º —É—Ä–æ–≤–Ω—è —Å–ª–æ–∂–Ω–æ—Å—Ç–∏, —Ç–∞–∫ –∫–∞–∫ –Ω–µ–±–æ–ª—å—à–∏–µ –∏–∑–º–µ–Ω–µ–Ω–∏—è –≤ –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç–∞—Ö –∏–ª–∏ —Ñ—É–Ω–∫—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π —Å–æ—Å—Ç–∞–≤–ª—è—é—â–µ–π –º–æ–≥–ª–∏ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ —É–≤–µ–ª–∏—á–∏–≤–∞—Ç—å —Å–ª–æ–∂–Ω–æ—Å—Ç—å –∏–Ω—Ç–µ–≥—Ä–∞–ª–æ–≤.







In [43]:
times_5 = times

In [44]:
np.mean(times_5)

np.float64(8.391678547859192)

In [45]:
df = pd.read_csv('test_results/article_5_questions.csv')

In [46]:
df['Qwen3'] = outputs

In [47]:
df.to_csv('test_results/article_5_questions.csv', index=False)

In [48]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Qwen3: {np.mean(times_1 + times_2 + times_3 + times_4 + times_5):.1f} —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–∞ Qwen3: 7.7 —Å–µ–∫


# –°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤

In [49]:
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Tbank: 8.5 —Å–µ–∫')
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Yandex: 4.2 —Å–µ–∫')
print(f'–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Qwen3: 7.7 —Å–µ–∫')

–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Tbank: 8.5 —Å–µ–∫
–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Yandex: 4.2 —Å–µ–∫
–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ—Ç–≤–µ—Ç–æ–≤ Qwen3: 7.7 —Å–µ–∫


## –°–æ–±–∏—Ä–∞–µ–º –≤—Å–µ –æ—Ç–≤–µ—Ç—ã –≤ –æ–¥–∏–Ω –¥–∞—Ç–∞—Ñ—Ä–µ–π–º

In [52]:
df1 = pd.read_csv('test_results/article_1_questions.csv')
df2 = pd.read_csv('test_results/article_2_questions.csv')
df3 = pd.read_csv('test_results/article_3_questions.csv')
df4 = pd.read_csv('test_results/article_4_questions.csv')
df5 = pd.read_csv('test_results/article_5_questions.csv')

In [53]:
df = pd.concat([df1, df2, df3, df4, df5])

In [57]:
df.to_csv('test_results/article_questions_all.csv', index=False)

## LLM as a judge

In [1]:
import pandas as pd

df = pd.read_csv('article_questions_all.csv')

In [2]:
df

Unnamed: 0,Questions,gt,Tbank,YandexGPT,Qwen3,gen_tbank_rank,gen_yandex_rank,gen_qwen_rank,sim_scores_tbank,sim_scores_yandex,sim_scores_qwen
0,–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏...,–í –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è Adam —Å –ø–∞—Ä...,–í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä Ad...,–ú—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º...,"–û–ø—Ç–∏–º–∞–π–∑–µ—Ä ‚Äî Adam, –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã: Œ≤‚ÇÅ = 0.9, Œ≤...",1,2,2,0.669123,0.570089,0.50041
1,–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transf...,"Transformer –∑–∞–º–µ–Ω—è–µ—Ç RNN –∏–ª–∏ LSTM/GRU, –∏—Å–ø–æ–ª—å–∑...","–í —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö, —Ç...","Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏, –Ω–∞–∏–±–æ...",–†–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏.,1,2,3,0.695416,0.769922,0.370163
2,–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º...,"–≠—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –≤—ã—á–∏—Å–ª—è–µ—Ç:\nAtt...",–î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –≤ Scaled Dot-Product Attention ...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–Ω–∏–º...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω...,3,2,1,0.503097,0.558233,0.644848
3,–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Tra...,"Encoder: N –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –±–ª–æ–∫–æ–≤ (–æ–±—ã—á–Ω–æ N=6), –∫–∞–∂...",–°—Ç—Ä—É–∫—Ç—É—Ä–∞ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ –¥–≤—É—Ö –æ—Å–Ω–æ–≤–Ω—ã—Ö...,–°—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer —Å–æ—Å...,Encoder —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤...,3,2,1,0.355634,0.415566,0.558227
4,–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏...,"–ü–æ—Ç–æ–º—É —á—Ç–æ –∫–∞–∂–¥–∞—è ""–≥–æ–ª–æ–≤–∞"" –æ–±—É—á–∞–µ—Ç—Å—è —Ñ–æ–∫—É—Å–∏—Ä–æ–≤...",Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,Multi-head attention –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–æ–≤–º–µ—Å—Ç...,Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,2,3,1,0.405849,0.377725,0.400534
5,–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ ...,–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –¥–æ–±–∞–≤–ª—è—é—Ç –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ –ø...,"–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ (PE) –≤ –Ω–µ–π—Ä–æ–Ω–Ω—ã—Ö —Å–µ—Ç—è—Ö, ...",–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç —Ñ—É–Ω–∫—Ü–∏—é –ø—Ä–µ–¥—Å...,–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ —É—á–∏—Ç—ã–≤–∞...,3,2,1,0.602244,0.709361,0.736253
6,–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é...,Self-attention: O(n¬≤) –ø–æ –¥–ª–∏–Ω–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ...,–ü—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–µ—Ä–µ–¥ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º...,–í –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ —Å–ª–æ–π self-at...,Self-attention –∏–º–µ–µ—Ç –ø–æ—Å—Ç–æ—è–Ω–Ω–æ–µ —á–∏—Å–ª–æ –æ–ø–µ—Ä–∞—Ü–∏–π...,2,3,1,0.619954,0.577273,0.644929
7,–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏...,"Dropout (–≤ attention, feed-forward, embeddings...",–î–ª—è –æ–±—É—á–µ–Ω–∏—è Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ª–µ–¥—É—é...,–ü—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç...,–í –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç–∏–ø–∞...,1,2,2,0.34294,0.224844,0.229979
8,–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞...,EN‚ÜíDE: BLEU 28.4 (vs. –ø—Ä–µ–¥—ã–¥—É—â–∏–µ ~26.4)\nEN‚ÜíFR...,–ù–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ —Å –∞–Ω–≥–ª–∏–π—Å–∫–æ–≥–æ –Ω–∞ —Ñ—Ä–∞–Ω—Ü—É–∑—Å–∫...,–ù–∞ –∑–∞–¥–∞—á–µ –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíFR –ø–æ WMT 2014 –º–æ–¥–µ–ª—å T...,Transformer –ø–æ–∫–∞–∑–∞–ª BLEU-—Å—á–µ—Ç 41.8 –Ω–∞ –∑–∞–¥–∞—á–µ E...,3,2,1,0.672282,0.662603,0.66286
9,–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é...,–ü–æ—Ç–æ–º—É —á—Ç–æ self-attention –Ω–µ —Ç—Ä–µ–±—É–µ—Ç –ø–æ—Å–ª–µ–¥–æ–≤–∞...,–ü–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—è –≤ Transformer –≤–æ–∑–º–æ–∂–Ω–∞ –∏–∑-–∑–∞ –æ—Ç...,Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –±–æ–ª—å—à–µ –ø–∞—Ä–∞...,–ü–æ—Ç–æ–º—É —á—Ç–æ Transformer –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏...,1,3,2,0.395121,0.322822,0.407397


In [3]:
df

Unnamed: 0,Questions,gt,Tbank,YandexGPT,Qwen3,gen_tbank_rank,gen_yandex_rank,gen_qwen_rank,sim_scores_tbank,sim_scores_yandex,sim_scores_qwen
0,–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏...,–í –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è Adam —Å –ø–∞—Ä...,–í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä Ad...,–ú—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º...,"–û–ø—Ç–∏–º–∞–π–∑–µ—Ä ‚Äî Adam, –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã: Œ≤‚ÇÅ = 0.9, Œ≤...",1,2,2,0.669123,0.570089,0.50041
1,–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transf...,"Transformer –∑–∞–º–µ–Ω—è–µ—Ç RNN –∏–ª–∏ LSTM/GRU, –∏—Å–ø–æ–ª—å–∑...","–í —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö, —Ç...","Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏, –Ω–∞–∏–±–æ...",–†–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏.,1,2,3,0.695416,0.769922,0.370163
2,–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º...,"–≠—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –≤—ã—á–∏—Å–ª—è–µ—Ç:\nAtt...",–î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –≤ Scaled Dot-Product Attention ...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–Ω–∏–º...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω...,3,2,1,0.503097,0.558233,0.644848
3,–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Tra...,"Encoder: N –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –±–ª–æ–∫–æ–≤ (–æ–±—ã—á–Ω–æ N=6), –∫–∞–∂...",–°—Ç—Ä—É–∫—Ç—É—Ä–∞ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ –¥–≤—É—Ö –æ—Å–Ω–æ–≤–Ω—ã—Ö...,–°—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer —Å–æ—Å...,Encoder —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤...,3,2,1,0.355634,0.415566,0.558227
4,–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏...,"–ü–æ—Ç–æ–º—É —á—Ç–æ –∫–∞–∂–¥–∞—è ""–≥–æ–ª–æ–≤–∞"" –æ–±—É—á–∞–µ—Ç—Å—è —Ñ–æ–∫—É—Å–∏—Ä–æ–≤...",Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,Multi-head attention –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–æ–≤–º–µ—Å—Ç...,Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,2,3,1,0.405849,0.377725,0.400534
5,–ö–∞–∫–∏–µ —Ñ—É–Ω–∫—Ü–∏–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç –ø–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ ...,–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –¥–æ–±–∞–≤–ª—è—é—Ç –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ –ø...,"–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ (PE) –≤ –Ω–µ–π—Ä–æ–Ω–Ω—ã—Ö —Å–µ—Ç—è—Ö, ...",–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç —Ñ—É–Ω–∫—Ü–∏—é –ø—Ä–µ–¥—Å...,–ü–æ–∑–∏—Ü–∏–æ–Ω–Ω—ã–µ –∫–æ–¥–∏—Ä–æ–≤–∫–∏ –ø–æ–∑–≤–æ–ª—è—é—Ç –º–æ–¥–µ–ª–∏ —É—á–∏—Ç—ã–≤–∞...,3,2,1,0.602244,0.709361,0.736253
6,–í —á–µ–º –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é...,Self-attention: O(n¬≤) –ø–æ –¥–ª–∏–Ω–µ –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ...,–ü—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞ self-attention –ø–µ—Ä–µ–¥ —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–º...,–í –ø–ª–∞–Ω–µ –≤—ã—á–∏—Å–ª–∏—Ç–µ–ª—å–Ω–æ–π —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ —Å–ª–æ–π self-at...,Self-attention –∏–º–µ–µ—Ç –ø–æ—Å—Ç–æ—è–Ω–Ω–æ–µ —á–∏—Å–ª–æ –æ–ø–µ—Ä–∞—Ü–∏–π...,2,3,1,0.619954,0.577273,0.644929
7,–ö–∞–∫–∏–µ —Ç–µ—Ö–Ω–∏–∫–∏ —Ä–µ–≥—É–ª—è—Ä–∏–∑–∞—Ü–∏–∏ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –ø—Ä–∏...,"Dropout (–≤ attention, feed-forward, embeddings...",–î–ª—è –æ–±—É—á–µ–Ω–∏—è Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Å–ª–µ–¥—É—é...,–ü—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç...,–í –æ–±—É—á–µ–Ω–∏–∏ Transformer –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å —Ç—Ä–∏ —Ç–∏–ø–∞...,1,2,2,0.34294,0.224844,0.229979
8,–ö–∞–∫–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã Transformer –ø–æ–∫–∞–∑–∞–ª –Ω–∞ –∑–∞–¥–∞—á–∞...,EN‚ÜíDE: BLEU 28.4 (vs. –ø—Ä–µ–¥—ã–¥—É—â–∏–µ ~26.4)\nEN‚ÜíFR...,–ù–∞ –∑–∞–¥–∞—á–∞—Ö –ø–µ—Ä–µ–≤–æ–¥–∞ —Å –∞–Ω–≥–ª–∏–π—Å–∫–æ–≥–æ –Ω–∞ —Ñ—Ä–∞–Ω—Ü—É–∑—Å–∫...,–ù–∞ –∑–∞–¥–∞—á–µ –ø–µ—Ä–µ–≤–æ–¥–∞ EN‚ÜíFR –ø–æ WMT 2014 –º–æ–¥–µ–ª—å T...,Transformer –ø–æ–∫–∞–∑–∞–ª BLEU-—Å—á–µ—Ç 41.8 –Ω–∞ –∑–∞–¥–∞—á–µ E...,3,2,1,0.672282,0.662603,0.66286
9,–ü–æ—á–µ–º—É Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é...,–ü–æ—Ç–æ–º—É —á—Ç–æ self-attention –Ω–µ —Ç—Ä–µ–±—É–µ—Ç –ø–æ—Å–ª–µ–¥–æ–≤–∞...,–ü–∞—Ä–∞–ª–ª–µ–ª–∏–∑–∞—Ü–∏—è –≤ Transformer –≤–æ–∑–º–æ–∂–Ω–∞ –∏–∑-–∑–∞ –æ—Ç...,Transformer –ø–æ–∑–≤–æ–ª—è–µ—Ç –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω–æ –±–æ–ª—å—à–µ –ø–∞—Ä–∞...,–ü–æ—Ç–æ–º—É —á—Ç–æ Transformer –∏—Å–ø–æ–ª—å–∑—É–µ—Ç –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏...,1,3,2,0.395121,0.322822,0.407397


In [93]:
n = 27
str_question = df['Questions'].iloc[n]
str_gt = df['gt'].iloc[n]
str_tbank = df['Tbank'].iloc[n]
str_yandex = df['YandexGPT'].iloc[n]
str_qwen = df['Qwen3'].iloc[n]

print(f'–í–æ–ø—Ä–æ—Å: {str_question}\n')
print(f'–≠—Ç–∞–ª–æ–Ω–Ω—ã–π –æ—Ç–≤–µ—Ç: {str_gt}\n')
print(f'–û—Ç–≤–µ—Ç Tbank: {str_tbank}\n')
print(f'–û—Ç–≤–µ—Ç YandexGPT: {str_yandex}\n')  
print(f'–û—Ç–≤–µ—Ç Qwen: {str_qwen}')

–í–æ–ø—Ä–æ—Å: –ö–∞–∫ –æ—Å—É—â–µ—Å—Ç–≤–ª—è–µ—Ç—Å—è —Å–æ–≤–º–µ—Å—Ç–Ω–æ–µ –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –≤ RAG?

–≠—Ç–∞–ª–æ–Ω–Ω—ã–π –æ—Ç–≤–µ—Ç: –ú–æ–¥–µ–ª—å –æ–±—É—á–∞–µ—Ç—Å—è end-to-end, –º–∏–Ω–∏–º–∏–∑–∏—Ä—É—è –æ—Ç—Ä–∏—Ü–∞—Ç–µ–ª—å–Ω—ã–π log-likelihood. –¢–æ–ª—å–∫–æ query-encoder BERTq –∏ BART –æ–±—É—á–∞—é—Ç—Å—è, document-encoder BERTd –∏ –∏–Ω–¥–µ–∫—Å –æ—Å—Ç–∞—é—Ç—Å—è —Ñ–∏–∫—Å–∏—Ä–æ–≤–∞–Ω–Ω—ã–º–∏

–û—Ç–≤–µ—Ç Tbank: –í RAG (Retrieval-Augmented Generation) –æ–±—É—á–µ–Ω–∏–µ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ –∏ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ –ø—Ä–æ–∏—Å—Ö–æ–¥–∏—Ç —Å–æ–≤–º–µ—Å—Ç–Ω–æ. –ì–µ–Ω–µ—Ä–∞—Ç–æ—Ä (–Ω–∞–ø—Ä–∏–º–µ—Ä, –º–æ–¥–µ–ª—å –Ω–∞ –æ—Å–Ω–æ–≤–µ T5 –∏–ª–∏ BART) –∏ —Å–∏—Å—Ç–µ–º–∞ –ø–æ–∏—Å–∫–∞ (—Ä–µ—Ç—Ä–∏–≤–µ—Ä) —Ä–∞–±–æ—Ç–∞—é—Ç –≤–º–µ—Å—Ç–µ, —á—Ç–æ–±—ã —É–ª—É—á—à–∏—Ç—å –∫–∞—á–µ—Å—Ç–≤–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–µ–∫—Å—Ç–∞. –û–±–∞ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç–∞ –ø—Ä–æ—Ö–æ–¥—è—Ç –æ–±—É—á–µ–Ω–∏–µ –Ω–∞ –æ–¥–Ω–æ–º –∏ —Ç–æ–º –∂–µ –Ω–∞–±–æ—Ä–µ –¥–∞–Ω–Ω—ã—Ö, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –∏–º –∞–¥–∞–ø—Ç–∏—Ä–æ–≤

In [192]:
# scores [1-3] —á–µ–º –º–µ–Ω—å—à–µ —Ç–µ–º –ª—É—á—à–µ
scores_tbank = [1, 1, 3, 3, 2, 3, 2, 1, 3, 1, 2, 2, 1, 3, 2, 3, 1, 2, 3, 1, 3, 3, 1, 1, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 3, 3, 1, 3, 3]
scores_yandex = [2, 2, 2, 2, 3, 2, 3, 2, 2, 3, 3, 2, 3, 2, 1, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 1, 1, 2, 3, 2, 3, 1, 3, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 1, 2, 1, 2, 2, 2, 1]
scores_qwen = [2, 3, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 3, 1, 3, 1, 1, 2, 3, 1, 3, 2, 1, 2, 3, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 3, 3, 2, 1, 2, 3, 2, 2, 2, 1, 3, 1, 2]

In [195]:
df['gen_tbank_rank'] = scores_tbank
df['gen_yandex_rank'] = scores_yandex
df['gen_qwen_rank'] = scores_qwen

In [197]:
df.to_csv('test_results/article_questions_all.csv', index=False)

## –°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤

In [201]:
df[['gen_tbank_rank', 'gen_yandex_rank', 'gen_qwen_rank']].mean().sort_values()

gen_qwen_rank      1.74
gen_tbank_rank     2.10
gen_yandex_rank    2.16
dtype: float64

# Bleu/Rouge

In [130]:
df = pd.read_csv('article_questions_all.csv')

## Bleu

In [132]:
hyps = df["Tbank"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_tbank"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [133]:
hyps = df["YandexGPT"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_yandex"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [134]:
hyps = df["Qwen3"].tolist()
refs = df["gt"].tolist()
bleu = BLEU(tokenize="intl", smooth_method="exp", effective_order=True)   

df["bleu_qwen"] = [
    bleu.sentence_score(h, [r]).score for h, r in zip(hyps, refs)
]

In [135]:
df[['bleu_tbank', 'bleu_yandex', 'bleu_qwen']]

Unnamed: 0,bleu_tbank,bleu_yandex,bleu_qwen
0,3.515791,0.419478,1.273276
1,4.07844,9.514054,0.004866
2,1.934502,1.599418,4.461248
3,2.641894,5.860751,2.088522
4,0.968374,1.622934,1.4333
5,0.973466,2.734225,16.723342
6,3.769086,5.954583,8.624407
7,0.597629,1.232214,1.232214
8,1.053028,4.858588,3.759195
9,2.058758,1.888475,1.959711


In [136]:
df[['bleu_tbank', 'bleu_yandex', 'bleu_qwen']].mean()

bleu_tbank     3.644703
bleu_yandex    4.321198
bleu_qwen      4.840649
dtype: float64

In [137]:
df.to_csv('article_questions_all.csv', index=False)

## Rouge

In [138]:
df = pd.read_csv('article_questions_all.csv')

In [139]:
import re
from nltk.stem.snowball import SnowballStemmer

stem = SnowballStemmer("russian")

class RussianTok(tokenizers.Tokenizer):
    _re = re.compile(r"[–∞-—è—ë]+", re.I)
    def tokenize(self, txt):
        return [stem.stem(w) for w in self._re.findall(txt.lower())]

ru_tok = RussianTok()

scorer = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL"],
    use_stemmer=False,          
    tokenizer=ru_tok            # —Å–≤–æ–π —Ç–æ–∫–µ–Ω–∞–π–∑–µ—Ä
)

In [140]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["Tbank"], df["gt"])
]

df["rouge1_f_tbank"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_tbank"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_tbank"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_tbank"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_tbank"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_tbank"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_tbank"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_tbank"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_tbank"] = [s["rougeL"].recall for s in pair_scores]

In [141]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["YandexGPT"], df["gt"])
]

df["rouge1_f_yandex"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_yandex"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_yandex"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_yandex"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_yandex"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_yandex"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_yandex"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_yandex"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_yandex"] = [s["rougeL"].recall for s in pair_scores]

In [142]:
pair_scores = [
    scorer.score(ref, hyp)          
    for hyp, ref in zip(df["Qwen3"], df["gt"])
]

df["rouge1_f_qwen"] = [s["rouge1"].fmeasure for s in pair_scores]
df["rouge2_f_qwen"] = [s["rouge2"].fmeasure for s in pair_scores]
df["rougeL_f_qwen"] = [s["rougeL"].fmeasure for s in pair_scores]

df["rouge1_p_qwen"] = [s["rouge1"].precision for s in pair_scores]
df["rouge2_p_qwen"] = [s["rouge2"].precision for s in pair_scores]
df["rougeL_p_qwen"] = [s["rougeL"].precision for s in pair_scores]

df["rouge1_r_qwen"] = [s["rouge1"].recall for s in pair_scores]
df["rouge2_r_qwen"] = [s["rouge2"].recall for s in pair_scores]
df["rougeL_r_qwen"] = [s["rougeL"].recall for s in pair_scores]

### F-measure

In [143]:
df[['rouge1_f_tbank', 'rouge2_f_tbank', 'rougeL_f_tbank']].mean()

rouge1_f_tbank    0.206473
rouge2_f_tbank    0.038891
rougeL_f_tbank    0.154290
dtype: float64

In [144]:
df[['rouge1_f_yandex', 'rouge2_f_yandex', 'rougeL_f_yandex']].mean()

rouge1_f_yandex    0.223548
rouge2_f_yandex    0.037516
rougeL_f_yandex    0.184365
dtype: float64

In [145]:
df[['rouge1_f_qwen', 'rouge2_f_qwen', 'rougeL_f_qwen']].mean()

rouge1_f_qwen    0.255723
rouge2_f_qwen    0.072566
rougeL_f_qwen    0.202847
dtype: float64

### Precision

In [146]:
df[['rouge1_p_tbank', 'rouge2_p_tbank', 'rougeL_p_tbank']].mean()

rouge1_p_tbank    0.151476
rouge2_p_tbank    0.028680
rougeL_p_tbank    0.112875
dtype: float64

In [147]:
df[['rouge1_p_yandex', 'rouge2_p_yandex', 'rougeL_p_yandex']].mean()

rouge1_p_yandex    0.234096
rouge2_p_yandex    0.039478
rougeL_p_yandex    0.195566
dtype: float64

In [148]:
df[['rouge1_p_qwen', 'rouge2_p_qwen', 'rougeL_p_qwen']].mean()

rouge1_p_qwen    0.267341
rouge2_p_qwen    0.073784
rougeL_p_qwen    0.211680
dtype: float64

### Recall

In [149]:
df[['rouge1_r_tbank', 'rouge2_r_tbank', 'rougeL_r_tbank']].mean()

rouge1_r_tbank    0.390185
rouge2_r_tbank    0.071249
rougeL_r_tbank    0.298839
dtype: float64

In [150]:
df[['rouge1_r_yandex', 'rouge2_r_yandex', 'rougeL_r_yandex']].mean()

rouge1_r_yandex    0.259862
rouge2_r_yandex    0.039937
rougeL_r_yandex    0.214634
dtype: float64

In [151]:
df[['rouge1_r_qwen', 'rouge2_r_qwen', 'rougeL_r_qwen']].mean()

rouge1_r_qwen    0.276396
rouge2_r_qwen    0.076814
rougeL_r_qwen    0.221734
dtype: float64

In [152]:
df.to_csv('article_questions_all.csv', index=False)

## Cosine Similarity 

In [208]:
from sentence_transformers import SentenceTransformer
import torch

# –ó–∞–≥—Ä—É–∑–∏–º SOTA —ç–º–±–µ–¥–¥–µ—Ä –¥–ª—è —Ä—É—Å—Å–∫–∏—Ö —Ç–µ–∫—Å—Ç–æ–≤
embedder = SentenceTransformer("ai-forever/FRIDA")

In [209]:
df_gen = pd.read_csv('test_results/article_questions_all.csv')

In [216]:
df_gen.head()

Unnamed: 0,Questions,gt,Tbank,YandexGPT,Qwen3,gen_tbank_rank,gen_yandex_rank,gen_qwen_rank
0,–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏...,–í –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è Adam —Å –ø–∞—Ä...,–í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä Ad...,–ú—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º...,"–û–ø—Ç–∏–º–∞–π–∑–µ—Ä ‚Äî Adam, –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã: Œ≤‚ÇÅ = 0.9, Œ≤...",1,2,2
1,–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transf...,"Transformer –∑–∞–º–µ–Ω—è–µ—Ç RNN –∏–ª–∏ LSTM/GRU, –∏—Å–ø–æ–ª—å–∑...","–í —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö, —Ç...","Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏, –Ω–∞–∏–±–æ...",–†–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏.,1,2,3
2,–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º...,"–≠—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –≤—ã—á–∏—Å–ª—è–µ—Ç:\nAtt...",–î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –≤ Scaled Dot-Product Attention ...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–Ω–∏–º...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω...,3,2,1
3,–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Tra...,"Encoder: N –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –±–ª–æ–∫–æ–≤ (–æ–±—ã—á–Ω–æ N=6), –∫–∞–∂...",–°—Ç—Ä—É–∫—Ç—É—Ä–∞ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ –¥–≤—É—Ö –æ—Å–Ω–æ–≤–Ω—ã—Ö...,–°—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer —Å–æ—Å...,Encoder —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤...,3,2,1
4,–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏...,"–ü–æ—Ç–æ–º—É —á—Ç–æ –∫–∞–∂–¥–∞—è ""–≥–æ–ª–æ–≤–∞"" –æ–±—É—á–∞–µ—Ç—Å—è —Ñ–æ–∫—É—Å–∏—Ä–æ–≤...",Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,Multi-head attention –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–æ–≤–º–µ—Å—Ç...,Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,2,3,1


## Tbank

In [217]:
reference_answers = df_gen['gt'].tolist()
generated_answers = df_gen['Tbank'].tolist()
questions = df['Questions'].tolist()

In [220]:
from tqdm import tqdm
from typing import List, Dict

def evaluate_similarity_frida(
    questions: List[str], 
    generated_answers: List[str],
    reference_answers: List[str]
) -> List[float]:
    """
    –í—ã—á–∏—Å–ª—è–µ—Ç –∫–æ—Å–∏–Ω—É—Å–Ω—É—é –±–ª–∏–∑–æ—Å—Ç—å –º–µ–∂–¥—É —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã–º–∏ –∏ —ç—Ç–∞–ª–æ–Ω–Ω—ã–º–∏ –æ—Ç–≤–µ—Ç–∞–º–∏
    —Å –ø–æ–º–æ—â—å—é FRIDA SentenceTransformer.

    :param questions: –°–ø–∏—Å–æ–∫ –≤–æ–ø—Ä–æ—Å–æ–≤
    :param generated_answers: –°–ø–∏—Å–æ–∫ –æ—Ç–≤–µ—Ç–æ–≤, —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö LLM.
    :param reference_answers: –°–ø–∏—Å–æ–∫ —ç—Ç–∞–ª–æ–Ω–Ω—ã—Ö –æ—Ç–≤–µ—Ç–æ–≤.
        
    :return: –ö–æ—Å–∏–Ω—É—Å–Ω–∞—è –±–ª–∏–∑–æ—Å—Ç—å –ø–æ –∫–∞–∂–¥–æ–π –ø–∞—Ä–µ.
    """

    inputs = (
        [f"search_document: {text}" for text in generated_answers] +
        [f"search_document: {text}" for text in reference_answers]
    )

    embeddings = embedder.encode(inputs, convert_to_tensor=True)

    gen_embs = embeddings[:len(generated_answers)]
    ref_embs = embeddings[len(generated_answers):]

    sim_scores = (gen_embs @ ref_embs.T).diagonal().tolist()
    print(len(sim_scores))
    for i, (question, generated, reference) in enumerate(zip(questions, generated_answers, reference_answers)):
        
        print(f"–í–æ–ø—Ä–æ—Å: {question}:")
        print(f"cosine_similarity: {sim_scores[i]:.4f}\n")
        print(100 * '*')

    avg_sim = sum(sim_scores) / len(sim_scores)
    print("\n–û–±—â–∞—è –æ—Ü–µ–Ω–∫–∞")
    print(f"–°—Ä–µ–¥–Ω–∏–π similarity: {avg_sim:.4f}")
    
    return sim_scores

In [221]:
scores_tbank = evaluate_similarity_frida(questions, generated_answers, reference_answers)

50
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?:
cosine_similarity: 0.6691

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?:
cosine_similarity: 0.6954

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?:
cosine_similarity: 0.5031

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?:
cosine_similarity: 0.3556

**************************************************************

In [223]:
df_gen['sim_scores_tbank'] = scores_tbank

In [225]:
df_gen.head()

Unnamed: 0,Questions,gt,Tbank,YandexGPT,Qwen3,gen_tbank_rank,gen_yandex_rank,gen_qwen_rank,sim_scores_tbank
0,–ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏...,–í –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è Adam —Å –ø–∞—Ä...,–í –¥–∞–Ω–Ω–æ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª—Å—è –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä Ad...,–ú—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ç–æ—Ä Adam —Å –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º...,"–û–ø—Ç–∏–º–∞–π–∑–µ—Ä ‚Äî Adam, –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã: Œ≤‚ÇÅ = 0.9, Œ≤...",1,2,2,0.669123
1,–ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transf...,"Transformer –∑–∞–º–µ–Ω—è–µ—Ç RNN –∏–ª–∏ LSTM/GRU, –∏—Å–ø–æ–ª—å–∑...","–í —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö, —Ç...","Transformer –∑–∞–º–µ–Ω—è–µ—Ç —Ä–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏, –Ω–∞–∏–±–æ...",–†–µ–∫—É—Ä—Ä–µ–Ω—Ç–Ω—ã–µ —Å–ª–æ–∏.,1,2,3,0.695416
2,–ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º...,"–≠—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω–∏–º–∞–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–π –≤—ã—á–∏—Å–ª—è–µ—Ç:\nAtt...",–î–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk –≤ Scaled Dot-Product Attention ...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ç–æ–¥ –≤–Ω–∏–º...,Scaled Dot-Product Attention ‚Äî —ç—Ç–æ –º–µ—Ö–∞–Ω–∏–∑–º –≤–Ω...,3,2,1,0.503097
3,–ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Tra...,"Encoder: N –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –±–ª–æ–∫–æ–≤ (–æ–±—ã—á–Ω–æ N=6), –∫–∞–∂...",–°—Ç—Ä—É–∫—Ç—É—Ä–∞ Transformer —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ –¥–≤—É—Ö –æ—Å–Ω–æ–≤–Ω—ã—Ö...,–°—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer —Å–æ—Å...,Encoder —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Å—Ç–µ–∫–∞ –∏–∑ 6 –∏–¥–µ–Ω—Ç–∏—á–Ω—ã—Ö —Å–ª–æ–µ–≤...,3,2,1,0.355634
4,–ü–æ—á–µ–º—É Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏...,"–ü–æ—Ç–æ–º—É —á—Ç–æ –∫–∞–∂–¥–∞—è ""–≥–æ–ª–æ–≤–∞"" –æ–±—É—á–∞–µ—Ç—Å—è —Ñ–æ–∫—É—Å–∏—Ä–æ–≤...",Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,Multi-head attention –ø–æ–∑–≤–æ–ª—è–µ—Ç –º–æ–¥–µ–ª–∏ —Å–æ–≤–º–µ—Å—Ç...,Multi-Head Attention —É–ª—É—á—à–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å...,2,3,1,0.405849


## Yandex

In [226]:
reference_answers = df_gen['gt'].tolist()
generated_answers = df_gen['YandexGPT'].tolist()
questions = df['Questions'].tolist()

In [227]:
scores_yandex = evaluate_similarity_frida(questions, generated_answers, reference_answers)

50
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?:
cosine_similarity: 0.5701

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?:
cosine_similarity: 0.7699

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?:
cosine_similarity: 0.5582

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?:
cosine_similarity: 0.4156

**************************************************************

In [228]:
df_gen['sim_scores_yandex'] = scores_yandex

## Qwen3

In [231]:
reference_answers = df_gen['gt'].tolist()
generated_answers = df_gen['Qwen3'].tolist()
questions = df['Questions'].tolist()

In [233]:
scores_qwen = evaluate_similarity_frida(questions, generated_answers, reference_answers)

50
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –æ–ø—Ç–∏–º–∞–π–∑–µ—Ä –∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∏—Å—å –≤ –º–æ–¥–µ–ª–∏?:
cosine_similarity: 0.5004

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫–∏–µ –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä–Ω—ã–µ –∫–æ–º–ø–æ–Ω–µ–Ω—Ç—ã –∑–∞–º–µ–Ω—è–µ—Ç Transformer –≤ —Ç—Ä–∞–¥–∏—Ü–∏–æ–Ω–Ω—ã—Ö sequence-to-sequence –º–æ–¥–µ–ª—è—Ö?:
cosine_similarity: 0.3702

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ß—Ç–æ —Ç–∞–∫–æ–µ Scaled Dot-Product Attention –∏ –∑–∞—á–µ–º –≤ –Ω–µ–º –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–µ–ª–µ–Ω–∏–µ –Ω–∞ ‚àödk?:
cosine_similarity: 0.6448

****************************************************************************************************
–í–æ–ø—Ä–æ—Å: –ö–∞–∫ —É—Å—Ç—Ä–æ–µ–Ω–∞ —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ encoder –∏ decoder –≤ Transformer?:
cosine_similarity: 0.5582

**************************************************************

In [234]:
df_gen['sim_scores_qwen'] = scores_qwen

In [246]:
df_gen.to_csv('test_results/article_questions_all.csv', index=False)

# –°—Ä–µ–¥–Ω–∏–µ sim_scores

In [245]:
df_gen[['sim_scores_tbank', 'sim_scores_yandex', 'sim_scores_qwen']].mean().sort_values(ascending=False)

sim_scores_qwen      0.533568
sim_scores_tbank     0.531741
sim_scores_yandex    0.526195
dtype: float64

# –°—Ä–µ–¥–Ω–∏–µ —Ä–∞–Ω–≥–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ similarity

In [239]:
ranked = df_gen[['sim_scores_tbank', 'sim_scores_yandex', 'sim_scores_qwen']].rank(axis=1, method='min', ascending=False)

In [242]:
ranked.mean().sort_values()

sim_scores_qwen      1.82
sim_scores_tbank     1.98
sim_scores_yandex    2.20
dtype: float64

# –°—Ä–µ–¥–Ω–∏–µ —Ä–∞–Ω–≥–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–æ–±—Å—Ç–≤–µ–Ω–Ω–æ–π –ø—Ä–æ–≤–µ—Ä–∫–∏

In [241]:
df_gen[['gen_tbank_rank', 'gen_yandex_rank', 'gen_qwen_rank']].mean().sort_values()

gen_qwen_rank      1.74
gen_tbank_rank     2.10
gen_yandex_rank    2.16
dtype: float64

# –°–æ–±–µ—Ä–µ–º –≤—Å–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –≤ –æ–¥–Ω—É —Ç–∞–±–ª–∏—Ü—É

In [283]:
df = pd.DataFrame()

In [284]:
df.index = ['YandexGPT 5', 'T-Lite', 'Qwen 3']

In [285]:
df['–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ—Ü–µ–Ω–∫–∏ Gpt4o (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π)'] = [2.1, 2.2, 1.7]
df['–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ—Å–∏–Ω—É—Å–Ω–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π)'] = [2.0, 2.3, 1.7]

df['–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ—Ü–µ–Ω–∫–∏ Gpt4o (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤)'] = [2.16, 2.1, 1.74]
df['–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ—Å–∏–Ω—É—Å–Ω–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤)'] = [2.2, 1.98, 1.82]

df['–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π)'] = [12, 15, 17]
df['–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤)'] = [4.2, 8.5, 7.7]

In [286]:
df

Unnamed: 0,–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ—Ü–µ–Ω–∫–∏ Gpt4o (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π),–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ—Å–∏–Ω—É—Å–Ω–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π),–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ—Ü–µ–Ω–∫–∏ Gpt4o (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤),–°—Ä–µ–¥–Ω–∏–π —Ä–∞–Ω–≥ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–æ—Å–∏–Ω—É—Å–Ω–æ–π –±–ª–∏–∑–æ—Å—Ç–∏ (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤),–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ (—Å—É–º–º–∞—Ä–∏–∑–∞—Ü–∏—è) (10 —Å—Ç–∞—Ç–µ–π),–°—Ä–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ (–≥–µ–Ω–µ—Ä–∞—Ü–∏—è –æ—Ç–≤–µ—Ç–æ–≤) (50 –≤–æ–ø—Ä–æ—Å–æ–≤)
YandexGPT 5,2.1,2.0,2.16,2.2,12,4.2
T-Lite,2.2,2.3,2.1,1.98,15,8.5
Qwen 3,1.7,1.7,1.74,1.82,17,7.7


In [287]:
df.to_csv('test_results/agg_results.csv', index=False)