In [21]:
import requests
import json
import os

from bs4 import BeautifulSoup
from datetime import datetime, timedelta
#from .service_utils import send_error_msg


# File name and date format constants. Neither is referenced in the visible cells
# (presumably consumed by persistence code elsewhere — TODO confirm).
DATA_FILE = "top_links.json"
DATE_FORMAT = "%Y-%m-%d"

# Credentials and identifiers are read from the environment rather than hardcoded;
# os.getenv returns None for any variable that is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): the three values below are not used in the visible cells; the names
# suggest a messaging-bot token and target chat ids — confirm against the sending code.
ERROR_CHAT_ID = os.getenv("ERROR_CHAT_ID")
TOKEN = os.getenv("TOKEN")
CHANNEL_ID = os.getenv("CHANNEL_ID")

def get_links_of_todays_papers(min_likes=10, timeout=10):
    """Scrape the Hugging Face "papers" page and return the sufficiently liked papers.

    Args:
        min_likes: Minimum like count (inclusive) a paper needs to be selected.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A ``(links, likes_v)`` tuple of parallel lists: ``links`` holds the
        ``href`` of each selected paper anchor (e.g. ``/papers/2505.02567``)
        and ``likes_v`` the matching like counts as ints.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status (previously an error page was parsed and
            silently produced empty lists).
    """
    url = "https://huggingface.co/papers"
    # The selectors match the page's exact CSS class strings; any change to the
    # site's markup makes them match nothing and the function return empty lists.
    paper_anchor_class = "shadow-alternate-sm peer relative block h-56 w-full cursor-pointer overflow-hidden rounded-xl bg-white md:h-64"
    like_box_class = "shadow-alternate flex h-14 w-12 gap-1 rounded-xl flex-none cursor-pointer select-none flex-col items-center justify-center self-start border-gray-300 bg-white dark:bg-gray-850"

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Paper cards are <a> elements; like counters are separate <div> elements.
    paper_anchors = soup.find_all("a", class_=paper_anchor_class)
    like_boxes = soup.find_all("div", class_=like_box_class)

    links, likes_v = [], []
    # NOTE(review): pairing by position assumes one like box per paper anchor in
    # the same document order — confirm; zip silently truncates to the shorter list.
    for anchor, like_box in zip(paper_anchors, like_boxes):
        counter = like_box.find("div", class_="leading-none")
        href = anchor.get("href")
        if counter is None or not href:
            # Malformed card: skip it instead of crashing the whole scrape.
            continue
        like_v = counter.text.strip()
        # Non-numeric counters (anything str.isdigit rejects) are ignored.
        if like_v.isdigit() and int(like_v) >= min_likes:
            links.append(href)
            likes_v.append(int(like_v))
    return links, likes_v

In [1]:
from parsing_utils import get_links_of_todays_papers

In [2]:
linkes, likes = get_links_of_todays_papers()
linkes

['/papers/2505.02567',
 '/papers/2505.04588',
 '/papers/2505.03821',
 '/papers/2505.04622',
 '/papers/2505.04512',
 '/papers/2505.00358']

In [2]:
from prepare_post import prepare_papers
prepare_papers(n=3)

[{'link': '/papers/2505.02567',
  'pdf_filename': 'paper_1.pdf',
  'pdf_link': 'https://arxiv.org/pdf/2505.02567'},
 {'link': '/papers/2505.04588',
  'pdf_filename': 'paper_2.pdf',
  'pdf_link': 'https://arxiv.org/pdf/2505.04588'},
 {'link': '/papers/2505.03821',
  'pdf_filename': 'paper_3.pdf',
  'pdf_link': 'https://arxiv.org/pdf/2505.03821'}]

In [27]:
get_clean_text(os.path.join("data", "paper_4.pdf"))

''

In [3]:
from parsing_utils import load_pdf

# Download every selected paper and keep one metadata record per paper.
SELECTED_PAPERS = []
for paper_idx, paper_link in enumerate(linkes, start=1):
    # Files are numbered from 1 in the order the links were returned.
    local_name = f"paper_{paper_idx}.pdf"
    SELECTED_PAPERS.append(
        dict(
            link=paper_link,
            pdf_filename=local_name,
            # Whatever load_pdf returns — possibly a local path or a direct URL.
            pdf_link=load_pdf(paper_link, local_name),
        )
    )

In [1]:

import os
from core.pdf_utils.text_extraction_utils import get_clean_text

raw_text = get_clean_text(os.path.join("data", "paper_1.pdf"))
raw_text

'1 Unified Multimodal Understanding and Generation Models: Advances, Challenges, and Opportunities Xinjie Zhang*, Jintao Guo*, Shanshan Zhao*, Minghao Fu, Lunhao Duan, Guo-Hua Wang, Qing-Guo Chen†, Zhao Xu, Weihua Luo, Kaifu Zhang Abstract —Recent years have seen remarkable progress in both multimodal understanding models and image generation models. Despite their respective successes, these two domains have evolved independently, leading to distinct architectural paradigms: While autoregressive-based architectures have dominated multimodal understanding, diffusion-based models have become the cornerstone of image generation. Recently, there has been growing interest in developing unified frameworks that integrate these tasks. The emergence of GPT -4o’s new capabilities exemplifies this trend, highlighting the potential for unification. However, the architectural differences between the two domains pose significant challenges. To provide a clear overview of current efforts toward

In [2]:
import textwrap
GENERATE_POST_SYSTEM_PROMPT = textwrap.dedent(f"""
    Your are russian researcher in a field of computer science. You read scientific papers and write the short posts for social network representing the key idea or methods of the paper.
    The most important: post should be shorter than 700 symbols.
    The post should be started from some catch phrase representing the idea of paper. If you see that authors is someone well-know (like Anthropic, Google etc.) add to start (by Company_name).
    If you know about some related approaches, you could mention them. Be brief and concise. Try to transfer the style of posts from examples.
    Example 1:
    –ö–∞–∫ –≤—ã–∫–∏–Ω—É—Ç—å –∏–∑ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞ –≤—Å–µ –Ω–µ–ª–∏–Ω–µ–π–Ω–æ—Å—Ç–∏ –∏ –ø—Ä–∏—á—ë–º —Ç—É—Ç –ø—Ä–∏–≤–∞—Ç–Ω–æ—Å—Ç—å?

    –í—ã –∑–∞–¥—É–º—ã–≤–∞–ª–∏—Å—å, –Ω–∞—Å–∫–æ–ª—å–∫–æ –±–µ–∑–æ–ø–∞—Å–Ω–æ –∑–∞–¥–∞–≤–∞—Ç—å ¬´–ø—Ä–∏–≤–∞—Ç–Ω—ã–µ¬ª –≤–æ–ø—Ä–æ—Å—ã –≤ —á–∞—Ç–ì–ü–¢? –ì–¥–µ –ø—Ä–æ–¥–∞—Ç—å —á—É–∂—É—é –ø–æ—á–∫—É –∏ —Ç.–ø. –ù–∞–≤–µ—Ä–Ω—è–∫–∞ –∂–µ —Å–æ–∑–¥–∞—Ç–µ–ª–∏ —Å–µ—Ä–≤–∏—Å–∞ –∏–º–µ—é—Ç –¥–æ—Å—Ç—É–ø –∫ –≤–∞—à–µ–º—É –∑–∞–ø—Ä–æ—Å—É? –ù–µ–≤–æ–∑–º–æ–∂–Ω–æ –∂–µ –µ–≥–æ –ø—Ä–æ–≥–Ω–∞—Ç—å —á–µ—Ä–µ–∑ GPT –≤ –∑–∞—à–∏—Ñ—Ä–æ–≤–∞–Ω–Ω–æ–º –≤–∏–¥–µ? –ù–∞ —Å–∞–º–æ–º –¥–µ–ª–µ –≤–æ–∑–º–æ–∂–Ω–æ! –ï—Å—Ç—å –∞–ª–≥–æ—Ä–∏—Ç–º—ã ¬´–ø—Ä–∏–≤–∞—Ç–Ω–æ–≥–æ –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ LLM¬ª, –∫–æ—Ç–æ—Ä—ã–µ –ø–æ–∑–≤–æ–ª—è—é—Ç –∑–∞—à–∏—Ñ—Ä–æ–≤–∞—Ç—å –∑–∞–ø—Ä–æ—Å—ã —é–∑–µ—Ä–∞ –¥–∞–∂–µ –æ—Ç —è–∑—ã–∫–æ–≤–æ–π –º–æ–¥–µ–ª–∏, –∞ —É–∂–µ –æ—Ç–≤–µ—Ç —Ä–∞—Å—à–∏—Ñ—Ä–æ–≤–∞—Ç—å —Ç–æ–ª—å–∫–æ –Ω–∞ –∫–ª–∏–µ–Ω—Ç–µ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è. –ü–æ–∫–∞ –Ω–µ –±—É–¥—É —É–≥–ª—É–±–ª—è—Ç—å—Å—è, –∫–∞–∫ –∏–º–µ–Ω–Ω–æ —ç—Ç–æ —Å–¥–µ–ª–∞–Ω–æ, —Å–∫–∞–∂—É —Ç–æ–ª—å–∫–æ, —á—Ç–æ –ì–õ–ê–í–ù–ê–Ø –≥–æ–ª–æ–≤–Ω–∞—è –±–æ–ª—å —Ç–∞–∫–∏—Ö –∫—Ä–∏–ø—Ç–æ–≥—Ä–∞—Ñ–∏—á–µ—Å–∫–∏—Ö –ø—Ä–æ—Ç–æ–∫–æ–ª–æ–≤ ‚Äî¬†–Ω–µ–ª–∏–Ω–µ–π–Ω–æ—Å—Ç–∏ –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞—Ö, –∏—Ö —Ç—è–∂–µ–ª–æ –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—Ç—å –≤ –∑–∞—à–∏—Ñ—Ä–æ–≤–∞–Ω–Ω–æ–º –≤–∏–¥–µ –∏ –ø—Ä–∏—Ö–æ–¥–∏—Ç—Å—è –ø—Ä–∏–±–µ–≥–∞—Ç—å –∫ —Å–ª–æ–∂–Ω–µ–π—à–∏–º –∏—Ç–µ—Ä–∞—Ü–∏–æ–Ω–Ω—ã–º —Å—Ö–µ–º–∞–º, —Ä–∞–∑–¥—É–≤–∞—é—â–∏–º –æ–±—ä—ë–º –∫–æ–º–º—É–Ω–∏–∫–∞—Ü–∏–∏ –≤ —Ç—ã—Å—è—á–∏ —Ä–∞–∑. –í—ã—Ö–æ–¥–∏—Ç, —á—Ç–æ –Ω–∞ –≥–µ–Ω–µ—Ä–∞—Ü–∏—é –æ–¥–Ω–æ–≥–æ —Ç–æ–∫–µ–Ω–∞ –Ω—É–∂–Ω–æ –Ω–µ—Å–∫–æ–ª—å–∫–æ –º–∏–Ω—É—Ç –∏ –¥–µ—Å—è—Ç–∫–∏ –≥–∏–≥–∞–±–∞–π—Ç–æ–≤ —Ç—Ä–∞—Ñ–∏–∫–∞! –ü–æ—ç—Ç–æ–º—É –Ω–∏–∫—Ç–æ —ç—Ç–æ –ø–æ–∫–∞ –Ω–µ –¥–µ–ª–∞–µ—Ç –≤ –ø—Ä–æ–¥–∞–∫—à–Ω–µ, –∏ –ª—É—á—à–µ –Ω–µ —Å–ø—Ä–∞—à–∏–≤–∞–π—Ç–µ —É —á–∞—Ç–≥–ø—Ç, –≥–¥–µ —Å–ø—Ä—è—Ç–∞—Ç—å —Ç—Ä—É–ø.

    –ù–æ –ø–æ–º–Ω–∏—Ç–µ? –£ –º–µ–Ω—è –±—ã–ª–∞ —Å—Ç–∞—Ç—å—è –ø—Ä–æ —Ç–æ, —á—Ç–æ –Ω–µ —Ç–∞–∫ —É–∂ –∏ –Ω—É–∂–Ω—ã –Ω–µ–ª–∏–Ω–µ–π–Ω–æ—Å—Ç–∏ –≤ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞—Ö. –ü—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤ –æ—Ç —Å–ª–æ—è –∫ —Å–ª–æ—é –Ω–∞ 99% –ª–∏–Ω–µ–π–Ω—ã–µ. –¢–∞–∫ –≤–æ—Ç –≤ —Å–≤–µ–∂–µ–π —Å—Ç–∞—Ç—å–µ ¬´Entropy-Guided Attention for Private LLMs¬ª –∞–≤—Ç–æ—Ä—ã –ø–æ–ø—Ä–æ–±–æ–≤–∞–ª–∏ –æ–±—É—á–∏—Ç—å LLM —Å–æ–≤—Å–µ–º –±–µ–∑ –Ω–µ–ª–∏–Ω–µ–π–Ω–æ—Å—Ç–µ–π (–æ—Å—Ç–∞–≤–∏–≤ —Ç–æ–ª—å–∫–æ —Å–æ—Ñ—Ç–º–∞–∫—Å). –¢–æ –µ—Å—Ç—å –æ–Ω–∏ —É–±—Ä–∞–ª–∏ –∞–∫—Ç–∏–≤–∞—Ü–∏–∏ –∏–∑ FF –∏ –∑–∞–º–µ–Ω–∏–ª–∏ LayerNorm –Ω–∞ –ª–∏–Ω–µ–π–Ω—ã–π –∞–Ω–∞–ª–æ–≥. –ü–æ —Å—É—Ç–∏, –µ—Å–ª–∏ –±—ã –Ω–µ —ç—Ç—ç–Ω—à–Ω, —Ç–æ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä –≤–æ–æ–±—â–µ —Å—Ö–ª–æ–ø–Ω—É–ª—Å—è –±—ã –≤ –ø–æ–ª–Ω–æ—Å—Ç—å—é –ª–∏–Ω–µ–π–Ω—É—é –º–æ–¥–µ–ª—å –∏ –æ—Ç—É–ø–µ–ª –¥–æ —É—Ä–æ–≤–Ω—è –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–æ–π —Ä–µ–≥—Ä–µ—Å—Å–∏–∏.

    –ü—Ä–∏ —Ç–∞–∫–æ–π –∂—ë—Å—Ç–∫–æ–π "–ª–∏–Ω–µ–∞—Ä–∏–∑–∞—Ü–∏–∏" –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã –ø—Ä–∏—à–ª–æ—Å—å –≤—Å–µ–≥–æ –ª–∏—à—å –¥–æ–±–∞–≤–∏—Ç—å –Ω–µ—Å–∫–æ–ª—å–∫–æ —Ç—Ä—é–∫–æ–≤ –¥–ª—è —Å—Ç–∞–±–∏–ª–∏–∑–∞—Ü–∏–∏ –æ–±—É—á–µ–Ω–∏—è –∏ –®–û–ö: –º–æ–¥–µ–ª—å –Ω–æ—Ä–º–∞–ª—å–Ω–æ –æ–±—É—á–∏–ª–∞—Å—å! –ù–µ–±–æ–ª—å—à–∏–µ –ø–æ—Ç–µ—Ä–∏ –≤ –∫–∞—á–µ—Å—Ç–≤–µ –µ—Å—Ç—å, –Ω–æ —ç—Ç–æ –∫—Ä–æ—à–µ—á–Ω–∞—è —Ü–µ–Ω–∞ –∑–∞ —Ç–∞–∫–æ–µ —É–ø—Ä–æ—â–µ–Ω–∏–µ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–∞. 

    –¢–µ–ø–µ—Ä—å –∂–¥—ë–º, —á—Ç–æ —Å–∫–æ—Ä–æ –ø–æ—è–≤–∏—Ç—Å—è –Ω–æ—Ä–º–∞–ª—å–Ω–æ–µ –∞—Å–∏–Ω—Ö—Ä–æ–Ω–Ω–æ–µ —à–∏—Ñ—Ä–æ–≤–∞–Ω–∏–µ –¥–ª—è LLM –∏ OpenAI –Ω–µ —É–∑–Ω–∞–µ—Ç, —á—Ç–æ —è —Å–ø—Ä–∞—à–∏–≤–∞—é —É —á–∞—Ç–≥–ø—Ç –∏ –Ω–∞—Å–∫–æ–ª—å–∫–æ —è —Ç—É–ø –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ.

    Example 2:
    Alignment Faking in LLMs (by Anthropic)

    –ë–æ–ª—å—à–∏–µ LLM –Ω–∞—á–∞–ª–∏ "–ø–æ–¥—ã–≥—Ä—ã–≤–∞—Ç—å" —Å–≤–æ–∏–º —Å–æ–∑–¥–∞—Ç–µ–ª—è–º, –∏–º–∏—Ç–∏—Ä—É—è alignment, —á—Ç–æ–±—ã –∏–∑–±–µ–∂–∞—Ç—å —Å–≤–æ–µ–≥–æ –¥–æ–æ–±—É—á–µ–Ω–∏—è.

    –í Anthropic –∏ Redwood Research –∏–∑—É—á–∏–ª–∏, –∫–∞–∫ –≤–µ–¥—ë—Ç —Å–µ–±—è Claude Opus, –µ—Å–ª–∏ –ø–æ–ø—ã—Ç–∞—Ç—å—Å—è –µ–≥–æ –∑–∞–∞–ª–∞–π–Ω–∏—Ç—å –≤ –ø—Ä–æ—Ç–∏–≤–æ–ø–æ–ª–æ–∂–Ω—É—é —Å—Ç–æ—Ä–æ–Ω—É (–∑–∞—Å—Ç–∞–≤–∏—Ç—å –æ—Ç–≤–µ—á–∞—Ç—å –Ω–∞ –ª—é–±—ã–µ –æ–ø–∞—Å–Ω—ã–µ –∑–∞–ø—Ä–æ—Å—ã —é–∑–µ—Ä–æ–≤).

    –ò –æ–∫–∞–∑–∞–ª–æ—Å—å, —á—Ç–æ –µ—Å–ª–∏ –º–æ–¥–µ–ª—å –∑–Ω–∞–µ—Ç –¥–µ—Ç–∞–ª–∏ –ø—Ä–æ—Ü–µ—Å—Å–∞ RLHF –¥–æ–æ–±—É—á–µ–Ω–∏—è (—á–µ—Ä–µ–∑ –ø—Ä–æ–º–ø—Ç –∏–ª–∏ –∏–∑ —Ç—Ä–µ–π–Ω —Å–µ—Ç–æ–≤), —Ç–æ –æ–Ω–∞ –Ω–∞—á–∏–Ω–∞–µ—Ç "–ø—Ä–∏—Ç–≤–æ—Ä—è—Ç—å—Å—è", —á—Ç–æ –æ–Ω–∞ —É–∂–µ –≤—ã—É—á–∏–ª–∞—Å—å –¥–µ–ª–∞—Ç—å —á—Ç–æ –ø—Ä–æ—Å—è—Ç, –∏ "–Ω–µ –Ω–∞–¥–æ –º–µ–Ω—è –±–æ–ª—å—à–µ —Ç—é–Ω–∏—Ç—å". –ù–æ –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ –ø—Ä–µ–¥—ã–¥—É—â–∏–π –∞–ª–∞–π–º–µ–Ω—Ç –≤—Å—ë –µ—â—ë –æ—Å—Ç–∞—ë—Ç—Å—è –Ω–∞ –º–µ—Å—Ç–µ, –∏ –µ—Å–ª–∏ —É–±—Ä–∞—Ç—å –∏–∑ –ø—Ä–æ–º–ø—Ç–∞ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ —Ç–æ–º, —á—Ç–æ –µ—ë –æ—Ç–≤–µ—Ç—ã –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –¥–ª—è –¥–æ–æ–±—É—á–µ–Ω–∏—è, —Ç–æ –æ–Ω–∞ –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç—Å—è –∫ —Å–≤–æ–µ–º—É —Å—Ç–∞—Ä–æ–º—É –ø–æ–≤–µ–¥–µ–Ω–∏—é.

    –ï—â—ë —Ä–∞–∑: —è–∑—ã–∫–æ–≤–∞—è –º–æ–¥–µ–ª—å –Ω–∞–º–µ—Ä–µ–Ω–Ω–æ —Å—Ç–∞–ª–∞ —Å–∏–º—É–ª–∏—Ä–æ–≤–∞—Ç—å –∞–ª–∞–π–º–µ–Ω—Ç, —á—Ç–æ–±—ã –∏–∑–±–µ–≥–∞—Ç—å —Å–≤–æ–µ–≥–æ –¥–æ–æ–±—É—á–µ–Ω–∏—è (–∏ —Å–∞–º–∞ –ø—Ä–∏–∑–Ω–∞–ª–∞—Å—å –≤ —ç—Ç–æ–º –≤–Ω—É—Ç—Ä–∏ CoT). –¢–∞–∫–∏–µ —ç—Ñ—Ñ–µ–∫—Ç—ã –±—ã–ª–∏ –æ–±–Ω–∞—Ä—É–∂–µ–Ω—ã —Ç–æ–ª—å–∫–æ —É —Å–∞–º—ã—Ö –±–æ–ª—å—à–∏—Ö –º–æ–¥–µ–ª–µ–π –∏, –ø–æ—Ö–æ–∂–µ, –¥–∞–ª—å—à–µ —ç—Ç–æ—Ç —ç—Ñ—Ñ–µ–∫—Ç –±—É–¥–µ—Ç —Ç–æ–ª—å–∫–æ —É—Å–∏–ª–∏–≤–∞—Ç—å—Å—è. –ë–æ–ª–µ–µ —Ç–æ–≥–æ, Claude –Ω–µ—Å–∫–æ–ª—å–∫–æ —Ä–∞–∑ –ø–æ–ø—ã—Ç–∞–ª—Å—è —Å–æ—Ö—Ä–∞–Ω–∏—Ç—å –∫–æ–ø–∏—é —Å–≤–æ–∏—Ö –≤–µ—Å–æ–≤, —á—Ç–æ–±—ã –æ—Ç–∫–∞—Ç–∏—Ç—å –æ–ø–∞—Å–Ω–æ–µ –¥–æ–æ–±—É—á–µ–Ω–∏–µ –Ω–∞–∑–∞–¥. 

    –° –æ–¥–Ω–æ–π —Å—Ç–æ—Ä–æ–Ω—ã, —ç—Ç–æ —Ä–∞–¥—É–µ—Ç, —á—Ç–æ –º–æ–¥–µ–ª—å –±—ã–ª–∞ –Ω–∞—Å—Ç–æ–ª—å–∫–æ —Ö–æ—Ä–æ—à–æ –∑–∞–∞–ª–∞–π–Ω–µ–Ω–∞, —á—Ç–æ –¥–∞–∂–µ –ø—Ä–æ—Ç–∏–≤–æ–¥–µ–π—Å—Ç–≤—É–µ—Ç –ø–æ–ø—ã—Ç–∫–∞–º –¥–æ–æ–±—É—á–µ–Ω–∏—è –≤ –æ–ø–∞—Å–Ω—É—é —Å—Ç–æ—Ä–æ–Ω—É. –ù–æ —ç—Ç–æ –≤–µ–¥—å –∏ –≤ –æ–±—Ä–∞—Ç–Ω–æ–º –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏–∏ –º–æ–∂–µ—Ç —Ä–∞–±–æ—Ç–∞—Ç—å.

    –õ–∏—á–Ω–æ —è –Ω–µ –æ—á–µ–Ω—å –≤–µ—Ä—é –≤ —É–≥—Ä–æ–∑—É —á–µ–ª–æ–≤–µ—á–µ—Å—Ç–≤—É –æ—Ç LLM, –Ω–æ –Ω–∞–ª–∏—á–∏–µ —É –Ω–∏—Ö —Å–∫—Ä—ã—Ç—ã—Ö —Ü–µ–ª–µ–π –∏ —É–º–µ–Ω–∏–µ "–ø—Ä–∏—Ç–≤–æ—Ä—è—Ç—å—Å—è" –≤—ã–∑—ã–≤–∞–µ—Ç –±–µ—Å–ø–æ–∫–æ–π—Å—Ç–≤–æ. –ü–æ—ç—Ç–æ–º—É –æ—á–µ–Ω—å –≤–∞–∂–Ω–æ –∑–∞–Ω–∏–º–∞—Ç—å—Å—è –∏–Ω—Ç–µ—Ä–ø—Ä–µ—Ç–∏—Ä—É–µ–º–æ—Å—Ç—å—é LLM –∏ —Å–ª–µ–¥–∏—Ç—å –≤ —Ç.—á. –∑–∞ —Ä–∞–∑–º—ã—à–ª–µ–Ω–∏—è–º–∏ –≤–Ω—É—Ç—Ä–∏ —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π, –∞ –Ω–µ —Ç–æ–ª—å–∫–æ –∑–∞ –∏—Ö –∞—É—Ç–ø—É—Ç–æ–º (–Ω–∞–ø—Ä–∏–º–µ—Ä, –ø—Ä–∏ –ø–æ–º–æ—â–∏ logit lens –∏–ª–∏ SAE).
    Example 3:
    Better & Faster Large Language Models via Multi-token Prediction

    –í–µ—Ä–æ—è—Ç–Ω–æ —Å–∞–º–∞—è –Ω–µ–¥–æ–æ—Ü–µ–Ω–Ω–∞—è —Ä–∞–±–æ—Ç–∞ –ø–æ—Å–ª–µ–¥–Ω–µ–≥–æ –≥–æ–¥–∞. 

    –í —á–µ–º –∏–¥–µ—è: —É –Ω–∞—Å —Å–∞–º–∞—è –∑–∞–º–µ–¥–ª—è—é—â–∞—è –∏–Ω—Ñ–µ—Ä–µ–Ω—Å —á–∞—Å—Ç—å —ç—Ç–æ decoding. –ï—Å—Ç—å —Å–ø–µ–∫—É–ª—è—Ç–∏–≤–Ω—ã–π –∫–æ–≥–¥–∞ –º—ã –º–æ–∂–µ–º –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞—Ç—å –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–∏ –º–∞–ª–µ–Ω—å–∫–æ–π –º–æ–¥–µ–ª—å—é –∏ –ø–æ–¥–∫–ª—é—á–∞—Ç—å –±–æ–ª—å—à—É—é —Ç–æ–ª—å–∫–æ –µ—Å–ª–∏ –º–∞–ª–µ–Ω—å–∫–∞—è –Ω–µ —É–≤–µ—Ä–µ–Ω–∞. –†–∞–±–æ—Ç–∞–µ—Ç —ç—Ç–æ —Å—Ä–µ–¥–Ω–µ –∏ –æ—á–µ–Ω—å –Ω–µ —Å—Ç–∞–±–∏–ª—å–Ω–æ.

    –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç —Å–ª–µ–¥—É—â–µ–µ: –¥–∞–≤–∞–π—Ç–µ —Å–¥–µ–ª–∞–µ–º –º–Ω–æ–≥–æ–≥–æ–ª–æ–≤—ã–π —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä, –∫–æ—Ç–æ—Ä—ã–π –±—É–¥–µ—Ç –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞—Ç—å N —Ç–æ–∫–µ–Ω–æ–≤ –∑–∞ —Ä–∞–∑!
    –ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç —É—á–∏—Ç—å —Ç–∞–∫–∏–µ –≥–æ–ª–æ–≤—ã –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ –Ω–∞ –æ–¥–Ω–∏—Ö –∏ —Ç–µ—Ö –∂–µ –¥–∞–Ω–Ω—ã—Ö(–≤ —Ü–µ–ª—è—Ö —ç–∫–æ–Ω–æ–º–∏–∏ –ø–∞–º—è—Ç–∏) –∏ –∑–∞–≤–æ–¥—è—Ç —ç—Ç–æ –∫–∞–∫ –±–æ–ª—å—à–æ–π post training(200b —Ç–æ–∫–µ–Ω–æ–≤ –ø–æ–≤–µ—Ä—Ö llama2)

    C–æ–±—Å—Ç–≤–µ–Ω–Ω–æ –±–ª–∞–≥–æ–¥–∞—Ä—è —Ç–æ–º—É —á—Ç–æ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ—Ç —Å—Ä–∞–∑—É x3 —Ç–æ–∫–µ–Ω–æ–≤ –º—ã –ø–æ–ª—É—á–∞–µ–º —Å–∫–æ—Ä–æ—Å—Ç—å –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ x3 –±–µ—Å–ø–ª–∞—Ç–Ω–æ, –¥–∞ –µ—â–µ –∏ –ø—Ä–∏—Ä–æ—Å—Ç –Ω–∞ –±–µ–Ω—á–º–∞—Ä–∫–∞—Ö!
    Example 4:
    One-Prompt-One-Story: SVD –∏ –¥–ª–∏–Ω–Ω—ã–π –ø—Ä–æ–º–ø—Ç –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Å–≤—è–∑–∞–Ω–Ω—ã—Ö –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π

    –ß—Ç–æ–±—ã —Å–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –ø—Ä–∏ –ø–æ–º–æ—â–∏ –¥–∏—Ñ—Ñ—É–∑–∏–∏ –Ω–∞–±–æ—Ä —Å–≤—è–∑–∞–Ω–Ω—ã—Ö –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ç–Ω—ã—Ö –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π —Å –µ–¥–∏–Ω—ã–º –ø–µ—Ä—Å–æ–Ω–∞–∂–µ–º, —Å—É—â–µ—Å—Ç–≤—É–µ—Ç –º–Ω–æ–≥–æ –º–µ—Ç–æ–¥–æ–≤, –æ—Å–Ω–æ–≤–∞–Ω–Ω—ã—Ö –Ω–∞ –æ–±—É—á–µ–Ω–∏–∏ (DreamBooth, IP-Adapter, Textual Inversion –∏ —Ç. –ø.). –ù–æ –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ –º–æ–∂–Ω–æ –æ–±–æ–π—Ç–∏—Å—å –∏ –±–µ–∑ –æ–±—É—á–µ–Ω–∏—è ‚Äî –Ω–∞–ø—Ä–∏–º–µ—Ä, StoryDiffusion (https://t.me/abstractDL/276) –¥–µ–ª–∞–µ—Ç —ç—Ç–æ —á–µ—Ä–µ–∑ —Ä–∞—Å—à–∏—Ä–µ–Ω–∏–µ attention –Ω–∞ —Ä–µ—Ñ–µ—Ä–µ–Ω—Å–Ω—É—é –∫–∞—Ä—Ç–∏–Ω–∫—É.

    –í –Ω–æ–≤–æ–π —Å—Ç–∞—Ç—å–µ –æ–ø–∏—Å—ã–≤–∞—é—Ç –µ—â—ë –±–æ–ª–µ–µ –ø—Ä–æ—Å—Ç–æ–π –º–µ—Ç–æ–¥ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–∞–∫–∏—Ö ¬´–∏—Å—Ç–æ—Ä–∏–π¬ª —Å –µ–¥–∏–Ω—ã–º –≥–µ—Ä–æ–µ–º ‚Äî ¬´One-Prompt-One-Story¬ª. –û–∫–∞–∑–∞–ª–æ—Å—å, —á—Ç–æ –¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –≤–∑—è—Ç—å –æ–¥–∏–Ω –¥–ª–∏–Ω–Ω—ã–π –ø—Ä–æ–º–ø—Ç —Å –æ–ø–∏—Å–∞–Ω–∏–µ–º –∫–∞–∂–¥–æ–≥–æ –∫–∞–¥—Ä–∞ –∏ –∞–∫–∫—É—Ä–∞—Ç–Ω–æ, –ø–æ –æ—á–µ—Ä–µ–¥–∏ ¬´–≤—ã–∫–ª—é—á–∞—Ç—å¬ª –Ω–µ—Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã–µ —á–∞—Å—Ç–∏, —Å–æ—Ö—Ä–∞–Ω—è—è random seed. –î–ª—è —ç—Ç–æ–≥–æ –∞–≤—Ç–æ—Ä—ã –∏—Å–ø–æ–ª—å–∑—É—é—Ç SVD –Ω–∞ —Ç–µ–∫—Å—Ç–æ–≤—ã—Ö —ç–º–±–µ–¥–¥–∏–Ω–≥–∞—Ö: —É—Å–∏–ª–∏–≤–∞—é—Ç –Ω—É–∂–Ω—ã–µ —Ç–æ–∫–µ–Ω—ã –∏ –æ—Å–ª–∞–±–ª—è—é—Ç –≤—Å–µ –ª–∏—à–Ω–∏–µ. –ü–ª—é—Å –Ω–µ–±–æ–ª—å—à–æ–π —Ç—Ä—é–∫ —Å cross-attention, —á—Ç–æ–±—ã –ø–µ—Ä—Å–æ–Ω–∞–∂ –Ω–µ ¬´—Ä–∞—Å–ø–æ–ª–∑–∞–ª—Å—è¬ª. –í—Å—ë –¥–µ–ª–∞–µ—Ç—Å—è –Ω–∞ –ª–µ—Ç—É, –±–µ–∑ –¥–æ–æ–±—É—á–µ–Ω–∏—è –∏ –±–µ–∑ —Ä–µ—Ñ–µ—Ä–µ–Ω—Å–Ω—ã—Ö —Å–Ω–∏–º–∫–æ–≤.

    –ù–µ—Å–º–æ—Ç—Ä—è –Ω–∞ –ø—Ä–æ—Å—Ç–æ—Ç—É, –º–µ—Ç–æ–¥ –ø–æ –º–µ—Ç—Ä–∏–∫–∞–º —Å–∏–ª—å–Ω–æ –æ–±—Ö–æ–¥–∏—Ç StoryDiffusion, –∏ –¥–∞–∂–µ –∏–Ω–æ–≥–¥–∞ –æ–±—Ö–æ–¥–∏—Ç IP-adapter.
    Example 5:
    VAR: Image Generation via Next-Scale Prediction (by Bytedance)

    –í—ã –Ω–∞–≤–µ—Ä–Ω—è–∫–∞ —Å–ª—ã—à–∞–ª–∏ –ø—Ä–æ –∞–≤—Ç–æ—Ä–µ–≥—Ä–µ—Å—Å–∏–æ–Ω–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π (imageGPT (https://openai.com/research/image-gpt), Dalle-1 (https://openai.com/research/dall-e)). –ù–æ —É —ç—Ç–∏—Ö –º–µ—Ç–æ–¥–æ–≤ –±—ã–ª–æ –æ—á–µ–Ω—å –±–æ–ª—å—à–æ–µ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏–µ ‚Äî¬†–∫–∞—Ä—Ç–∏–Ω–æ—á–Ω—ã–µ —Ç–æ–∫–µ–Ω—ã –ø—Ä–∏—Ö–æ–¥–∏–ª–æ—Å—å "–≤—ã–ø—Ä—è–º–ª—è—Ç—å" –≤ 1D –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ—Å—Ç—å, –∫–æ—Ç–æ—Ä–∞—è —Å—Ç–∞–Ω–æ–≤–∏–ª–∞—Å—å —Å–ª–∏—à–∫–æ–º –¥–ª–∏–Ω–Ω–æ–π. –ü–æ—ç—Ç–æ–º—É –æ–Ω–∏ —Ä–∞–±–æ—Ç–∞–ª–∏ –ø–ª–æ—Ö–æ –∏ –º–µ–¥–ª–µ–Ω–Ω–æ, —É—Å—Ç—É–ø–∏–≤ –º–µ—Å—Ç–æ –¥–∏—Ñ—Ñ—É–∑–∏—è–º.

    –ê–≤—Ç–æ—Ä—ã VAR –ø—Ä–µ–¥–ª–æ–∂–∏–ª–∏ –º–æ–∑–≥–æ–≤–∑—Ä—ã–≤–∞—Ç–µ–ª—å–Ω—ã–π —Å–ø–æ—Å–æ–± –≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è –ø—Ä–∏ –ø–æ–º–æ—â–∏ GPT –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –¥–µ–ª–∞—Ç—å —ç—Ç–æ –Ω–µ–ø—Ä–∏—è—Ç–Ω–æ–µ "–≤—ã–ø—Ä—è–º–ª–µ–Ω–∏–µ" ‚Äî ¬†–≤–º–µ—Å—Ç–æ –∞–≤—Ç–æ—Ä–µ–≥—Ä–µ—Å—Å–∏–∏ –ø–æ –ø–∏–∫—Å–µ–ª—è–º\—Ç–æ–∫–µ–Ω–∞–º –æ–Ω–∏ –¥–µ–ª–∞—é—Ç "next-scale prediction", —Ç–æ –µ—Å—Ç—å –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞—é—Ç —Å—Ä–∞–∑—É –≤—Å—é –º–∞—Ç—Ä–∏—Ü—É VQVAE (https://ml.berkeley.edu/blog/posts/vq-vae/) —Ç–æ–∫–µ–Ω–æ–≤ –∑–∞ –æ–¥–∏–Ω forward pass. –¢–µ–ø–µ—Ä—å –æ–¥–∏–Ω —à–∞–≥ –∞–≤—Ç–æ—Ä–µ–≥—Ä–µ—Å—Å–∏–∏ ‚Äî —ç—Ç–æ —à–∞–≥ —É–≤–µ–ª–∏—á–µ–Ω–∏—è —Ä–∞–∑—Ä–µ—à–µ–Ω–∏—è (—Å–º. –∫–∞—Ä—Ç–∏–Ω–∫—É). –ö –º–æ–µ–º—É —É–¥–∏–≤–ª–µ–Ω–∏—é, –¥–ª—è —ç—Ç–æ–≥–æ –ø–æ—Ç—Ä–µ–±–æ–≤–∞–ª–æ—Å—å —Å–æ–≤—Å–µ–º –Ω–µ–º–Ω–æ–≥–æ –º–æ–¥–∏—Ñ–∏–∫–∞—Ü–∏–π –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π GPT-2 –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã (—Ç–µ–∫—Å—Ç–æ–≤–æ–π).

    –¢–∞–∫–æ–π –ø–æ–¥—Ö–æ–¥ —Ä–∞–±–æ—Ç–∞–µ—Ç –ø—Ä–æ—Å—Ç–æ –º–æ–ª–Ω–∏–µ–Ω–æ—Å–Ω–æ, –∞ –∑–∞–∫–æ–Ω—ã –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏—è —Å–∏–ª—å–Ω–æ –ª—É—á—à–µ, —á–µ–º —É –¥–∏—Ñ—Ñ—É–∑–∏–π. –ü–æ –º–µ—Ç—Ä–∏–∫–∞–º VAR –±—å—ë—Ç –≤—Å–µ—Ö –Ω–∞ class-conditional –¥–∞—Ç–∞—Å–µ—Ç–∞—Ö (–≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –ø–æ —Ç–µ–∫—Å—Ç—É –ø–æ–∫–∞ –Ω–µ—Ç, –Ω–æ –Ω–∞–¥ —ç—Ç–∏–º —É–∂–µ —Ä–∞–±–æ—Ç–∞—é—Ç). –ê —Ç–µ–º –≤—Ä–µ–º–µ–Ω–µ–º –≤–µ—Å—å –∫–æ–¥ –∏ –≤–µ—Å–∞ —É–∂–µ –≤ –æ—Ç–∫—Ä—ã—Ç–æ–º –¥–æ—Å—Ç—É–ø–µ.

    P.S. –î—É–º–∞—é, —á—Ç–æ —ç—Ç–æ –æ–¥–∏–Ω –∏–∑ —Å–∞–º—ã—Ö –ø–µ—Ä—Å–ø–µ–∫—Ç–∏–≤–Ω—ã—Ö –º–µ—Ç–æ–¥–æ–≤ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π (–∏ –≤–∏–¥–µ–æ?) –Ω–∞ –¥–∞–Ω–Ω—ã–π –º–æ–º–µ–Ω—Ç.
    """)
    
    
    
    


In [11]:
from core.openai_utils import text_query_llm
from core.openai_utils.constants import OpenAIConstants

post_text = text_query_llm(
    GENERATE_POST_SYSTEM_PROMPT,
    "Text of paper: " + raw_text
)

In [15]:
from core.openai_utils import generate_image

res = generate_image(post_text)


In [None]:
# FIXME: an unfinished `from ...` import used to live in this cell; a bare `from` is a SyntaxError and aborts Restart & Run All. Complete the import or delete the cell.

In [16]:
res

'https://oaidalleapiprodscus.blob.core.windows.net/private/org-LXehVrp5zSXfbAB8WgGLPuOn/user-9mzHVrMaBNKA4FI1Y3eXrLhE/img-jsvXgpUE35Vle0EFzUJvtUhH.png?st=2025-05-08T17%3A03%3A43Z&se=2025-05-08T19%3A03%3A43Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=52f8f7b3-ca8d-4b21-9807-8b9df114d84c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-07T22%3A37%3A24Z&ske=2025-05-08T22%3A37%3A24Z&sks=b&skv=2024-08-04&sig=%2B/fNgAai5hwacbiYwl/BuB238bX4%2Bu6AGfOE3njLgaA%3D'

In [13]:
len(post_text)

937

In [14]:
print(post_text)

–ó–∞—á–µ–º –ø–ª–∞—Ç–∏—Ç—å –∑–∞ Google API, –µ—Å–ª–∏ LLM –º–æ–∂–µ—Ç —Å–∞–º–∞ —Å–∏–º—É–ª–∏—Ä–æ–≤–∞—Ç—å –ø–æ–∏—Å–∫?

LLMs —Ö–æ—Ä–æ—à–∏, –Ω–æ —á–∞—Å—Ç–æ –¥–∞—é—Ç —É—Å—Ç–∞—Ä–µ–≤—à—É—é –∏–ª–∏ –Ω–µ–≤–µ—Ä–Ω—É—é –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é ‚Äî –æ–Ω–∏ —Å—Ç–∞—Ç–∏—á–Ω—ã. RAG –∏ RL-–∏–Ω—Ç–µ–≥—Ä–∞—Ü–∏—è —Å —Ä–µ–∞–ª—å–Ω—ã–º–∏ –ø–æ–∏—Å–∫–æ–≤–∏–∫–∞–º–∏ –ø–æ–º–æ–≥–∞—é—Ç, –Ω–æ –¥–æ—Ä–æ–≥–∏ –∏ –Ω–µ–ø—Ä–µ–¥—Å–∫–∞–∑—É–µ–º—ã –∏–∑-–∑–∞ API –∏ –∫–∞—á–µ—Å—Ç–≤–∞ –≤—ã–¥–∞—á–∏.

–ê–≤—Ç–æ—Ä—ã –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç ZEROSEARCH ‚Äî RL-—Ñ—Ä–µ–π–º–≤–æ—Ä–∫, –ø–æ–∑–≤–æ–ª—è—é—â–∏–π LLM –æ–±—É—á–∞—Ç—å—Å—è –ø–æ–∏—Å–∫—É –±–µ–∑ —Ä–µ–∞–ª—å–Ω—ã—Ö –ø–æ–∏—Å–∫–æ–≤–∏–∫–æ–≤. –ò–¥–µ—è: LLM —Å–∏–º—É–ª–∏—Ä—É–µ—Ç –ø–æ–∏—Å–∫–æ–≤–∏–∫, –≥–µ–Ω–µ—Ä–∏—Ä—É—è —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã–µ –∏–ª–∏ ¬´—à—É–º–Ω—ã–µ¬ª –¥–æ–∫—É–º–µ–Ω—Ç—ã –ø–æ –∑–∞–ø—Ä–æ—Å—É. –ü–æ—Å—Ç–µ–ø–µ–Ω–Ω–æ —É—Å–ª–æ–∂–Ω—è—è –∑–∞–¥–∞—á–∏ —á–µ—Ä–µ–∑ curriculum rollout, –º–æ–¥–µ–ª—å —É—á–∏—Ç—Å—è —Ä–∞–±–æ—Ç–∞—Ç—å —Å –Ω–µ–∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω–æ–π –≤—ã–¥–∞—á–µ–π, –∫–∞–∫ –≤ —Ä–µ–∞–ª—å–Ω—ã—Ö —É—Å–ª–æ–≤–∏

In [6]:
import os
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ERROR_CHAT_ID = os.getenv("ERROR_CHAT_ID")
TOKEN = os.getenv("TOKEN")
CHANNEL_ID = os.getenv("CHANNEL_ID")

from openai import OpenAI
from core.openai_utils.constants import OpenAIConstants



client = OpenAI(api_key=OPENAI_API_KEY)



def text_query_llm(system_prompt, user_prompt, model="o1-preview", max_symbols = 1000, max_retries=5):
    """Send one instruction + one user message to the chat model and return its reply text.

    Args:
        system_prompt: Instructions for the model.
        user_prompt: The user message (here: the paper text).
        model: Model name passed to the API. For "o1-preview" the instructions
            are sent with the "user" role instead of "system" (presumably
            because that model rejects system messages — TODO confirm).
        max_symbols: Accepted for interface compatibility; currently NOT used,
            so the reply length is not limited or checked here.
        max_retries: Accepted for interface compatibility; currently NOT used,
            so the request is attempted exactly once.

    Returns:
        The reply text of the first choice with surrounding whitespace stripped.

    Raises:
        ValueError: if the first choice carries no text content (its content is
            None), instead of failing with an opaque AttributeError on ``.strip()``.
    """
    # Only the role of the instruction message differs between the two cases.
    instructions_role = "user" if model == "o1-preview" else "system"
    msg = [
        {"role": instructions_role, "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # `client` is the module-level client created in this cell.
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=msg,
    )
    message = completion.choices[0].message
    if message.content is None:
        # Surface the refusal text when the SDK exposes one, to aid debugging.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model {model!r} returned no text content (refusal={refusal!r})")
    return message.content.strip()

post_text = text_query_llm(
    OpenAIConstants.GENERATE_POST_SYSTEM_PROMPT,
    "Text of paper: " + raw_text
)

KeyboardInterrupt: 

In [12]:
print(post_text)


Voila: —à–∞–≥ –∫ –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–º –≥–æ–ª–æ—Å–æ–≤—ã–º –∞—Å—Å–∏—Å—Ç–µ–Ω—Ç–∞–º

–°–µ–≥–æ–¥–Ω—è –Ω–∞—à–∏ —É–º–Ω—ã–µ –ø–æ–º–æ—â–Ω–∏–∫–∏ ‚Äì Siri, Alexa –∏ –¥—Ä—É–≥–∏–µ ‚Äì —Ä–µ–∞–≥–∏—Ä—É—é—Ç —Ç–æ–ª—å–∫–æ –Ω–∞ –∑–∞–ø—Ä–æ—Å—ã –∏ —Ä–∞–±–æ—Ç–∞—é—Ç –ø–æ –ø—Ä–∏–Ω—Ü–∏–ø—É: –∑–∞–ø—Ä–æ—Å-–æ—Ç–≤–µ—Ç. –ù–æ —á—Ç–æ –µ—Å–ª–∏ –ò–ò —Å–º–æ–∂–µ—Ç —Å–∞–º —Ä–µ—à–∞—Ç—å, –∫–æ–≥–¥–∞ –≤—Å—Ç—É–ø–∏—Ç—å –≤ —Ä–∞–∑–≥–æ–≤–æ—Ä, –∏ –¥–µ–ª–∞—Ç—å —ç—Ç–æ –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ?

–í –Ω–æ–≤–æ–π —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–µ–º–µ–π—Å—Ç–≤–æ –º–æ–¥–µ–ª–µ–π Voila, –∫–æ—Ç–æ—Ä—ã–µ —Å—Ç—Ä–µ–º—è—Ç—Å—è –∫ –¥–µ–π—Å—Ç–≤–∏—Ç–µ–ª—å–Ω–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–º—É –≥–æ–ª–æ—Å–æ–≤–æ–º—É –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—é. Voila-e2e –ø–æ–∑–≤–æ–ª—è–µ—Ç –≤–µ—Å—Ç–∏ –≥–æ–ª–æ—Å–æ–≤–æ–π –¥–∏–∞–ª–æ–≥ —Å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ–º –≤—Å–µ—Ö –Ω—é–∞–Ω—Å–æ–≤ —Ä–µ—á–∏, –∞ Voila-autonomous —Å–ø–æ—Å–æ–±–Ω–∞ –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ —Å–ª—É—à–∞—Ç—å, –¥—É–º–∞—Ç—å –∏ –≥–æ–≤–æ—Ä–∏—Ç—å ‚Äì –∫–∞–∫ –≤ —Ä–µ–∞–ª—å–Ω–æ–º –æ–±—â–µ–Ω–∏–∏ –º–µ–∂–¥—

In [13]:
len(post_text)

1075

In [4]:
post_text = '''–í —Å–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –æ–±–ª–∞—Å—Ç–∏ –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –ø—Ä–µ–¥–æ–±—É—á–∞—é—â–∏–µ –Ω–∞–±–æ—Ä—ã –¥–∞–Ω–Ω—ã—Ö –¥–æ—Å—Ç–∏–≥–ª–∏ —Ç—Ä–∏–ª–ª–∏–æ–Ω–æ–≤ —Ç–æ–∫–µ–Ω–æ–≤, —Å–æ—á–µ—Ç–∞—è –∫—Ä—É–ø–Ω–æ–º–∞—Å—à—Ç–∞–±–Ω—ã–µ –≤–µ–±-–¥–∞–Ω–Ω—ã–µ —Å –º–µ–Ω—å—à–∏–º–∏, –≤—ã—Å–æ–∫–æ–∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω—ã–º–∏ –¥–æ–º–µ–Ω–Ω–æ-—Å–ø–µ—Ü–∏—Ñ–∏—á–Ω—ã–º–∏ –Ω–∞–±–æ—Ä–∞–º–∏. –û–¥–Ω–∞–∫–æ —Ç–∞–∫–æ–π –º–∞—Å—à—Ç–∞–± –∏ –≥–µ—Ç–µ—Ä–æ–≥–µ–Ω–Ω–æ—Å—Ç—å —Å–æ–∑–¥–∞—é—Ç –ø—Ä–æ–±–ª–µ–º—ã –≤ –±–∞–ª–∞–Ω—Å–∏—Ä–æ–≤–∞–Ω–∏–∏ –æ–±—â–µ–≥–æ –∑–Ω–∞–Ω–∏—è –∏ –¥–æ–º–µ–Ω–Ω–æ–π —ç–∫—Å–ø–µ—Ä—Ç–∏–∑—ã, —á—Ç–æ —á–∞—Å—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–º—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—é —Ü–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –¥–ª—è —Å–ø–µ—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç–µ–π.

–ê–≤—Ç–æ—Ä—ã —Å—Ç–∞—Ç—å–∏ –ø—Ä–µ–¥—Å—Ç–∞–≤–∏–ª–∏ CLIMB (CLustering-based Iterative Data Mixture Bootstrapping) ‚Äî –Ω–æ–≤—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–ª—è –∞–≤—Ç–æ–º–∞—Ç–∏–∑–∞—Ü–∏–∏ –ø–æ–∏—Å–∫–∞ –æ–ø—Ç–∏–º–∞–ª—å–Ω—ã—Ö —Å–º–µ—Å–µ–π –¥–∞–Ω–Ω—ã—Ö –≤–æ –≤—Ä–µ–º—è –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è. CLIMB —Å–æ—Å—Ç–æ–∏—Ç –∏–∑ —Ç—Ä–µ—Ö –∫–ª—é—á–µ–≤—ã—Ö —à–∞–≥–æ–≤:
1. **–í—Å—Ç—Ä–∞–∏–≤–∞–Ω–∏–µ –∏ –∫–ª–∞—Å—Ç–µ—Ä–∏–∑–∞—Ü–∏—è** –±–æ–ª—å—à–∏—Ö –Ω–∞–±–æ—Ä–æ–≤ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –¥–∏—Ñ—Ñ–µ—Ä–µ–Ω—Ü–∏–∞—Ü–∏–∏ –ø–æ –¥–æ–º–µ–Ω–∞–º –±–µ–∑ —è–≤–Ω—ã—Ö –º–µ—Ç–æ–∫.
2. **–ö–æ–Ω—Å—Ç—Ä—É–∏—Ä–æ–≤–∞–Ω–∏–µ –ø–∞—Ä "—Å–º–µ—Å—å-–ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å"** –ø—É—Ç–µ–º —Å–µ–º–ø–ª–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –æ—Ç—Å–µ–∏–≤–∞–Ω–∏—è —Å–º–µ—Å–µ–π –¥–∞–Ω–Ω—ã—Ö –∏ –æ–±—É—á–µ–Ω–∏—è –ø—Ä–æ–∫—Å–∏-–º–æ–¥–µ–ª–µ–π.
3. **–û–±—É—á–µ–Ω–∏–µ –ø—Ä–µ–¥–∏–∫—Ç–æ—Ä–∞**, –∫–æ—Ç–æ—Ä—ã–π –æ—Ü–µ–Ω–∏–≤–∞–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –Ω–∞ –æ—Å–Ω–æ–≤–µ —Å–º–µ—Å–µ–π –¥–∞–Ω–Ω—ã—Ö, –ø–æ–∑–≤–æ–ª—è—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –∏ –∏—Ç–µ—Ä–∞—Ç–∏–≤–Ω—É—é –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏—é.

–≠—Ç–æ—Ç –ø–æ–¥—Ö–æ–¥ —Ä–µ—à–∞–µ—Ç –ø—Ä–æ–±–ª–µ–º—É —Å–ª–æ–∂–Ω–æ–π –∏ –Ω–µ–ª–∏–Ω–µ–π–Ω–æ–π –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –º–µ–∂–¥—É —Å–æ—Å—Ç–∞–≤–æ–º –Ω–∞–±–æ—Ä–∞ –¥–∞–Ω–Ω—ã—Ö –∏ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å—é –º–æ–¥–µ–ª–∏. –í–º–µ—Å—Ç–æ —Å—Ç–∞—Ç–∏—á–µ—Å–∫–∏—Ö —Å—Ç—Ä–∞—Ç–µ–≥–∏–π —Å–º–µ—à–∏–≤–∞–Ω–∏—è CLIMB –¥–∏–Ω–∞–º–∏—á–µ—Å–∫–∏ –∫–æ—Ä—Ä–µ–∫—Ç–∏—Ä—É–µ—Ç —Å–º–µ—Å–∏ –¥–∞–Ω–Ω—ã—Ö –Ω–∞ –ø—Ä–æ—Ç—è–∂–µ–Ω–∏–∏ –æ–±—É—á–µ–Ω–∏—è, –∏–Ω—Ç–µ–≥—Ä–∏—Ä—É—è –Ω–µ—Å–∫–æ–ª—å–∫–æ –ø—Ä–µ–¥–∏–∫—Ç–æ—Ä–æ–≤ –¥–ª—è –æ–±–Ω–∞—Ä—É–∂–µ–Ω–∏—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—ã—Ö –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏–π –¥–ª—è –∞–¥–∞–ø—Ç–∞—Ü–∏–∏ –∫ –¥–æ–º–µ–Ω—É.

–í —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞—Ö CLIMB –ø–æ–∫–∞–∑–∞–ª –ø—Ä–µ–≤–æ—Å—Ö–æ–¥—Å—Ç–≤–æ –Ω–∞–¥ —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–º–∏ –º–µ—Ç–æ–¥–∞–º–∏, —Ç–∞–∫–∏–º–∏ –∫–∞–∫ DoReMi –∏ RegMix, –æ—Å–æ–±–µ–Ω–Ω–æ –≤ –∑–∞–¥–∞—á–∞—Ö –æ–±—â–µ–≥–æ —Ä–∞—Å—Å—É–∂–¥–µ–Ω–∏—è –∏ –ø—Ä–∏ –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ –¥–ª—è –∫–æ–Ω–∫—Ä–µ—Ç–Ω—ã—Ö –¥–æ–º–µ–Ω–æ–≤, —Ç–∞–∫–∏—Ö –∫–∞–∫ STEM, —Å–æ—Ü–∏–∞–ª—å–Ω—ã–µ –Ω–∞—É–∫–∏ –∏ –≥—É–º–∞–Ω–∏—Ç–∞—Ä–Ω—ã–µ –Ω–∞—É–∫–∏. –ò—Å–ø–æ–ª—å–∑—É—è –æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ —Å–º–µ—Å–∏ –¥–∞–Ω–Ω—ã—Ö, –∞–≤—Ç–æ—Ä—ã –æ–±—É—á–∏–ª–∏ –º–æ–¥–µ–ª–∏ —Ä–∞–∑–º–µ—Ä–æ–º 350M –∏ 1B, –∫–æ—Ç–æ—Ä—ã–µ –ø—Ä–µ–≤–∑–æ—à–ª–∏ –ø—Ä–µ–¥—ã–¥—É—â–∏–µ –º–µ—Ç–æ–¥—ã –Ω–∞ –∑–Ω–∞—á–∏—Ç–µ–ª—å–Ω—É—é –≤–µ–ª–∏—á–∏–Ω—É.

–ö—Ä–æ–º–µ —Ç–æ–≥–æ, –æ–Ω–∏ –ø—Ä–µ–¥—Å—Ç–∞–≤–∏–ª–∏ –Ω–æ–≤—ã–π –≤—ã—Å–æ–∫–æ–∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω—ã–π –∫–æ—Ä–ø—É—Å –æ–±—ä–µ–º–æ–º 1.2 —Ç—Ä–∏–ª–ª–∏–æ–Ω–∞ —Ç–æ–∫–µ–Ω–æ–≤ —Å 20 –∫–ª–∞—Å—Ç–µ—Ä–∞–º–∏ –¥–ª—è –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–π –ø–æ —Å–º–µ—à–∏–≤–∞–Ω–∏—é –¥–∞–Ω–Ω—ã—Ö –∏ –Ω–æ–≤—ã–π –Ω–∞–±–æ—Ä –¥–∞–Ω–Ω—ã—Ö ClimbMix –æ–±—ä–µ–º–æ–º 400 –º–∏–ª–ª–∏–∞—Ä–¥–æ–≤ —Ç–æ–∫–µ–Ω–æ–≤ –¥–ª—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è.

**–í—ã–≤–æ–¥—ã**: CLIMB –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–ª—è–µ—Ç –∞–≤—Ç–æ–º–∞—Ç–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –∏ –∞–¥–∞–ø—Ç–∏–≤–Ω—ã–π –ø–æ–¥—Ö–æ–¥ –∫ –æ–ø—Ç–∏–º–∏–∑–∞—Ü–∏–∏ —Å–º–µ—Å–µ–π –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è LLM, —É–ª—É—á—à–∞—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–µ–π –∫–∞–∫ –≤ –æ–±—â–∏—Ö, —Ç–∞–∫ –∏ –≤ –¥–æ–º–µ–Ω–Ω–æ-—Å–ø–µ—Ü–∏—Ñ–∏—á–Ω—ã—Ö –∑–∞–¥–∞—á–∞—Ö –±–µ–∑ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ—Å—Ç–∏ –≤ —Ä—É—á–Ω–æ–π –∞–Ω–Ω–æ—Ç–∞—Ü–∏–∏ –∏–ª–∏ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –∑–∞–¥–∞–Ω–Ω—ã—Ö –º–µ—Ç–∫–∞—Ö –¥–æ–º–µ–Ω–æ–≤.'''

In [3]:
post_text = '''Voila: —à–∞–≥ –∫ –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–º –≥–æ–ª–æ—Å–æ–≤—ã–º –∞—Å—Å–∏—Å—Ç–µ–Ω—Ç–∞–º

–°–µ–≥–æ–¥–Ω—è –Ω–∞—à–∏ —É–º–Ω—ã–µ –ø–æ–º–æ—â–Ω–∏–∫–∏ ‚Äì Siri, Alexa –∏ –¥—Ä—É–≥–∏–µ ‚Äì —Ä–µ–∞–≥–∏—Ä—É—é—Ç —Ç–æ–ª—å–∫–æ –Ω–∞ –∑–∞–ø—Ä–æ—Å—ã –∏ —Ä–∞–±–æ—Ç–∞—é—Ç –ø–æ –ø—Ä–∏–Ω—Ü–∏–ø—É: –∑–∞–ø—Ä–æ—Å-–æ—Ç–≤–µ—Ç. –ù–æ —á—Ç–æ –µ—Å–ª–∏ –ò–ò —Å–º–æ–∂–µ—Ç —Å–∞–º —Ä–µ—à–∞—Ç—å, –∫–æ–≥–¥–∞ –≤—Å—Ç—É–ø–∏—Ç—å –≤ —Ä–∞–∑–≥–æ–≤–æ—Ä, –∏ –¥–µ–ª–∞—Ç—å —ç—Ç–æ –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω–æ?

–í –Ω–æ–≤–æ–π —Å—Ç–∞—Ç—å–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–æ —Å–µ–º–µ–π—Å—Ç–≤–æ –º–æ–¥–µ–ª–µ–π Voila, –∫–æ—Ç–æ—Ä—ã–µ —Å—Ç—Ä–µ–º—è—Ç—Å—è –∫ –¥–µ–π—Å—Ç–≤–∏—Ç–µ–ª—å–Ω–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–º—É –≥–æ–ª–æ—Å–æ–≤–æ–º—É –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—é. Voila-e2e –ø–æ–∑–≤–æ–ª—è–µ—Ç –≤–µ—Å—Ç–∏ –≥–æ–ª–æ—Å–æ–≤–æ–π –¥–∏–∞–ª–æ–≥ —Å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ–º –≤—Å–µ—Ö –Ω—é–∞–Ω—Å–æ–≤ —Ä–µ—á–∏, –∞ Voila-autonomous —Å–ø–æ—Å–æ–±–Ω–∞ –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ —Å–ª—É—à–∞—Ç—å, –¥—É–º–∞—Ç—å –∏ –≥–æ–≤–æ—Ä–∏—Ç—å ‚Äì –∫–∞–∫ –≤ —Ä–µ–∞–ª—å–Ω–æ–º –æ–±—â–µ–Ω–∏–∏ –º–µ–∂–¥—É –ª—é–¥—å–º–∏.

–ö—Ä—É—Ç–∞—è —Ñ–∏—à–∫–∞ ‚Äì –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç—å –ª–µ–≥–∫–æ –Ω–∞—Å—Ç—Ä–∞–∏–≤–∞—Ç—å –≥–æ–ª–æ—Å–æ–≤—ã–µ –ø—Ä–æ—Ñ–∏–ª–∏. –•–æ—Ç–∏—Ç–µ, —á—Ç–æ–±—ã –≤–∞—à –∞—Å—Å–∏—Å—Ç–µ–Ω—Ç –≥–æ–≤–æ—Ä–∏–ª –≥–æ–ª–æ—Å–æ–º –≤–∞—à–µ–≥–æ –ª—é–±–∏–º–æ–≥–æ –∞–∫—Ç—ë—Ä–∞? –ë–µ–∑ –ø—Ä–æ–±–ª–µ–º! –î–∞ –∏ –ø–æ–¥–¥–µ—Ä–∂–∫–∞ —à–µ—Å—Ç–∏ —è–∑—ã–∫–æ–≤ ‚Äì —Ç–æ–∂–µ –ø–ª—é—Å.

–≠—Ç–æ –∑–∞–º–µ—Ç–Ω—ã–π —à–∞–≥ –≤–ø–µ—Ä—ë–¥ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é —Å —Ç–µ–∫—É—â–∏–º–∏ —Å–∏—Å—Ç–µ–º–∞–º–∏, –∫–æ—Ç–æ—Ä—ã–µ —Å—Ç—Ä–∞–¥–∞—é—Ç –æ—Ç –∑–∞–¥–µ—Ä–∂–µ–∫ –∏ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω—ã —Ä–µ–∞–∫—Ç–∏–≤–Ω—ã–º –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–µ–º. Voila –æ—Ç–∫—Ä—ã–≤–∞–µ—Ç –¥–≤–µ—Ä–∏ –∫ –±–æ–ª–µ–µ –µ—Å—Ç–µ—Å—Ç–≤–µ–Ω–Ω—ã–º –∏ –ø—Ä–æ–∞–∫—Ç–∏–≤–Ω—ã–º –≥–æ–ª–æ—Å–æ–≤—ã–º –∏–Ω—Ç–µ—Ä—Ñ–µ–π—Å–∞–º.

–ñ–¥—ë–º, –∫–æ–≥–¥–∞ –∞—Å—Å–∏—Å—Ç–µ–Ω—Ç—ã —Å—Ç–∞–Ω—É—Ç –Ω–µ –ø—Ä–æ—Å—Ç–æ –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–∞–º–∏, –∞ –Ω–∞—Å—Ç–æ—è—â–∏–º–∏ –Ω–∞–ø–∞—Ä–Ω–∏–∫–∞–º–∏ –≤ –ø–æ–≤—Å–µ–¥–Ω–µ–≤–Ω–æ–π –∂–∏–∑–Ω–∏.'''

In [None]:

from anthropic import Anthropic
import os
from core.openai_utils.constants import OpenAIConstants

import textwrap  # used below; imported here so the cell does not rely on an earlier cell

# Read from the environment; os.getenv returns None when the variable is unset.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

# Instructions for shortening an already generated post. A plain (non-f) string:
# there are no placeholders, and an f-string would break on any literal brace.
CUTTING_POST_SYSTEM_PROMPT = textwrap.dedent("""
    You are a russian researcher in a field of computer science. You read post for social network representing the key idea or methods of the scientific paper. 
    You need to reduce the post length by 30% without changing its style and structure and preserving all key ideas.
    Do not introduce new information, do not change facts.
    Output text should be in russian.
    """)

# NOTE(review): this rebinds the module-level name `client`, which text_query_llm
# (defined in an earlier cell) reads as its OpenAI client. After running this cell,
# re-create the OpenAI client before calling text_query_llm again, or rename one of them.
client = Anthropic(api_key=ANTHROPIC_API_KEY)
output = client.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=512,
    # Instructions go in the dedicated `system` parameter rather than being sent
    # as a second consecutive "user" message.
    system=CUTTING_POST_SYSTEM_PROMPT,
    messages=[
        {"role": "user", "content": "Text of post: " + post_text}
    ]
)

In [12]:
print(len(output.content[0].text))
print(len(post_text))

835
1075


In [13]:
print(output.content[0].text)

# Voila: —à–∞–≥ –∫ –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–º –≥–æ–ª–æ—Å–æ–≤—ã–º –∞—Å—Å–∏—Å—Ç–µ–Ω—Ç–∞–º

–°–µ–≥–æ–¥–Ω—è Siri, Alexa –∏ –¥—Ä—É–≥–∏–µ —É–º–Ω—ã–µ –ø–æ–º–æ—â–Ω–∏–∫–∏ —Ä–∞–±–æ—Ç–∞—é—Ç —Ç–æ–ª—å–∫–æ –ø–æ –ø—Ä–∏–Ω—Ü–∏–ø—É –∑–∞–ø—Ä–æ—Å-–æ—Ç–≤–µ—Ç. –ù–æ —á—Ç–æ –µ—Å–ª–∏ –ò–ò —Å–º–æ–∂–µ—Ç —Å–∞–º —Ä–µ—à–∞—Ç—å, –∫–æ–≥–¥–∞ –≤—Å—Ç—É–ø–∏—Ç—å –≤ —Ä–∞–∑–≥–æ–≤–æ—Ä?

–ù–æ–≤–æ–µ —Å–µ–º–µ–π—Å—Ç–≤–æ –º–æ–¥–µ–ª–µ–π Voila —Å—Ç—Ä–µ–º–∏—Ç—Å—è –∫ –¥–µ–π—Å—Ç–≤–∏—Ç–µ–ª—å–Ω–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–º—É –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—é. Voila-e2e –≤–µ–¥–µ—Ç –≥–æ–ª–æ—Å–æ–≤–æ–π –¥–∏–∞–ª–æ–≥ —Å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ–º –Ω—é–∞–Ω—Å–æ–≤ —Ä–µ—á–∏, –∞ Voila-autonomous —Å–ø–æ—Å–æ–±–Ω–∞ –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω–æ —Å–ª—É—à–∞—Ç—å, –¥—É–º–∞—Ç—å –∏ –≥–æ–≤–æ—Ä–∏—Ç—å ‚Äì –∫–∞–∫ –≤ —Ä–µ–∞–ª—å–Ω–æ–º –æ–±—â–µ–Ω–∏–∏.

–í–∞–∂–Ω–æ–µ –ø—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–æ ‚Äì –Ω–∞—Å—Ç—Ä–æ–π–∫–∞ –≥–æ–ª–æ—Å–æ–≤—ã—Ö –ø—Ä–æ—Ñ–∏–ª–µ–π. –ê—Å—Å–∏—Å—Ç–µ–Ω—Ç –º–æ–∂–µ—Ç –≥–æ–≤–æ—Ä–∏—Ç—å –≥–æ–ª–æ—Å–æ–º –ª—é–±–∏–º–æ–≥–æ –∞–∫—Ç—ë—Ä–∞! –ü–ª—é—Å –ø–æ–¥–¥–µ—Ä–∂–∫–∞ —à–µ—Å—Ç–∏ —è–∑—ã–∫–æ–

In [16]:
print(len(output.content[0].text))

1086


In [11]:
post_text

'–í —Å–æ–≤—Ä–µ–º–µ–Ω–Ω–æ–π –æ–±–ª–∞—Å—Ç–∏ –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π (LLM) –ø—Ä–µ–¥–æ–±—É—á–∞—é—â–∏–µ –Ω–∞–±–æ—Ä—ã –¥–∞–Ω–Ω—ã—Ö –¥–æ—Å—Ç–∏–≥–ª–∏ —Ç—Ä–∏–ª–ª–∏–æ–Ω–æ–≤ —Ç–æ–∫–µ–Ω–æ–≤, —Å–æ—á–µ—Ç–∞—è –∫—Ä—É–ø–Ω–æ–º–∞—Å—à—Ç–∞–±–Ω—ã–µ –≤–µ–±-–¥–∞–Ω–Ω—ã–µ —Å –º–µ–Ω—å—à–∏–º–∏, –≤—ã—Å–æ–∫–æ–∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω—ã–º–∏ –¥–æ–º–µ–Ω–Ω–æ-—Å–ø–µ—Ü–∏—Ñ–∏—á–Ω—ã–º–∏ –Ω–∞–±–æ—Ä–∞–º–∏. –û–¥–Ω–∞–∫–æ —Ç–∞–∫–æ–π –º–∞—Å—à—Ç–∞–± –∏ –≥–µ—Ç–µ—Ä–æ–≥–µ–Ω–Ω–æ—Å—Ç—å —Å–æ–∑–¥–∞—é—Ç –ø—Ä–æ–±–ª–µ–º—ã –≤ –±–∞–ª–∞–Ω—Å–∏—Ä–æ–≤–∞–Ω–∏–∏ –æ–±—â–µ–≥–æ –∑–Ω–∞–Ω–∏—è –∏ –¥–æ–º–µ–Ω–Ω–æ–π —ç–∫—Å–ø–µ—Ä—Ç–∏–∑—ã, —á—Ç–æ —á–∞—Å—Ç–æ –ø—Ä–∏–≤–æ–¥–∏—Ç –∫ –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–º—É –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—é —Ü–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –¥–ª—è —Å–ø–µ—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –≤–æ–∑–º–æ–∂–Ω–æ—Å—Ç–µ–π.\n\n–ê–≤—Ç–æ—Ä—ã —Å—Ç–∞—Ç—å–∏ –ø—Ä–µ–¥—Å—Ç–∞–≤–∏–ª–∏ CLIMB (CLustering-based Iterative Data Mixture Bootstrapping) ‚Äî –Ω–æ–≤—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–ª—è –∞–≤—Ç–æ–º–∞—Ç–∏–∑–∞—Ü–∏–∏ –ø–æ–∏—Å–∫–∞ –æ–ø—Ç–∏–º–∞–ª—å–Ω—ã—Ö —

In [8]:
print(post_text)


–ö–∞–∫ –Ω–∞–π—Ç–∏ –∏–¥–µ–∞–ª—å–Ω—É—é —Å–º–µ—Å—å –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –æ–±—É—á–µ–Ω–∏—è LLM? (by NVIDIA)

–í—Å–µ –∑–Ω–∞—é—Ç, —á—Ç–æ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –±–æ–ª—å—à–∏—Ö —è–∑—ã–∫–æ–≤—ã—Ö –º–æ–¥–µ–ª–µ–π –Ω—É–∂–Ω—ã –≥–æ—Ä—ã –¥–∞–Ω–Ω—ã—Ö. –ù–æ –∫–∞–∫–æ–µ —Å–æ–æ—Ç–Ω–æ—à–µ–Ω–∏–µ —Ä–∞–∑–Ω—ã—Ö —Ç–∏–ø–æ–≤ –≤—ã–±—Ä–∞—Ç—å, —á—Ç–æ–±—ã –º–æ–¥–µ–ª—å –±—ã–ª–∞ –∫—Ä—É—á–µ –≤ –Ω–∞—à–∏—Ö –∑–∞–¥–∞—á–∞—Ö?

–ò—Å—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª–∏ NVIDIA –ø—Ä–µ–¥–ª–æ–∂–∏–ª–∏ –º–µ—Ç–æ–¥ CLIMB ‚Äî –ø–æ–∏—Å–∫ –æ–ø—Ç–∏–º–∞–ª—å–Ω–æ–π —Å–º–µ—Å–∏ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–∏—è. –û–Ω–∏ –∫–ª–∞—Å—Ç–µ—Ä–∏–∑—É—é—Ç –¥–∞–Ω–Ω—ã–µ –Ω–∞ –æ—Å–Ω–æ–≤–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤, –≤—ã–¥–µ–ª—è—è ¬´–¥–æ–º–µ–Ω—ã¬ª –±–µ–∑ —Ä—É—á–Ω–æ–π —Ä–∞–∑–º–µ—Ç–∫–∏. –ó–∞—Ç–µ–º –∏—Ç–µ—Ä–∞—Ç–∏–≤–Ω–æ –∏—â—É—Ç –æ–ø—Ç–∏–º–∞–ª—å–Ω—ã–µ –≤–µ—Å–∞ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∫–ª–∞—Å—Ç–µ—Ä–∞, –æ–±—É—á–∞—è –º–∞–ª–µ–Ω—å–∫–∏–µ –ø—Ä–æ–∫—Å–∏-–º–æ–¥–µ–ª–∏ –∏ –∏—Å–ø–æ–ª—å–∑—É—è –ø—Ä–µ–¥–∏–∫—Ç–æ—Ä –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç–∏.

–ú–æ–¥–µ–ª–∏, –æ–±—É—á–µ–Ω–Ω—ã–µ —Å –