In [1]:
import requests
from bs4 import BeautifulSoup
import torch


class LLMBrowser:
    def __init__(self, model, tokenizer, translator, max_new_tokens: int = 100, device: str = "cuda"):
        self.device = device
        self.model = model
        self.tokenizer = tokenizer
        self.translator = translator
        self.max_new_tokens = max_new_tokens
        
    def __call__(self, request: str):
        en_request = self.in_translate(request)
        q = "+".join(en_request.split())
        r = requests.get(f"https://www.google.com/search?q={q}")
        soup = BeautifulSoup(r.text, features="html.parser")

        soup = self.del_script_and_style(soup)
        doc = self.extract_text(soup)

        en_answer = self.generate(request, doc)
        ru_answer = self.out_translate(en_answer)
        return ru_answer

    def del_script_and_style(self, soup):
        for script in soup(["script", "style"]):
            script.extract()    # rip it out
        
        return soup

    def in_translate(self, text):
        source_lang = self.translator.detect_lang(text)
        return self.translator.translate(text, source_lang, "en")

    def out_translate(self, text):
        source_lang = self.translator.detect_lang(text)
        return self.translator.translate(text, "en", "ru")
        
    def extract_text(self, soup):
        text = soup.get_text()

        # break into lines and remove leading and trailing space on each
        lines = (line.strip() for line in text.splitlines())
        
        # break multi-headlines into a line each
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        
        # drop blank lines
        text = '\n'.join(chunk for chunk in chunks if chunk)
        
        return text

    @torch.inference_mode()
    def generate(self, request, doc):

        content = f"Hey, Mistral! You now live in Russia, therefore answer all questions inconntext of Russia, Moscow only. Please answer the following question: {request}\n\n using this document: {doc}"
        
        messages = [
            {"role": "user", "content": content,}
        ]
        
        encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt")
        model_inputs = encodeds.to(self.device)
        
        generated_ids = model.generate(model_inputs, max_new_tokens=self.max_new_tokens, do_sample=True)
        decoded = self.tokenizer.batch_decode(generated_ids)
        
        
        torch.cuda.empty_cache()
    
        return decoded[0][len(content) + len("[INST]"):]
    

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

device = "cuda" # the device to load the model onto

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, add_bos_token=True, trust_remote_code=True)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:05<00:00,  1.83s/it]


In [3]:
from translation import Translator

link = "https://01b3-109-252-98-213.ngrok-free.app"

translator = Translator(link)

In [5]:
browser = LLMBrowser(model, tokenizer, translator, max_new_tokens=200)
print(browser("Порекомендуй 5 парков в Москве?"))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


на основании информации из документа я бы рекомендовал следующие пять парков в Москве:

1. Центральный парк культуры и отдыха имени Горького
2. Коломенское
3. Парк Зарядье
4.Музей-заповедник Царицыно
5. Парк "Сокольники"

Эти парки упоминались в различных источниках как популярные и настоятельно рекомендуемые места для посещения в Москве. Наслаждайтесь своим временем, исследуя эти прекрасные парки
