In [1]:
import getpass

import torch
from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from peft import AutoPeftModelForCausalLM, PeftModel, PeftConfig
import numpy as np
from tqdm import tqdm
import faiss
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, AutoModel
from transformers import BitsAndBytesConfig
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import datasets
from datasets import load_dataset
from datasets import Dataset
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ask for the Hugging Face Inference API key interactively so that the secret
# never lives in the notebook source.
# NOTE: a literal token that used to be pasted here as a comment was removed;
# it must be considered leaked and should be revoked.
inference_api_key = getpass.getpass("Enter your HF Inference API Key:\n\n")

In [3]:
# Load the source table from CSV; later cells read its 'text' column.
df = pd.read_csv('/home/stc/disk/vologina/df.csv')

In [4]:
## Convert the dataset into document objects
class DataFrameLoader:
    """Turn one column of a DataFrame into a list of ``Document`` objects.

    Attributes:
        df: the DataFrame to read from.
        page_content_column: name of the column whose values become the
            ``page_content`` of each document.
    """

    def __init__(self, df, page_content_column):
        self.page_content_column = page_content_column
        self.df = df

    def load(self):
        """Return one ``Document`` per row, in row order."""
        column_values = self.df[self.page_content_column].tolist()
        loaded = []
        for value in column_values:
            loaded.append(Document(value))
        return loaded

class Document:
    """Minimal text container used by the loader and the splitter.

    This class shadows the ``Document`` imported from ``langchain_core`` at the
    top of the notebook, so it also exposes a ``metadata`` attribute to stay
    usable by code written against that interface.

    Args:
        page_content: the text of the document.
        metadata: optional dict of extra fields; a fresh empty dict is used
            when omitted so instances never share one mutable default.
    """

    def __init__(self, page_content, metadata=None):
        self.page_content = page_content
        self.metadata = metadata if metadata is not None else {}

    def __repr__(self):
        return f"Document(page_content={self.page_content!r}, metadata={self.metadata!r})"

# Wrap every value of the 'text' column of `df` in a Document.
loader = DataFrameLoader(df, page_content_column='text')
documents = loader.load()

In [16]:
# Split documents into chunks
class RecursiveCharacterTextSplitter:
    """Fixed-stride character splitter.

    Chunk starts advance by ``chunk_size`` characters. Every chunk except the
    last one of a document is extended by ``chunk_overlap`` extra characters,
    so it shares that many characters with the chunk that follows it.
    """

    def __init__(self, chunk_size, chunk_overlap):
        self.chunk_overlap = chunk_overlap
        self.chunk_size = chunk_size

    def split_documents(self, documents):
        """Return a flat list of string chunks for all ``documents``.

        Each item of ``documents`` must expose a ``page_content`` string.
        Empty contents contribute no chunks. The result holds plain strings,
        not document objects.
        """
        chunks = []
        for doc in documents:
            content = doc.page_content
            total = len(content)
            chunks.extend(
                # The last chunk takes whatever is left; earlier chunks are
                # stretched by the overlap.
                content[start:]
                if start + self.chunk_size >= total
                else content[start:start + self.chunk_size + self.chunk_overlap]
                for start in range(0, total, self.chunk_size)
            )
        return chunks

# Cut every document into 500-character chunks with no overlap.
# `texts` is a flat list of strings (one entry per chunk), not of Document objects.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [4]:
# Repeats the load and split steps with the same arguments as the cells above,
# rebuilding `documents` and `texts` from `df`.
loader = DataFrameLoader(df, page_content_column='text')
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [17]:
# Load the tokenizer and encoder used for embeddings from a local all-MiniLM-L6-v2 directory.
# These are distinct from `model_gen` / `tokenizer_gen`, which are loaded later for generation.
# NOTE(review): embed_bert_cls takes the first-token vector of this encoder — confirm that
# pooling (and this checkpoint) suits the Russian queries used below.
tokenizer = AutoTokenizer.from_pretrained('/home/stc/disk/vologina/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('/home/stc/disk/vologina/all-MiniLM-L6-v2')

In [18]:
def embed_bert_cls(texts, model, tokenizer):
    """Return the L2-normalised first-token embedding of the first input.

    Args:
        texts: a string or a batch of strings handed to ``tokenizer``.
        model: encoder exposing ``.device`` and returning an object with
            ``last_hidden_state`` when called with the tokenizer output.
        tokenizer: callable producing a mapping of tensors.

    Returns:
        A NumPy vector for batch item 0 only, even if ``texts`` holds several
        items.
    """
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        on_device = {}
        for name, tensor in encoded.items():
            on_device[name] = tensor.to(model.device)
        outputs = model(**on_device)
    # Position 0 of every sequence is the first (CLS) token.
    first_token_vectors = outputs.last_hidden_state[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(first_token_vectors)
    first_item = unit_vectors[0]
    return first_item.cpu().numpy()

In [19]:
# Embed the chunks in `texts`, not the unsplit `documents`.
# Row i of `all_embeddings` becomes FAISS id i, and the retrieval cells resolve
# ids through `texts[idx]`. Embedding `documents` instead shifts every id as soon
# as one document is empty (no chunk) or longer than one chunk (several chunks).
embeddings_list = []
for text in tqdm(texts):
    embeddings = embed_bert_cls(text, model, tokenizer)
    embeddings_list.append(embeddings)

# One row per chunk, in the same order as `texts`.
all_embeddings = np.vstack(embeddings_list)

  0%|          | 0/13392 [00:00<?, ?it/s]

100%|██████████| 13392/13392 [01:47<00:00, 124.98it/s]


In [20]:
# `db` holds the embedding dimensionality (number of columns of `all_embeddings`).
db = all_embeddings.shape[1]
# Flat L2 index over all embeddings.
# NOTE(review): `index` is not referenced by any later cell visible here; retrieval
# goes through FaissRetrieverCosine, which builds its own index.
index = faiss.IndexFlatL2(db)
index.add(all_embeddings)

In [21]:
class FaissRetrieverCosine:
    """Top-k retriever backed by a FAISS inner-product index.

    The inner product equals cosine similarity only when the stored vectors and
    the query are L2-normalised; this class does not normalise anything itself.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim).
    """

    def __init__(self, embeddings):
        # FAISS only accepts C-contiguous float32 matrices. The conversion is a
        # no-op (no copy) when the input already has that layout.
        self.embeddings = np.ascontiguousarray(embeddings, dtype='float32')
        # IndexFlatIP is used to get cosine similarity on normalised vectors.
        self.index = faiss.IndexFlatIP(self.embeddings.shape[1])
        self.index.add(self.embeddings)

    def retrieve(self, query_embedding, k=5):
        """Return the ids of up to ``k`` best-scoring vectors, best first.

        Args:
            query_embedding: 1-D vector (or anything reshapeable to one row).
            k: maximum number of ids to return.

        Returns:
            1-D NumPy array of non-negative ids; shorter than ``k`` when the
            index holds fewer than ``k`` vectors.
        """
        query_embedding = np.asarray(query_embedding).reshape(1, -1).astype('float32')
        _, indices = self.index.search(query_embedding, k)
        found = indices[0]
        # FAISS fills missing results with -1; drop them so callers never index
        # their corpus with -1 (which would silently select the last element).
        return found[found >= 0]

In [22]:
# Build the inner-product retriever over all precomputed embeddings.
faiss_retriever_cosine = FaissRetrieverCosine(all_embeddings)

In [36]:
# Smoke test: embed a one-word query ("погода" = "weather") with the same encoder
# that produced the index.
query_embedding = embed_bert_cls("погода", model, tokenizer)

# Ids of the 5 best-scoring vectors.
top_k_indices = faiss_retriever_cosine.retrieve(query_embedding, k=5)

# Print the entries of `texts` found at those ids, ranked from 1.
for i, idx in enumerate(top_k_indices):
    print(f"{i + 1}: {texts[idx]}")

1:   периода
2:  подумой
3:   А что по прогнозу погоды
4:  правда
5:  правда 


In [37]:
# Sanity check: pull the stored vectors for the retrieved ids ...
retrieved_embeddings = all_embeddings[top_k_indices]

# ... and recompute their cosine similarity to the query with scikit-learn.
cosine_similarities = cosine_similarity(query_embedding.reshape(1, -1), retrieved_embeddings)
print("Cosine Similarities:", cosine_similarities)

Cosine Similarities: [[0.93256277 0.9159932  0.91475314 0.9094141  0.9094141 ]]


In [26]:
# Load the generator: 8-bit quantised base model + LoRA adapter, then the
# tokenizer and generation settings stored in the adapter directory.
cache_dir = '/mnt/storage/cache_huggingface'
# The adapter config names the base checkpoint to load.
config = PeftConfig.from_pretrained('/mnt/storage/cache_huggingface/models--evilfreelancer--ruGPT-3.5-13B-lora/snapshots/c316657abba32a553840064d6f03d08e64bda201')
model_gen = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    # Passing `load_in_8bit=True` directly is deprecated; transformers asks for a
    # BitsAndBytesConfig in `quantization_config` instead.
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    cache_dir=cache_dir,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)
# Attach the LoRA weights on top of the quantised base model.
model_gen = PeftModel.from_pretrained(
    model_gen,
    '/home/stc/disk/vologina/ruGPT-3.5-13B-lora',
    torch_dtype=torch.float16
)
model_gen.eval()
tokenizer_gen = AutoTokenizer.from_pretrained('/home/stc/disk/vologina/ruGPT-3.5-13B-lora')

generation_config = GenerationConfig.from_pretrained('/home/stc/disk/vologina/ruGPT-3.5-13B-lora')
print(generation_config)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 6/6 [18:46<00:00, 187.81s/it]


GenerationConfig {
  "bos_token_id": 2,
  "do_sample": true,
  "eos_token_id": 3,
  "max_new_tokens": 1536,
  "no_repeat_ngram_size": 15,
  "pad_token_id": 0,
  "repetition_penalty": 1.15,
  "temperature": 0.7,
  "top_k": 30,
  "top_p": 0.9
}



In [27]:
# Markup for one chat message and the default system persona.
# NOTE(review): Conversation.__init__ repeats these two literals as its own
# defaults instead of referencing these names.
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>\n"
DEFAULT_SYSTEM_PROMPT = "Ты — русскоязычный эмоциональный ассистент. Ты эмоционально разговариваешь с людьми и помогаешь им."


In [31]:
# Chat history holder
class Conversation:
    """Keeps an ordered list of chat messages and renders them as a prompt.

    ``messages`` starts with one system message. Each message is a dict with
    ``role`` and ``content`` keys and is rendered through ``message_template``.
    """

    def __init__(self, message_template="<s>{role}\n{content}</s>\n", system_prompt="Ты — русскоязычный эмоциональный ассистент. Ты эмоционально разговариваешь с людьми и помогаешь им.", start_token_id=2, bot_token_id=46787):
        self.messages = [{"role": "system", "content": system_prompt}]
        self.message_template = message_template
        self.start_token_id = start_token_id
        self.bot_token_id = bot_token_id

    def add_user_message(self, message):
        """Append ``message`` with role ``user``."""
        entry = {"role": "user", "content": message}
        self.messages.append(entry)

    def add_bot_message(self, message):
        """Append ``message`` with role ``bot``."""
        entry = {"role": "bot", "content": message}
        self.messages.append(entry)

    def get_prompt(self, tokenizer):
        """Render the whole history followed by the decoded reply opener.

        The opener is ``tokenizer.decode([start_token_id, bot_token_id])``.
        Surrounding whitespace of the final string is stripped.
        """
        pieces = [self.message_template.format(**entry) for entry in self.messages]
        pieces.append(tokenizer.decode([self.start_token_id, self.bot_token_id]))
        return "".join(pieces).strip()


# Produce a reply for an already formatted prompt
def generate(model, tokenizer, prompt, generation_config):
    """Generate a continuation of ``prompt`` and return only the new text.

    The prompt is tokenised, moved to ``model.device`` and passed to
    ``model.generate``. The first returned sequence is cut at the prompt length
    so that only the newly produced tokens are decoded (special tokens are
    skipped). The decoded text is returned stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {}
    for key, tensor in encoded.items():
        inputs[key] = tensor.to(model.device)
    sequences = model.generate(**inputs, generation_config=generation_config)
    prompt_length = len(inputs["input_ids"][0])
    completion_ids = sequences[0][prompt_length:]
    reply = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return reply.strip()

In [39]:
# Run one retrieval-augmented question/answer turn
def rag_query(user_query, conversation=None, k=5):
    """Answer ``user_query`` using retrieved chunks as context.

    Args:
        user_query: the user's question.
        conversation: optional ``Conversation`` to extend. When given, the
            question and the reply are appended to it, so the caller can keep
            a multi-turn history. When omitted, a fresh conversation is used
            and discarded.
        k: number of chunks to retrieve.

    Returns:
        The generated reply text.
    """
    query_embedding = embed_bert_cls(user_query, model, tokenizer)
    top_k_indices = faiss_retriever_cosine.retrieve(query_embedding, k=k)

    # Negative ids are FAISS padding for "no result". Without this check they
    # would silently pull texts[-1] into the context.
    relevant_texts = [texts[idx] for idx in top_k_indices if idx >= 0]
    context = "\n".join(relevant_texts)

    if conversation is None:
        conversation = Conversation()
    conversation.add_user_message(f"Контекст: {context}\nВопрос: {user_query}")

    prompt = conversation.get_prompt(tokenizer_gen)
    output = generate(model_gen, tokenizer_gen, prompt, generation_config)

    # Record the reply so a caller-supplied conversation carries the full turn.
    conversation.add_bot_message(output)
    return output

In [38]:
# Interactive console loop around rag_query.
# Commands are matched case-insensitively after trimming whitespace:
#   "/заново" resets the conversation, "/стоп" leaves the loop.
while True:
    try:
        user_input = input("Вы: ")
    except (EOFError, KeyboardInterrupt):
        # Leave the loop cleanly when stdin is closed or the kernel is interrupted.
        break

    # Normalise once so both commands are parsed the same way.
    command = user_input.strip().lower()

    if command == "":
        continue

    if command == "/заново":
        # NOTE: rag_query, as called below, starts a fresh Conversation for
        # every question, so this object is not consulted when answering.
        conversation = Conversation()
        print("История сброшена!")
        continue

    if command == "/стоп":
        break

    response = rag_query(user_input)
    print("ruGPT-3.5:", response)

ruGPT-3.5: Ответ: Прогнозы погоды, как правило, включают данные о температуре воздуха на определенной высоте (обычно это 10-20 метров), скорости ветра и влажности. Они могут быть очень точными, но все же не всегда точны. В целом, погода может меняться каждый день или даже несколько раз за день.
ruGPT-3.5: Год 2017-й, месяц январь.
ruGPT-3.5: Конечно, это точный ответ!
