In [1]:
!pip install gdown
!pip install langchain_community
!pip install langchain_huggingface
!pip install chromadb

Collecting langchain_community
  Downloading langchain_community-0.3.19-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.41 (from langchain_community)
  Downloading langchain_core-0.3.41-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.20 (from langchain_community)
  Downloading langchain-0.3.20-py3-none-any.whl.metadata (7.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [1]:
!gdown https://drive.google.com/drive/u/0/folders/1XiCxfPxVqQzHiYZqK2jr7jiHsRdg78-L --folder

Retrieving folder contents
Retrieving folder 1KLX6pQb8v8BiznOMBO3SrHY6aG3gBXFN 0e7e65d6-c2e8-40c6-aaf5-c60aec8d65f9
Processing file 14HJKbJ3s1mVbafdyJmCv8qP0QCmNEMz0 data_level0.bin
Processing file 1XdQjOAIeYrEPmX4VoaVyCCb-4_yL5Z2d header.bin
Processing file 1r3nzxM3ftBDFWzK-cOYEqToZVdgkxZhA index_metadata.pickle
Processing file 1erxiWS6Tjd3Ei2U-R-iBJQfSdnCaswhX length.bin
Processing file 1Nl4mMQ_tkJLyiR5oEr3h65CTTyPJzA9X link_lists.bin
Processing file 1DIgiuzocoXHoQ38p1TEgU6bie7xsQnBa chroma.sqlite3
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=14HJKbJ3s1mVbafdyJmCv8qP0QCmNEMz0
To: /content/chroma_e5_new_21/0e7e65d6-c2e8-40c6-aaf5-c60aec8d65f9/data_level0.bin
100% 16.9M/16.9M [00:00<00:00, 111MB/s] 
Downloading...
From: https://drive.google.com/uc?id=1XdQjOAIeYrEPmX4VoaVyCCb-4_yL5Z2d
To: /content/chroma_e5_new_21/0e7e65d6-c2e8-40c6-aaf5-c60aec8d65f9/header.bin
100% 100/100 [0

In [1]:
import json
import os
from typing import Callable, Dict, List
import nltk
import pandas as pd
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")

from pathlib import Path
from langchain.chains import RetrievalQA
from langchain.chat_models.gigachat import GigaChat
from langchain.docstore.document import Document
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
from langchain_community.embeddings.gigachat import GigaChatEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import os
ROOT = os.getcwd()

import sys

# VectorDB

In [4]:
def load_chroma(persist_directory, embeddings):
    """
    Загружай хрому, если обучил
    """
    assert os.path.isdir(persist_directory), "Firstly use create_vectordb func"

    return Chroma(
        persist_directory=persist_directory, embedding_function=embeddings
    )


In [5]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}

embeddings_e5 = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs = model_kwargs,
    encode_kwargs=encode_kwargs,
)


In [6]:
vectordb=load_chroma(os.path.join(ROOT, "chroma_e5_new_21"), embeddings_e5)

# GET ANSWER

In [7]:
import yaml

def load_yaml(path_to_config: str) -> dict:
    with open(path_to_config, "r") as f:
        config = yaml.safe_load(f)
    return config

In [8]:
def create_qa_pipeline(llm, vectordb):

    system_prompt = load_yaml("config/system_prompt.yaml")
    messages = [("system", system_prompt["system_template"]), ("human", system_prompt["user_template"])]

    QA_CHAIN_PROMPT = ChatPromptTemplate.from_messages(messages,)
    retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k":4, "fetch_k":20, "lambda":.6})
    compressor = LLMChainExtractor.from_llm(llm)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=compression_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )
    return qa_chain

In [9]:
def generate_answer(question, llm, vectordb):
    """
    Метод генерации ответов на вопросы.
    Прогоняем на тестовом сете.
    """
    qa_chain=create_qa_pipeline(llm, vectordb)

    result = qa_chain({"query": question})

    return result['result'], result['source_documents']

## Модель

In [10]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab and Kaggle notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
    !pip install --no-deps cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [17]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Using cached bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Using cached bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
Installing collected packages: bitsandbytes
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth 2025.3.3 requires tyro, which is not installed.
unsloth 2025.3.3 requires protobuf<4.0.0, but you have protobuf 5.29.3 which is incompatible.[0m[31m
[0mSuccessfully installed bitsandbytes-0.45.3


In [11]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.3: Fast Mistral patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/157 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

In [12]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.3.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [14]:
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Создаем пайплайн для генерации текста
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    temperature=0.7
)

# Создаем LangChain-совместимый объект
llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'Jam

## Чтобы запустить пайплайн

In [15]:
from tqdm import tqdm

q = 'На каком курсе я могу поступить на военную кафедру?'

answ, source = generate_answer(q, llm, vectordb)

ValidationError: 1 validation error for StuffDocumentsChain
  Value error, document_variable_name context was not found in llm_chain input_variables: ['question', 'summaries'] [type=value_error, input_value={'llm_chain': LLMChain(ve...None, 'callbacks': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error