# Настраиваем окружение

In [None]:
pip install gradio faiss-cpu langchain_community langchain_gigachat unstructured[all-docs] -q

# Импортируем библиотеки

In [2]:
import gradio as gr

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_gigachat.chat_models.gigachat import GigaChat
from langchain_community.embeddings.gigachat import GigaChatEmbeddings
from langchain_community.vectorstores import FAISS

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Инициализируем модели

In [4]:
# Read the GigaChat credentials from the Colab secret stored under the key "token".
from google.colab import userdata

giga_auth = userdata.get("token")

In [5]:
# GigaChat-Pro chat model.
# NOTE(review): verify_ssl_certs=False presumably turns off TLS certificate
# verification for API calls — confirm this is acceptable outside of a demo.
giga_pro = GigaChat(
    credentials=giga_auth,
    model="GigaChat-Pro",
    top_p=0.2,
    timeout=600,
    profanity_check=False,
    verify_ssl_certs=False,
)

In [6]:
# GigaChat-Max chat model; this is the model upload_pdf() passes to the chain.
# NOTE(review): verify_ssl_certs=False presumably turns off TLS certificate
# verification for API calls — confirm this is acceptable outside of a demo.
giga_max = GigaChat(
    credentials=giga_auth,
    model="GigaChat-Max",
    top_p=0.2,
    timeout=600,
    profanity_check=False,
    verify_ssl_certs=False,
)

In [None]:
# GigaChat embedding model used to vectorize document chunks for FAISS.
# NOTE(review): verify_ssl_certs=False presumably turns off TLS certificate
# verification for API calls — confirm this is acceptable outside of a demo.
giga_embed = GigaChatEmbeddings(
    credentials=giga_auth,
    scope="GIGACHAT_API_PERS",
    verify_ssl_certs=False,
)

# Вспомогательные функции

In [8]:
def create_conversational_chain(llm, vector_store):
    """Build a conversational retrieval chain over ``vector_store``.

    Args:
        llm: Chat model that the chain uses to generate answers.
        vector_store: Store exposing ``as_retriever``; queried for the 4
            nearest chunks per question.

    Returns:
        A ``ConversationalRetrievalChain`` using the ``'stuff'`` chain type
        and a fresh ``ConversationBufferMemory`` keyed as ``chat_history``.
    """
    # Each chain gets its own memory, so every call starts a new dialogue.
    history = ConversationBufferMemory(
        return_messages=True,
        memory_key="chat_history",
    )
    top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 4})
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=top_k_retriever,
        memory=history,
        chain_type='stuff',
    )

In [9]:
def create_vector_store(pdf_files):
    """Load PDFs, split them into chunks and index the chunks in FAISS.

    Args:
        pdf_files: Iterable of uploaded files. Each item may be a path
            (``str`` or ``os.PathLike``) or an object exposing the path as
            its ``.name`` attribute. ``None`` or an empty iterable is allowed.

    Returns:
        The FAISS vector store built with the module-level ``giga_embed``
        embedder, or ``None`` when there is nothing to index (no files were
        given, or no chunks were produced from them).

    Side effects:
        On success the index is also saved locally under ``"faiss_index"``.
    """
    import os

    # Nothing was uploaded: report "no store" instead of failing in the loop.
    if not pdf_files:
        return None

    documents = []
    for pdf_file in pdf_files:
        # Accept both plain paths and upload objects that carry the path in
        # `.name`; for a path-like, `.name` would only be the basename.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        documents.extend(UnstructuredPDFLoader(pdf_path).load())

    # Split the loaded documents into overlapping fragments.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=100)
    text_chunks = text_splitter.split_documents(documents)

    # No chunks were produced, so there is nothing to embed or index.
    if not text_chunks:
        return None

    # Embed the chunks, build the vector index and persist it to disk.
    vector_store = FAISS.from_documents(text_chunks, embedding=giga_embed)
    vector_store.save_local("faiss_index")
    return vector_store

In [10]:
def upload_pdf(pdf_files):
    """Gradio handler: index the uploaded PDFs and prepare the QA chain.

    Args:
        pdf_files: Value of the file input; may be ``None`` or empty when the
            button is pressed before any file is selected.

    Returns:
        A status message for the UI: the success text when the vector store
        and the chain were built, otherwise the error text.

    Side effects:
        On success, rebinds the module-level ``vector_store`` and
        ``conversational_chain``. On failure both are left untouched, so a
        previously loaded document stays usable.
    """
    import logging

    global vector_store, conversational_chain

    failure_message = "Ошибка при загрузке pdf-файла. Попробуйте ещё раз."

    # Button pressed without selecting a file.
    if not pdf_files:
        return failure_message

    try:
        new_store = create_vector_store(pdf_files)
        if not new_store:
            return failure_message
        new_chain = create_conversational_chain(giga_max, new_store)
    except Exception:
        # UI boundary: keep the app responsive and show the error status,
        # but record the full traceback for diagnosis.
        logging.getLogger(__name__).exception("Failed to process uploaded PDF files")
        return failure_message

    # Publish the new state only after everything was built successfully.
    vector_store = new_store
    conversational_chain = new_chain
    return "PDF-файл успешно загружен. Задайте мне вопрос!"


In [11]:
def ask_question(question):
    """Gradio handler: answer ``question`` using the uploaded documents.

    Args:
        question: The user's question text.

    Returns:
        The chain's ``"answer"`` string, or a prompt asking the user to
        upload a PDF first when no document has been loaded yet.
    """
    # upload_pdf() creates these module-level names on its first success;
    # before that they do not exist, so look them up without a NameError.
    chain = globals().get("conversational_chain")
    store = globals().get("vector_store")
    if chain is None or store is None:
        return "Пожалуйста, для начала загрузите pdf-файл."

    # NOTE(review): the chain is created with its own ConversationBufferMemory
    # (memory_key="chat_history"), so the empty list passed here is presumably
    # superseded by the stored history — confirm before relying on it.
    result = chain.invoke({"question": question, "chat_history": []})
    return result["answer"]

# UI

In [12]:
# Build the Gradio interface: one row for uploading PDFs, one for asking questions.
with gr.Blocks() as demo:
    gr.Markdown("## chatbot dino_risk")

    # Upload row: file picker, trigger button and a read-only status line.
    with gr.Row():
        pdf_input = gr.File(
            label="Загрузите ваш pdf-файл.",
            file_count="multiple",
            type="filepath",
        )
        upload_button = gr.Button("Загрузка pdf-файла")
        status_output = gr.Textbox(
            label="Статус загрузки:",
            interactive=False,
        )

    # Question row: question box, trigger button and the answer box.
    with gr.Row():
        question_input = gr.Textbox(label="Задайте вопрос")
        ask_button = gr.Button("Спросить")
        answer_output = gr.Textbox(
            label="Ответ ИИ",
            interactive=True,
        )

    # Wire the buttons to their handlers.
    upload_button.click(
        fn=upload_pdf,
        inputs=[pdf_input],
        outputs=[status_output],
    )
    ask_button.click(
        fn=ask_question,
        inputs=[question_input],
        outputs=[answer_output],
    )


In [None]:
# Start the Gradio app held in `demo`.
# NOTE(review): debug=True presumably keeps the cell running and surfaces
# handler errors in the cell output (see the Gradio launch() docs) — confirm.
demo.launch(debug=True)