In [None]:
from flask import Flask, request, render_template_string
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub identifier of the causal language model served by this app.
model_name = "t-tech/T-lite-it-1.0"

# Prefer the GPU, but fall back to the CPU so the app can still start on a host
# without a usable CUDA device (a hard-coded "cuda" raises at load time there).
device = "cuda" if torch.cuda.is_available() else "cpu"

# float16 is kept on the GPU exactly as before; on the CPU float32 is used
# because half precision is not reliably supported by CPU kernels.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir="transcripting",  # directory passed to the library as its cache
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    low_cpu_mem_usage=True,
).to(device)

# NOTE(review): no cache_dir is passed here, unlike for the model above —
# confirm that using the library's default cache for the tokenizer is intended.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Jinja template for the single page of the app, rendered by the "/" view.
# It expects two variables:
#   prompt_text   - text placed back into the textarea of the form;
#   response_text - generated answer; the "response" block is emitted only
#                   when this value is truthy.
# The form has no "action" attribute and uses method="post".
page_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Flask Interface</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 40px;
            background: #f7f7f7;
        }
        h1 { color: #333; }
        form, .response {
            background: #fff;
            padding: 20px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        label {
            font-weight: bold;
        }
        input[type="text"], textarea {
            width: 100%;
            padding: 10px;
            margin-top: 10px;
            box-sizing: border-box;
        }
        button {
            background: #0d6efd;
            color: #fff;
            border: none;
            padding: 10px 20px;
            margin-top: 10px;
            cursor: pointer;
            border-radius: 4px;
        }
        button:hover {
            background: #0b5ed7;
        }
        .response {
            border: 1px solid #ddd;
        }
        .response p {
            white-space: pre-wrap;
        }
    </style>
</head>
<body>
    <h1>Qwen Chatbot Interface на Flask</h1>
    <form method="post">
        <label for="prompt">Введите ваш запрос:</label>
        <textarea name="prompt" id="prompt" rows="3">{{ prompt_text }}</textarea>
        <button type="submit">Сгенерировать ответ</button>
    </form>
    {% if response_text %}
    <div class="response">
        <h2>Ответ:</h2>
        <p>{{ response_text }}</p>
    </div>
    {% endif %}
</body>
</html>
"""

# Flask application object; the route below is registered on it and the
# __main__ guard at the bottom of the file runs it.
app = Flask(__name__)

def generate_response(prompt, max_new_tokens=512):
    """Ask the model for the OKPD-2 name that matches a procurement item.

    The prompt is embedded in a fixed Russian instruction, rendered through the
    tokenizer's chat template and passed to ``model.generate``.  Only the
    tokens that follow the input are decoded and returned.

    Args:
        prompt: Free-text description of the procurement item.  Surrounding
            whitespace is stripped so that a trailing newline coming from the
            HTML textarea does not end up between the text and the closing
            period of the instruction.
        max_new_tokens: Value forwarded to ``model.generate`` as
            ``max_new_tokens``.  Defaults to 512, the value that was
            previously hard-coded.

    Returns:
        The decoded model reply as a string, with special tokens skipped.
    """
    item = str(prompt).strip()
    messages = [
        {"role": "system", "content": "Ты умный ассистент"},
        {"role": "user", "content": f"Определи название ОКПД-2 по объекту закупки: {item}."},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Move the encoded batch to wherever the model lives.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
    )

    # The single output row starts with the input ids; keep only what follows.
    prompt_length = model_inputs.input_ids.shape[1]
    continuation = generated_ids[0][prompt_length:]

    return tokenizer.batch_decode([continuation], skip_special_tokens=True)[0]

@app.route("/", methods=["GET", "POST"])
def home():
    """Render the page; on POST also generate an answer for the "prompt" field.

    For a GET request, or a POST whose prompt is empty or whitespace-only,
    the page is rendered with an empty response.  Otherwise the submitted
    text is passed to ``generate_response`` and both the text and the reply
    are handed to the template.
    """
    submitted = request.method == "POST"

    # The form is only read for POST requests; GET starts with an empty prompt.
    prompt_text = request.form.get("prompt", "") if submitted else ""

    # Skip the model call when there is nothing but whitespace.
    response_text = generate_response(prompt_text) if prompt_text.strip() else ""

    return render_template_string(
        page_template,
        prompt_text=prompt_text,
        response_text=response_text,
    )

# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    # host="0.0.0.0" makes the server listen on all addresses; port is 8079.
    app.run(host="0.0.0.0", port=8079)


2024-12-19 15:38:27.361299: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-19 15:38:27.561250: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-19 15:38:27.561301: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-19 15:38:27.562295: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-19 15:38:27.661440: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8079
 * Running on http://192.168.9.78:8079
Press CTRL+C to quit
192.168.9.78 - - [19/Dec/2024 15:39:23] "GET / HTTP/1.1" 200 -
192.168.9.78 - - [19/Dec/2024 15:39:24] "GET /favicon.ico HTTP/1.1" 404 -
192.168.9.78 - - [19/Dec/2024 15:39:43] "POST / HTTP/1.1" 200 -
192.168.9.78 - - [19/Dec/2024 15:40:03] "POST / HTTP/1.1" 200 -
