In [1]:
import asyncio
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import pandas as pd
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, BertModel
import re
import pickle


p_d = './,][-")(~!#@^%$;*?&№∙^:<:>=_+\|`1°234}{567890'

def preprocess(text):
    output = text.replace('\n', ' ').replace('\t', ' ').replace('\u200c', ' ')
    output = re.sub(r'''(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))''', "", output)
    output = re.sub(r'^https?:\/\/.*[\r\n]*', '', output, flags=re.MULTILINE)
    for k in p_d:
        output = output.replace(k,' ')
    output = output.replace('  ', ' ')
    return output.strip()

class BertCLS(nn.Module):
    def __init__(self, model, n_classes):
        super(BertCLS, self).__init__()
        self.model = model
        self.fc = nn.Linear(1024, n_classes)
    
    def forward(self, batch):
        return self.fc(self.model(**batch).pooler_output)

df = pd.read_csv('./data/train_normal.csv')
classes_1 = df['class_1'].unique()
classes_1_str2int = {classes_1[i]:i for i in range(len(classes_1))}
classes_1_int2str = {i:classes_1[i] for i in range(len(classes_1))}
classes_2 = df['class_2'].unique()
classes_2_str2int = { classes_2[i]:i for i in range(len(classes_2))}
classes_2_int2str = { i:classes_2[i] for i in range(len(classes_2))}


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "deepvk/USER-bge-m3"
tokenizer = AutoTokenizer.from_pretrained(model_name)


# model_1 = BertModel.from_pretrained(
#             model_name, 
#             ignore_mismatched_sizes=True, 
#             num_labels=len(classes_1)
#         )
# model_1 = BertCLS(model_1, n_classes=len(classes_1))

# model_2 = BertModel.from_pretrained(
#             model_name, 
#             ignore_mismatched_sizes=True, 
#             num_labels=len(classes_1)
#         )
# model_1 = BertCLS(model_1, n_classes=len(classes_1))
# model_1.load_state_dict(torch.load('./models-all-classes/deepvk_mge_class_1.pth'), strict=False)

# model_2 = BertCLS(model_2, n_classes=len(classes_2))
# model_2.load_state_dict(torch.load('./models-all-classes/deepvk_mge_class_2.pth'), strict=False)
class BertCLS(nn.Module):
    def __init__(self, model, n_classes):
        super(BertCLS, self).__init__()
        self.model = model
        self.fc = nn.Linear(1024, n_classes)
    
    def forward(self, batch):
        return self.fc(self.model(**batch).pooler_output)
# torch.load('./models-all-classes/last_class_1.pth')
# model_1 = torch.load('./models-all-classes/last_class_2.pth')
model_2 = torch.load('./models-all-classes/last_class_2.pth')
token = lambda model_input: tokenizer(model_input, padding=True,
                    max_length=512, truncation=True,
                    return_tensors='pt')

def predict_1(inputs):
    data = token(inputs)
    data = data.to(device)
    embeddings = model_1(data)
    return classes_1_int2str[embeddings.argmax(-1).detach().cpu().numpy()]
def predict_2(inputs):
    data = token(inputs)
    data = data.to(device)
    embeddings = model_2(data)
    return classes_2_int2str[embeddings.argmax(-1).detach().cpu().numpy()]

from fullrag import llmmodel
rag = llmmodel()


class Request(BaseModel):
    question: str


class Response(BaseModel):
    answer: str
    class_1: str
    class_2: str

app = FastAPI()


@app.get("/")
def index():
    return {"text": "Интеллектуальный помощник оператора службы поддержки."}

    
@app.post("/predict")
async def predict_sentiment(request: Request):
    text = request.question
    bz = rag.search_db(rag.db_main, text)
    quests, theme = [bz[i].dict()['metadata']['Вопрос из БЗ'] for i in range(5)], bz[0].dict()['metadata']['Тема']
    text_class = text + ' | ' + theme
    for i in range(len(quests)):
        text_class += ' | ' + quests[i]
    class_1, class_2 = predict_1(text), predict_2(text)
    
    response = Response(
        answer=rag.generate_answer(text),
        class_1=class_1, # Классификатор оценивается опционально; при отсутствии можно задать константное значение.
        class_2=class_2, # Классификатор оценивается опционально; при отсутствии можно задать константное значение.
    )
    return response




  model_2 = torch.load('./models-all-classes/last_class_2.pth')


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
if __name__ == "__main__":
    host = "127.0.0.1" # Сконфигурируйте host согласно настройкам вашего сервера.
    config = uvicorn.Config(app, host=host, port=80)
    server = uvicorn.Server(config)
    loop = asyncio.get_running_loop()
    loop.create_task(server.serve())

INFO:     Started server process [24039]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 13] error while attempting to bind on address ('127.0.0.1', 80): permission denied
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
