In [None]:
import torch

# Select the compute device for this session. Prefer the GPU when PyTorch can
# see one; otherwise fall back to the CPU so that `device` is always bound
# once this cell has run.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Release cached GPU memory that PyTorch is holding but not currently
    # using (helps when the notebook is re-run inside a long-lived kernel).
    torch.cuda.empty_cache()
else:
    print("CUDA is not available.")
    device = torch.device("cpu")

In [None]:
# NOTE(review): rebinds `device` to the first GPU ("cuda:0") without calling
# torch.cuda.is_available() in this cell — confirm a GPU is always present.
device = torch.device("cuda:0")
# Bare expression: displayed as this cell's output.
device

In [None]:
import requests
import json
from typing import Any, List, Tuple

import numpy as np
import torch
from numpy import ndarray
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from transformers import (  # type: ignore
    AutoModelForSequenceClassification,
    BatchEncoding,
    AutoTokenizer,
    PreTrainedTokenizerBase,
)

In [None]:
class MyTextDataset(Dataset):  # type: ignore
    """Map-style dataset over an in-memory list of sentences.

    Each item is returned together with its position in the list, so a
    consumer can tell which input sentence a given item came from.
    """

    def __init__(self, sentence_list: List[str]) -> None:
        """Keep a reference to ``sentence_list`` (the list is not copied)."""
        self.sentences = sentence_list

    def __len__(self) -> int:
        """Return how many sentences the dataset holds."""
        total = len(self.sentences)
        return total

    def __getitem__(self, idx: int) -> Tuple[int, str]:
        """Return the pair ``(idx, sentence)`` for position ``idx``."""
        sentence = self.sentences[idx]
        return (idx, sentence)


class MyCollateBatch:
    """Collate ``(index, sentence)`` pairs into a single tokenized batch.

    Intended to be passed as ``collate_fn`` to a ``DataLoader``. The returned
    encoding holds the tokenizer's tensors plus an extra ``"idx"`` entry with
    the original positions of the sentences in the batch.

    Args:
        tokenizer: Tokenizer used to encode the sentences.
        max_length: Maximum sequence length; longer inputs are truncated.
            Defaults to 512, the value that used to be hard-coded.
        padding: Padding strategy forwarded to the tokenizer. The default
            ``"max_length"`` pads every sequence to ``max_length``; a strategy
            such as ``"longest"`` can be passed to pad only up to the longest
            sequence of the batch.
    """

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerBase,
        max_length: int = 512,
        padding: str = "max_length",
    ) -> None:
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.padding = padding

    def __call__(self, batch: List[Tuple[int, str]]) -> BatchEncoding:
        """Tokenize the sentences of ``batch`` and attach their indices.

        Args:
            batch: ``(index, sentence)`` pairs.

        Returns:
            The tokenizer output (PyTorch tensors) with an additional
            ``"idx"`` key holding the list of indices, in batch order.
        """
        idx = [item[0] for item in batch]
        sentences = [item[1] for item in batch]

        text = self.tokenizer(
            sentences,
            max_length=self.max_length,
            padding=self.padding,
            truncation=True,
            return_tensors="pt",
        )
        # Plain Python list, not a tensor: consumers must handle it separately
        # from the tensor entries (e.g. skip it when moving data to a device).
        text["idx"] = idx
        return text


class ModelSentiment:
    """Sequence-classification wrapper that scores lists of sentences.

    Loads a tokenizer and an ``AutoModelForSequenceClassification`` model from
    ``model_folder``, moves the model to ``device`` and switches it to eval
    mode. Calling the instance returns softmax scores for every sentence.

    Args:
        model_folder: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.
        device: Device the model and the input tensors are moved to.
        batch_size: Number of sentences per forward pass. Defaults to 1,
            the value that used to be hard-coded.
    """

    def __init__(self, model_folder: str, device: torch.device, batch_size: int = 1) -> None:
        self.device = device
        self.model_folder = model_folder
        self.batch_size = batch_size

        self.tokenizer = AutoTokenizer.from_pretrained(model_folder)  # type: ignore
        self.model = AutoModelForSequenceClassification.from_pretrained(model_folder, return_dict=True)  # type: ignore
        self.collate_fn = MyCollateBatch(self.tokenizer)

        self.model.to(device)
        self.model.eval()

    def __call__(self, sentence_list: List[str]) -> ndarray:
        """Score every sentence in ``sentence_list``.

        Args:
            sentence_list: Sentences to classify.

        Returns:
            Array of shape ``(len(sentence_list), number_of_classes)`` where
            row ``i`` holds the softmax of the model's logits for
            ``sentence_list[i]``. An empty input yields an array with 0 rows.
        """
        data_ds = MyTextDataset(sentence_list)
        loader = DataLoader(data_ds, batch_size=self.batch_size, collate_fn=self.collate_fn)
        result = np.zeros((len(sentence_list), len(self.class_names())))

        # Inference only: disable gradient tracking once for the whole loop.
        with torch.no_grad():
            for batch in tqdm(loader):
                idx = batch["idx"]
                # "idx" is a plain list of row positions, not a model input.
                inputs = {k: v.to(self.device) for k, v in batch.items() if k != "idx"}
                logits = self.model(**inputs).logits
                predictions = torch.softmax(logits, dim=-1)
                # Write each row back at the position of its source sentence.
                result[idx, :] = predictions.to("cpu").numpy()

        return result

    def class_names(self) -> Any:
        """Return the model config's ``id2label`` mapping."""
        return self.model.config.id2label

In [None]:
# Base URL of the remote service; the request URLs in this notebook are built
# by appending an endpoint path to it.
URL = "https://service-index-ai.ethics.hse.ru"

In [None]:
# Query the service's /source endpoint; the response is inspected below.
r = requests.get(URL+"/source")

In [None]:
# Display the decoded JSON body of the /source response.
r.json()

In [None]:
# POST the model's name and type to /model. The payload is serialized with
# json.dumps and sent through `data=`.
# NOTE(review): the HTTP status of this call is not checked — confirm that a
# failed request is acceptable to surface only in the next cell.
r = requests.post(URL+"/model", data=json.dumps({"model_name":"ethics_model_sentiment_e5","model_type":"sentiment"}))

In [None]:
# Read "model_id" from the /model response (raises KeyError if it is absent).
model_id = r.json()["model_id"]

In [None]:
# Display the id obtained above.
model_id

In [None]:
# NOTE(review): rebinds `device` to CUDA unconditionally, overriding any
# earlier value — confirm a GPU is always available here.
device = torch.device("cuda")

In [None]:
# Build the sentiment model wrapper from "needed_e5_large" on `device`.
m = ModelSentiment("needed_e5_large", device)

In [None]:
# Display the wrapper object.
m

In [None]:
# With top-to-bottom execution `r` is still the POST /model response here.
r.text

In [None]:
# Query parameters for GET /text/sentences. Commented-out entries are sources
# that are currently excluded from the request.
params = {
    "sources": [
        # "banki.ru/broker",
        "banki.ru/mfo",
        "banki.ru/insurance",
        "vk.com/other",
        # "irecommend.ru",
        "vk.com/comments",
        "banki.ru",
        "sravni.ru",
        "banki.ru/news",
    ],
    # NOTE(review): hard-coded, while the results below are uploaded under the
    # `model_id` returned by POST /model — confirm both refer to the same model.
    "model_id": 6,
    "limit": 1000,
}

# Upper bound on the number of fetch -> score -> upload rounds.
MAX_ITERATIONS = 100_000

for _ in tqdm(range(MAX_ITERATIONS)):
    # 1. Fetch the next page of sentences; fail fast on an HTTP error instead
    #    of tripping over an unexpected response body.
    response = requests.get(URL + "/text/sentences", params=params)
    response.raise_for_status()
    items = response.json()["items"]  # decode the body once
    if not items:
        # Nothing to score: stop instead of polling the service with empty
        # uploads for the remaining iterations.
        break

    sentences = [item["sentence"] for item in items]
    ids = [item["id"] for item in items]

    # 2. Score the sentences (the model wrapper already runs under
    #    torch.no_grad(), so no extra context manager is needed here).
    result_sentences = m(sentences)

    # 3. Upload one result record per sentence.
    request_data = [
        {"model_id": model_id, "text_sentence_id": sentence_id, "text_result": result_arr.tolist()}
        for result_arr, sentence_id in zip(result_sentences, ids)
    ]
    response = requests.post(URL + "/text_result/", data=json.dumps({"items": request_data}))
    if response.status_code != 201:
        # The loop treats anything other than 201 as a failed upload.
        raise RuntimeError(response.text)

In [None]:
# Completion marker: with top-to-bottom execution this is printed once the
# processing loop above has ended without raising.
print('finish')