# Sentiment analysis

# DistilBERT (distilbert-base-uncased)

# DistilBERT-based multilingual sentiment classification model

In [None]:
from transformers import pipeline

# Text-classification pipeline backed by the tabularisai multilingual
# sentiment checkpoint; reused by the next cell.
pipe_1 = pipeline(
    task="text-classification",
    model="tabularisai/multilingual-sentiment-analysis",
)

Device set to use cpu


In [None]:
# Russian sample, roughly: "The film turned out to be boring and drawn-out".
sentence = "Фильм оказался скучным и затянутым"
# Classify the single sentence with the pipeline built in the previous cell.
result = pipe_1(sentence)

# The printed value is a list with one dict holding 'label' and 'score'.
print(result)

[{'label': 'Negative', 'score': 0.5545058846473694}]


# nlptown/bert-base-multilingual-uncased-sentiment

In [None]:
# Second text-classification pipeline, this time using the nlptown
# multilingual sentiment checkpoint; reused by the next cell.
pipe_2 = pipeline(
    task="text-classification",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

Device set to use cpu


In [None]:
# Russian sample, roughly: "This phone works as expected, without surprises".
sentence = "Этот телефон работает, как и ожидалось, без сюрпризов"
# Classify the single sentence with the nlptown pipeline from the previous cell.
result = pipe_2(sentence)

# In the recorded output the label is a star rating ('5 stars') with its score.
print(result)

[{'label': '5 stars', 'score': 0.5823605060577393}]


# seara/rubert-tiny2-russian-sentiment






In [None]:
from transformers import pipeline
# No task argument is passed here; the pipeline is created from the model id alone.
model = pipeline(model="seara/rubert-tiny2-russian-sentiment")
# Russian sample, roughly: "I feel so-so about this product". As the last
# expression of the cell, the prediction is displayed as the cell output.
model("я более менее отношусь к этому продукту")

Device set to use cpu


[{'label': 'neutral', 'score': 0.6445968151092529}]

# SetFit

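Few-shot classification with class prototypes: each class is represented by the mean embedding of its support examples, and a query text is assigned to the class whose prototype has the highest cosine similarity.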

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
class TaskAdaptiveSemanticFeatureLearner(nn.Module):
    """Pretrained transformer encoder followed by a linear adaptation layer.

    The encoder is loaded with ``AutoModel.from_pretrained(model_name)``. The
    adaptation layer is a freshly initialised square ``nn.Linear`` whose width
    is the encoder's ``config.hidden_size``.

    Args:
        model_name: Model identifier passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(model_name)
        hidden_size = self.base_model.config.hidden_size
        self.adaptation_layer = nn.Linear(hidden_size, hidden_size)

    def forward(self, input_ids, attention_mask):
        """Encode a tokenised batch into one adapted feature vector per sequence.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            The adaptation layer applied to the pooled sentence representation.
        """
        outputs = self.base_model(input_ids, attention_mask=attention_mask)
        # Not every encoder exposes a pooled output (DistilBERT-style models
        # return only hidden states), so fall back to the hidden state of the
        # first token instead of failing with AttributeError.
        pooled_output = getattr(outputs, "pooler_output", None)
        if pooled_output is None:
            pooled_output = outputs.last_hidden_state[:, 0]
        return self.adaptation_layer(pooled_output)

In [None]:
class FewShotClassifier(nn.Module):
    """Feature learner plus a small feed-forward classification head.

    The head is ``Linear(hidden_size, hidden_dim) -> ReLU -> Linear(hidden_dim,
    num_classes)``. Note that ``few_shot_classification`` in this notebook only
    uses ``feature_learner``; the head is exercised only through ``forward``.

    Args:
        model_name: Model identifier forwarded to the feature learner.
        num_classes: Number of output logits (defaults to 3, the previous
            hard-coded value).
        hidden_dim: Width of the hidden layer in the head (defaults to 128,
            the previous hard-coded value).
    """

    def __init__(self, model_name, num_classes=3, hidden_dim=128):
        super().__init__()
        self.feature_learner = TaskAdaptiveSemanticFeatureLearner(model_name)
        feature_size = self.feature_learner.base_model.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Linear(feature_size, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, input_ids, attention_mask):
        """Return class logits for a tokenised batch.

        Args:
            input_ids: Token id tensor forwarded to the feature learner.
            attention_mask: Attention mask tensor forwarded to the feature learner.

        Returns:
            Logits produced by the classification head.
        """
        features = self.feature_learner(input_ids, attention_mask)
        return self.classifier(features)

In [None]:
def get_text_embedding(model, tokenizer, texts):
    """Embed a batch of texts with ``model.feature_learner``.

    Args:
        model: Object exposing a ``feature_learner`` callable that accepts
            ``input_ids`` and ``attention_mask`` keyword arguments.
        tokenizer: Callable that tokenises ``texts`` and returns a mapping of
            tensors (called with padding, truncation and ``return_tensors="pt"``).
        texts: Texts to embed.

    Returns:
        A NumPy array holding the feature learner's output for the batch.
    """
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

    # Run on whatever device the model's weights live on; default to CPU when
    # the model exposes no parameters.
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else torch.device("cpu")

    # The feature learner only accepts these two inputs; drop extras such as
    # token_type_ids and move the rest to the model's device.
    inputs = {
        key: tensor.to(device)
        for key, tensor in encoded.items()
        if key in ("input_ids", "attention_mask")
    }
    with torch.no_grad():
        outputs = model.feature_learner(**inputs)
    # .numpy() only works on CPU tensors, so bring the result back first.
    return outputs.detach().cpu().numpy()

def few_shot_classification(model, tokenizer, support_set, query_texts):
    """Classify query texts by cosine similarity to per-category prototypes.

    Each category's prototype is the mean of the embeddings of its support
    examples. Every query is embedded on its own and assigned to the category
    whose prototype is most similar; ties go to the category that appears
    first in ``support_set``.

    Args:
        model: Model passed through to ``get_text_embedding``.
        tokenizer: Tokenizer passed through to ``get_text_embedding``.
        support_set: Mapping of category name to a list of example texts.
        query_texts: Iterable of texts to classify.

    Returns:
        The predicted category for each query, in input order. Each prediction
        is also printed.

    Raises:
        ValueError: If ``support_set`` is empty or a category has no examples.
    """
    if not support_set:
        raise ValueError("support_set must contain at least one category")

    categories = []
    prototypes = []
    for category, examples in support_set.items():
        if len(examples) == 0:
            raise ValueError(f"category '{category}' has no support examples")
        embeddings = get_text_embedding(model, tokenizer, examples)
        categories.append(category)
        prototypes.append(np.mean(embeddings, axis=0))
    # One row per category, so each query needs a single similarity call.
    prototype_matrix = np.stack(prototypes)

    predictions = []
    for query in query_texts:
        query_embedding = get_text_embedding(model, tokenizer, [query])[0]
        scores = cosine_similarity([query_embedding], prototype_matrix)[0]
        predicted_category = categories[int(np.argmax(scores))]
        print(f"Text: '{query}' -> Predicted Category: {predicted_category}")
        predictions.append(predicted_category)
    return predictions

In [None]:
def main(seed=42):
    """Run the few-shot sentiment demo and print one prediction per query.

    Args:
        seed: Seed for torch's global random generator. The adaptation layer
            and classifier head of ``FewShotClassifier`` are freshly
            initialised and never trained here, so without a fixed seed the
            embeddings (and possibly the predictions) change from run to run.
    """
    torch.manual_seed(seed)

    model_name = "distilbert-base-multilingual-cased"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = FewShotClassifier(model_name)
    # Inference only: make sure layers such as dropout are in evaluation mode.
    model.eval()

    support_set = {
        "positive": ["I love this product!", "This is amazing!", "Absolutely fantastic!"],
        "negative": ["I hate this.", "This is terrible.", "Awful experience."],
        "neutral": ["It's okay, not great.", "It's average.", "I feel indifferent."]
    }

    # The last query is Russian, roughly "so-so service".
    query_texts = ["This is a great day!", "I dislike this service.", "более менее сервис"]

    few_shot_classification(model, tokenizer, support_set, query_texts)

if __name__ == "__main__":
    main()

Text: 'This is a great day!' -> Predicted Category: positive
Text: 'I dislike this service.' -> Predicted Category: negative
Text: 'более менее сервис' -> Predicted Category: neutral


In [None]:
# Notebook-scope re-run of the few-shot demo.
# The tokenizer, model and support set are built here because the ones created
# inside main() are local to that function. The model gets its own name so the
# notebook-level `model` (the seara sentiment pipeline) is not passed by mistake.
model_name = "distilbert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
few_shot_model = FewShotClassifier(model_name)

support_set = {
    "positive": ["I love this product!", "This is amazing!", "Absolutely fantastic!"],
    "negative": ["I hate this.", "This is terrible.", "Awful experience."],
    "neutral": ["It's okay, not great.", "It's average.", "I feel indifferent."]
}

query_texts = ["This is a great day!", "I dislike this service.", "более менее сервис"]

few_shot_classification(few_shot_model, tokenizer, support_set, query_texts)

IndentationError: unexpected indent (<ipython-input-12-217e7bd72a48>, line 3)