In [4]:
from transformers import pipeline

# 1. Sentiment analysis: run one sentence through the classifier and print the raw output.
sentiment_model_id = "distilbert-base-uncased-finetuned-sst-2-english"
classifier = pipeline(task="sentiment-analysis", model=sentiment_model_id)

sentiment_input = "I love this!"
result = classifier(sentiment_input)
print("Sentiment Analysis Result:", result)

# 2. Text Generation:
from transformers import set_seed

generator = pipeline("text-generation", model="gpt2")

# Seed the Python / NumPy / PyTorch RNGs so that a sampled continuation is
# repeatable when the notebook is re-run from a fresh kernel.
set_seed(42)

# Only `max_new_tokens` is passed here. The library still reports a `max_length`
# of 50 alongside it (see the warning in this cell's output), so that value does
# not come from this call; per the same warning, `max_new_tokens` takes precedence.
# `pad_token_id` is set explicitly to the EOS id, which is the same fallback the
# library applies (and logs) on its own when no pad token is supplied.
# `num_return_sequences=1` requests a single generated text.
text_result = generator(
    "Once upon a time",
    max_new_tokens=50,
    num_return_sequences=1,
    pad_token_id=generator.tokenizer.eos_token_id,
)
print("Text Generation Result:", text_result[0]["generated_text"])

# 3. Question answering: ask one question against one context passage and print the result.
qa = pipeline(task="question-answering", model="deepset/minilm-uncased-squad2")

# Keep the inputs together in one mapping and unpack them as keyword arguments.
qa_inputs = {
    "question": "What is AI?",
    "context": "Artificial Intelligence is the simulation of human intelligence processes by machines, especially computer systems.",
}
qa_result = qa(**qa_inputs)
print("Question Answering Result:", qa_result)

Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]

Sentiment Analysis Result: [{'label': 'POSITIVE', 'score': 0.9998764991760254}]


Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=50) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Text Generation Result: Once upon a time, when the light of the sun and the light of the moon were all that was necessary to make a human being happy, he would have been much more successful in his quest to have this creature live.

Because of his efforts, he made


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForQuestionAnswering LOAD REPORT from: deepset/minilm-uncased-squad2
Key                      | Status     |  | 
-------------------------+------------+--+-
bert.pooler.dense.weight | UNEXPECTED |  | 
bert.pooler.dense.bias   | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Question Answering Result: {'score': 0.27947568893432617, 'start': 27, 'end': 85, 'answer': 'the simulation of human intelligence processes by machines'}


In [5]:
# TASK 2: AutoModel & AutoTokenizer (3 hours)
import torch
from transformers import AutoTokenizer, AutoModel

# TODO:
# 1. Load BERT:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# 2. Tokenization (return_tensors="pt" requests PyTorch tensors):
tokens = tokenizer("Hello, how are you?", return_tensors="pt")

# 3. Get embeddings:
# This is a forward pass for inspection only, so gradient tracking is disabled
# to avoid building an autograd graph that is never used.
with torch.no_grad():
    outputs = model(**tokens)
embeddings = outputs.last_hidden_state

# 4. Understand: tokenizer, input_ids, attention_mask, embeddings

# TASK 3: Different models (2 hours)
# TODO: Try:
# - BERT (bert-base-uncased)
# - RoBERTa (roberta-base)
# - DistilBERT (distilbert-base-uncased)
# - GPT-2 (gpt2)
# Compare: size, speed, quality

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

KeyboardInterrupt: 