In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Checkpoint id shared by the tokenizer and the language-model head.
model_name = "gpt2-medium"

tokenizer = GPT2Tokenizer.from_pretrained(model_name)

model = GPT2LMHeadModel.from_pretrained(model_name)

prompt_text = "France is known for"

# Calling the tokenizer directly returns the attention mask together with the
# token ids, so generate() is told explicitly which positions are real tokens
# instead of having to infer it (the padding id below is the same as EOS).
encoded_prompt = tokenizer(prompt_text, return_tensors="pt")
input_ids = encoded_prompt["input_ids"]
print(input_ids)

# The end-of-sequence id is passed as the padding id.
# max_length bounds the whole sequence, prompt tokens included.
output = model.generate(
    input_ids,
    attention_mask=encoded_prompt["attention_mask"],
    max_length=30,
    pad_token_id=tokenizer.eos_token_id,
)
print(output)

# Turn the first (and only) generated sequence back into text.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print("Generated text:")

print(generated_text)

tensor([[28572,   318,  1900,   329]])
tensor([[28572,   318,  1900,   329,   663,  1029,  3081,   286,  1204,    11,
           475,   340,   318,   635,   257,  1499,   810,   262,  2811,  9588,
           318,   655,   720,  1270,    11,   830,   257,   614,    13,   198]])
Generated text:
France is known for its high quality of life, but it is also a country where the average salary is just $30,000 a year.



In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# One checkpoint id drives both the tokenizer and the seq2seq model.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# The prompt opens with the "summarize:" task prefix, followed by the passage.
input_text = ''' summarize:
Python is a high-level, general-purpose programming language.
Its design philosophy emphasizes code readability with the use of significant indentation.
Python is dynamically typed and garbage-collected.
It supports multiple programming paradigms, including structured, object-oriented and functional programming.
'''

# Encode the prompt as a PyTorch tensor of token ids.
encoded_text = tokenizer(input_text, return_tensors='pt')
input_ids = encoded_text['input_ids']

# Generate the summary ids, capped at 40 tokens.
output = model.generate(input_ids, max_length=40)

# Decode the single generated sequence, dropping special tokens.
summary = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
print(summary)

Python is a high-level, general-purpose programming language. it supports multiple programming paradigms, including structured, object-oriented and functional programming.


In [None]:
from transformers import RobertaTokenizer, RobertaForTokenClassification
from transformers import pipeline

# Checkpoint id used for both the tokenizer and the token-classification model.
model_name = "Jean-Baptiste/roberta-large-ner-english"

tokenizer = RobertaTokenizer.from_pretrained(model_name)

model = RobertaForTokenClassification.from_pretrained(model_name)

text = "Hugging Face Inc. is a company based in New York City, Its CEO is Clement Delangue"

# With aggregation_strategy="simple" each result is a grouped entity that
# exposes 'word', 'entity_group' and 'score' (the keys read below).
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

ner_results = ner_pipeline(text)

for entity in ner_results:
    # The aggregated surface form comes back with a leading space
    # (e.g. " Hugging Face Inc."); strip it so the report has no stray
    # whitespace after "Entity:".
    print(f"Entity: {entity['word'].strip()}, Label: {entity['entity_group']}, Score: {entity['score']:.4f}")

tokenizer_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/849 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Device set to use cuda:0


Entity:  Hugging Face Inc., Label: ORG, Score: 0.9993
Entity:  New York City, Label: LOC, Score: 0.9998
Entity:  Clement Delangue, Label: PER, Score: 0.9996


In [None]:
from transformers import pipeline

# Question-answering pipeline backed by a RoBERTa checkpoint fine-tuned on SQuAD 2.
question_answerer = pipeline(task='question-answering', model='deepset/roberta-base-squad2')

# The question to answer and the passage the answer is looked up in.
question = "What is the capital of France?"
context = "Paris is the capital and most populous city of France. It is situated on the Seine River, in the northern part of the country."

result = question_answerer(question=question, context=context)

# Report the question and the answer on separate lines.
print(f"Question: {question}", f"Answer: {result['answer']}", sep="\n")

Device set to use cuda:0


Question: What is the capital of France?
Answer: Paris


In [None]:
from transformers import pipeline

# Arabic NER checkpoint; the pipeline loads its own model and tokenizer from this id.
arabic_ner_checkpoint = 'CAMeL-Lab/bert-base-arabic-camelbert-msa-ner'
ner = pipeline(task='ner', model=arabic_ner_checkpoint)

arabic_sentence = "عاصمة السعودية هي مدينة الرياض والتي أسسها الملك سعود"

# Bare last expression so the notebook renders the list of tagged tokens.
ner(arabic_sentence)

config.json:   0%|          | 0.00/980 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-msa-ner were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/86.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/305k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity': 'B-LOC',
  'score': 0.9060526,
  'index': 2,
  'word': 'السعودية',
  'start': 6,
  'end': 14},
 {'entity': 'B-LOC',
  'score': 0.9940147,
  'index': 5,
  'word': 'الرياض',
  'start': 24,
  'end': 30},
 {'entity': 'B-PERS',
  'score': 0.9785609,
  'index': 10,
  'word': 'سعود',
  'start': 49,
  'end': 53}]