In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration


#1) Caricamento del modello T5 (Versione small per rapidità in laboratorio)

model_name = 't5-base'
tokenizer = T5Tokenizer.from_pretrained(model_name) #Carichiamo il tokenizer associato
model = T5ForConditionalGeneration.from_pretrained(model_name) #Scarica i pesi pre-addestrati, carica l'archietettura T5 coretta e restituisce un modello pytorch

def run_t5(prompt_text):
  inputs = tokenizer(prompt_text, return_tensors='pt') #Prende input restituisce tensori pytorch
  #Generazione della risposta
  outputs = model.generate(inputs.input_ids, max_length=50) #Decoder autoregressivo un token alla volta
  return tokenizer.decode(outputs[0], skip_special_tokens=True)



#Traduzione Inglese -> Tedesco
translate_text = "translate English to German: The transformer architecture is revolutionary."
print(f"TRADUZIONE: {run_t5(translate_text)}")

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/257 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

TRADUZIONE: Die Transformatorarchitektur ist revolutionär.


In [None]:
def translate_t5(text, destination_language):
  #Costruzione dinamica del prompt
  prompt= f"translate English to {destination_language} : {text}"
  inputs = tokenizer(prompt, return_tensors="pt")
  outputs = model.generate(inputs.input_ids,max_length=40)
  return tokenizer.decode(outputs[0],skip_special_tokens=True)



#Utilizzo
print(translate_t5("I love computer engineering", "German"))
print(translate_t5("I love computer engineering", "French"))

Ich liebe die Computertechnik
J'aime le génie informatique


In [None]:
#Caso B:  riassunto (Summarization)
long_text= "summarize: The T5 model treats every NLP problem as a text-to-text task. " \
"This means it uses the same loss function and model architecture for " \
"tasks as diverse as translation, classification, and regression."
print(f"RIASSUNTO: {run_t5(long_text)}")

RIASSUNTO: the T5 model treats every NLP problem as a text-to-text task . this means it uses the same loss function and model architecture for tasks as diverse as translation, classification


In [None]:
#Caso C: Analisi della coerenza grammaticale ((CoLA))
grammar_text = "cola sentence: The course helps students learn"
print(f"GRAMMATICA: {run_t5(grammar_text)}")

GRAMMATICA: acceptable


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

model_name="bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
sentences = ["I love this product", "This movie is awful"]
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
with torch.no_grad():
  outputs = model(**inputs)

logits = outputs.logits
pred = torch.argmax(logits, dim=1)
print("X: ",pred)

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
classifier.weight                          | MISSING    | 
classifier.bias                            | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


X:  tensor([1, 1])


In [None]:
#SENTIMENTAL ANALYSIS
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

#1) Scelta del modello (BERT multilingua per recensioni)
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

#2) Caricamento di Tokenizer e Modello
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

#3) Preparazione dell'input
text = "Il corso di apprendimento automatico e apprendimento profondo è molto impegnativo ma gratificante."
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
  outputs = model(**inputs)
  logits = outputs.logits

#5) Post processing: BERT restituisce stelle da 1 a 5
predicted_class = torch.argmax(logits,dim=1).item() +1
print(f"Testo {text}")
print(f"Valutazione stimata: {predicted_class} stelle su 5")

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Testo Il corso di apprendimento automatico e apprendimento profondo è molto impegnativo ma gratificante.
Valutazione stimata: 4 stelle su 5


In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline


#1. Carichiamo un modello specifico per il dominio medicale
# Questo modello è stato istriuto per riconescere entità clicniche

# Esempio con un modello NER pubblicamente accessibile
model_checkpoint = "d4data/biomedical-ner-all"

# Caricamento di Tokenizer e Modello
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)

#3. Creiamo una pipeline per il Named Entity Recognition (NER)
# "'aggregation_strategy' serve per raggruppare i sotto-token (BPE) in parole intere"
medical_ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

clinical_text = (
    "The patient was prescribed 500mg of Amoxicillin fo the treatment of "
    "acute bacterial sinusitis and persistent fever."
)
#5 Esecuzione dell'analisi
results = medical_ner(clinical_text)

print(f"{'Entità' :<20} | {'Categoria':<15} | {'Confidenza':<10}")
print("-"*50)
for entity in results:
  print(f"{entity['word']:<20} | {entity['entity_group']:15} | {entity['score']:.4f}")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]



tokenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/266M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/102 [00:00<?, ?it/s]

Entità               | Categoria       | Confidenza
--------------------------------------------------
500mg                | Dosage          | 0.9967
am                   | Medication      | 0.9999
##oxicillin          | Medication      | 0.9969
acute                | Detailed_description | 1.0000
bacterial sinusitis  | Disease_disorder | 0.8028
persistent           | Detailed_description | 1.0000


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

#1. Caricamento di Clinical - BERT (modello pre-addestrato su testi medici)
# Usiamo 'emilyalsentzer/Bio_ClinicalBERT' come base

model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)

#2. Configurazione per la classificazione (2 classi: Urgente vs Routine)
# NOTA: In uno scenario reale, qui caricheresti i pesi dopo il fine-tuning.
# Qui usiamo il modello base per mostrare l'architettura.

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def predict_urgency(text):
  # Tokenizzazione con padding e troncamento (standard per BERT)
  inputs = tokenizer(text, return_tensors="pt",truncation=True, padding=True, max_length=128)
  with torch.no_grad():
    outputs = model(**inputs)
    #BERT restituisce i logit ( valori grezzi prima della Softmax)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1)
    prediction = torch.argmax(probabilities, dim=1).item()

  labels = {0: "Routine/Monitoraggio", 1: "URGENTE - Contattare Medico"}
  return labels[prediction], probabilities[0]

#3 Test con due casi clinici differenti
case_1 = "Patient reports mild back pain after exercise, no neurological symptoms."
case_2 = "sudden onset of severe chest pain radianting to left arm and shortness of breath."

print(f"Caso 1:{predict_urgency(case_1)}")
print(f"Caso 2: {predict_urgency(case_2)}")


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: emilyalsentzer/Bio_ClinicalBERT
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.decoder.weight             | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Conside

Caso 1:('Routine/Monitoraggio', tensor([0.6192, 0.3808]))
Caso 2: ('URGENTE - Contattare Medico', tensor([0.4592, 0.5408]))


Caso 1:('URGENTE - Contattare Medico', 0.685558021068573)
Caso 2: ('URGENTE - Contattare Medico', 0.7383987903594971)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM


model_name="gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Artificial intelligence will"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(**inputs, max_length=40, temperature=0.7, top_k=50)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))



Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Artificial intelligence will be able to do things like search for and find people, and to do things like find out who's in the right place at the right time.

"We're going


In [None]:
#ChatBot --------
#Storico della conversazione
chat_history = ""
while True:
  user_input = input("👤: Tu: ")
  if user_input.lower()=='exit':
    print("Chat terminata.")
    break

  chat_history += f"User: {user_input}\nAssistant:"

  #Tokenizzazione
  inputs = tokenizer(chat_history, return_tensors="pt")
  # Generazione risposta
  outputs = model.generate(
      **inputs,
      max_length=inputs['input_ids'].shape[1]+50,
      temperature=0.7,
      top_k = 50,
      pad_token_id = tokenizer.eos_token_id
  )
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
  # Estraiamo solo la nuova risposta
  bot_reply = response.split("Assistant:")[-1].strip()

  print(f"🤖 Bot: {bot_reply}\n")
  #Aggiorniamo la storia
  chat_history += bot_reply + "\n"

👤: Tu: ciao
🤖 Bot: ciao



KeyboardInterrupt: Interrupted by user