In [3]:
!pip install torch transformers datasets  --quiet

Defaulting to user installation because normal site-packages is not writeable
    sys-platform (=="darwin") ; extra == 'objc'
                 ~^[0m[33m
[0m

In [7]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from tqdm.notebook import tqdm
import numpy as np
from seqeval.metrics import classification_report, accuracy_score
import torch  

# Load dataset and tokenizer
dataset = load_dataset('imdb')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define compute metrics function
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(labels, predictions)
    return {"accuracy": accuracy}

# Tokenize dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
tokenized_datasets.set_format("torch")

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Set training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=1000,
    save_steps=1000,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

print(f"Model device: {next(model.parameters()).device}")

# Evaluate the model
results = trainer.evaluate()
print(f"Initial Model Performance: {results}")

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(f"Post-training Model Performance: {results}")


import numpy as np
from sklearn.metrics import accuracy_score

# Function to monitor drift
def monitor_drift(new_data, model, tokenizer):
    device = next(model.parameters()).device
    tokenized_new_data = tokenizer(new_data['text'], padding="max_length", truncation=True, return_tensors="pt")
    tokenized_new_data = {k: v.to(device) for k, v in tokenized_new_data.items()}
    predictions = model(**tokenized_new_data).logits
    predicted_labels = np.argmax(predictions.cpu().detach().numpy(), axis=1)
    accuracy = accuracy_score(new_data['label'], predicted_labels)
    return accuracy < 0.75

# Example of usage
new_data = {"text": ["New review text"], "label": [1]}  # Replace with real data
if monitor_drift(new_data, model, tokenizer):
    # Retrain the model with new trainer using updated data
    trainer.train()
    
    
import torch  
from torch import nn, autograd

class EWC(object):
    def __init__(self, model, dataloader, importance=1000):
        self.model = model
        self.device = next(model.parameters()).device
        self.importance = importance
        self.params = {n: p.clone() for n, p in self.model.named_parameters() if p.requires_grad}
        self.fisher = self.compute_fisher_information(dataloader)

    def compute_fisher_information(self, dataloader):
        fisher = {n: torch.zeros_like(p) for n, p in self.model.named_parameters() if p.requires_grad}
        self.model.eval()
        for batch in tqdm(dataloader):
            self.model.zero_grad()
            input_ids = batch['input_ids'].to(self.model.device)
            labels = batch['labels'].to(self.model.device)
            output = self.model(input_ids).logits
            loss = nn.CrossEntropyLoss()(output, labels)
            loss.backward()
            for n, p in self.model.named_parameters():
                if p.requires_grad:
                    fisher[n] += (p.grad ** 2) / len(dataloader)
        return fisher

    def penalty(self):
        loss = 0
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                loss += torch.sum(self.fisher[n] * (p - self.params[n]) ** 2)
        return self.importance * loss

# Example of applying EWC during training
ewc = EWC(model, trainer.get_train_dataloader())

# Modify the training loop
for epoch in tqdm(range(3)):
    for batch in tqdm(trainer.get_train_dataloader()):
        inputs = batch['input_ids'].to(model.device)
        labels = batch['labels'].to(model.device)
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs.logits, labels)
        ewc_loss = ewc.penalty()
        total_loss = loss + ewc_loss
        total_loss.backward()
        trainer.optimizer.step()
        trainer.optimizer.zero_grad()


# Function to refine learning process
def refine_learning(trainer, eval_dataset, target_accuracy=0.85):
    results = trainer.evaluate(eval_dataset=eval_dataset)
    print(results)
    current_accuracy = results['eval_accuracy']

    if current_accuracy < target_accuracy:
        print(f"Refining learning process... Current accuracy: {current_accuracy}")
        trainer.args.num_train_epochs += 1  # Example adjustment
        trainer.train()

# Example of usage
refine_learning(trainer, tokenized_datasets['test'])


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model device: cuda:0


Initial Model Performance: {'eval_loss': 0.7143436074256897, 'eval_accuracy': 0.50076, 'eval_runtime': 195.7528, 'eval_samples_per_second': 127.712, 'eval_steps_per_second': 7.985}


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2983,0.223037,0.91408
2,0.128,0.231688,0.9348
3,0.0591,0.289004,0.93952


Post-training Model Performance: {'eval_loss': 0.28900426626205444, 'eval_accuracy': 0.93952, 'eval_runtime': 197.4132, 'eval_samples_per_second': 126.638, 'eval_steps_per_second': 7.917, 'epoch': 3.0}


  0%|          | 0/1563 [00:00<?, ?it/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1563 [00:00<?, ?it/s]

  0%|          | 0/1563 [00:00<?, ?it/s]

  0%|          | 0/1563 [00:00<?, ?it/s]

{'eval_loss': 0.28900426626205444, 'eval_accuracy': 0.93952, 'eval_runtime': 197.4955, 'eval_samples_per_second': 126.585, 'eval_steps_per_second': 7.914, 'epoch': 3.0}
