In [1]:
import torch
import torch.nn as nn
import time
import numpy as np
import copy
import torchvision
from torch.utils.data import DataLoader, random_split, Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Image preprocessing pipeline, built stage by stage. VGG-11 reference page:
# https://pytorch.org/vision/main/models/generated/torchvision.models.vgg11.html
_resize = transforms.Resize((224, 224))
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_resize, _to_tensor, _normalize])

In [2]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

device

device(type='cpu')

In [3]:
# Build the image dataset rooted at ../data/art-styles, handing it the `transform` pipeline defined above
dataset = ImageFolder(root='../data/art-styles', transform=transform)


In [4]:
from PIL import Image

In [6]:
# Hold out 20% of the images for testing. The split is driven by a seeded
# generator so that re-running the notebook yields the same partition
# (an unseeded random_split produces a different test set on every run).
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)


In [7]:
def train_model(model, dataloaders, criterion, optimizer, device, num_epochs=25):
    """Train ``model`` and restore the weights from its best "test" epoch.

    Every epoch runs one pass over ``dataloaders["train"]`` (with gradient
    updates) followed by one pass over ``dataloaders["test"]`` (no gradients).
    The state dict with the highest test accuracy is kept and loaded back into
    the model before returning.

    Args:
        model: module invoked as ``model(inputs, *extra)``.
        dataloaders: mapping with "train" and "test" entries. Each loader
            yields ``(inputs, labels, *extra)`` batches and exposes
            ``.dataset`` (its length is used to average loss and accuracy).
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer stepping the model's parameters.
        device: device the model and every batch tensor are moved to.
        num_epochs: number of train/test epochs to run.

    Returns:
        ``(model, test_acc_history)``: the same model object with the best
        weights loaded, and a list holding the test accuracy of each epoch
        (0-dim tensors, as produced by ``torch.sum(...).double()``).

    Note:
        ``tqdm`` is looked up in the notebook namespace when the function is
        called, so it must have been imported before the first call.
    """
    model.to(device)
    since = time.time()

    test_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 10)

        # Each epoch has a training phase and an evaluation phase
        for phase in ["train", "test"]:
            is_train = phase == "train"
            if is_train:
                model.train()  # enable training-mode layers (dropout etc.)
            else:
                model.eval()   # switch to inference behaviour

            running_loss = 0.0
            running_corrects = 0
            loader = dataloaders[phase]

            for inputs, labels, *extra in tqdm(loader, total=len(loader)):
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Build a new list: assigning to the loop variable inside a
                # `for e in extra` loop would leave the original tensors where
                # they were, so the model would receive un-moved extras.
                extra = [e.to(device) for e in extra]

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Track autograd history only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs, *extra)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # Backward + optimize only in the training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Statistics: weight the batch loss by batch size so the epoch
                # figure is a per-sample average even with a short last batch
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            num_samples = len(loader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects.double() / num_samples

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if not is_train:
                # Snapshot the weights whenever the test accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                test_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best test Acc: {best_acc:.4f}")

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model, test_acc_history


In [8]:

from tqdm import tqdm

# Function to evaluate the model
def evaluate_model(model, test_loader, device=None):
    """Run ``model`` over ``test_loader`` and print macro classification metrics.

    Args:
        model: module invoked as ``model(inputs, *extra)``.
        test_loader: sized iterable of ``(inputs, labels, *extra)`` batches.
        device: device the batch tensors are moved to. When omitted, the
            device of the model's first parameter is used (CPU if the model
            has no parameters), so inputs always land where the model lives
            and the function no longer depends on a notebook-global ``device``.

    Returns:
        None. Accuracy, F1, precision and recall are printed.

    Note:
        ``tqdm`` and ``evaluate_model_metrics`` are looked up in the notebook
        namespace when the function is called.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad(), tqdm(total=len(test_loader)) as progress_bar:
        for inputs, labels, *extra in test_loader:
            inputs = inputs.to(device)
            # Build a new list; rebinding a loop variable would not move anything
            extra = [e.to(device) for e in extra]
            outputs = model(inputs, *extra)
            _, predicted = torch.max(outputs, 1)
            # Tensors must be on the CPU before they can be converted to NumPy
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
            progress_bar.update(1)

    accuracy, f1, precision, recall = evaluate_model_metrics(np.array(y_true), np.array(y_pred))
    print(f'Accuracy: {accuracy:.4f}, F1-score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}\n')


# Function to calculate evaluation metrics
def evaluate_model_metrics(y_true, y_pred):
    """Return ``(accuracy, f1, precision, recall)`` for the given labels.

    F1, precision and recall are macro-averaged and computed with
    ``zero_division=0``; accuracy is the plain ``accuracy_score``.
    """
    macro_options = {"average": "macro", "zero_division": 0}
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, **macro_options),
        precision_score(y_true, y_pred, **macro_options),
        recall_score(y_true, y_pred, **macro_options),
    )


In [9]:
# Inspect which pretrained weight entries torchvision's VGG11_Weights enum defines
torchvision.models.VGG11_Weights.__members__

mappingproxy({'IMAGENET1K_V1': VGG11_Weights.IMAGENET1K_V1,
              'DEFAULT': VGG11_Weights.IMAGENET1K_V1})

In [10]:
# Start from the ImageNet-pretrained VGG-11 and fine-tune it on the custom dataset
pretrained_weights = torchvision.models.VGG11_Weights.IMAGENET1K_V1
vgg11_model = torchvision.models.vgg11(weights=pretrained_weights)

# Swap the final classifier layer for one sized to this dataset's classes
num_classes = len(set(dataset.classes))
final_in_features = vgg11_model.classifier[6].in_features
vgg11_model.classifier[6] = nn.Linear(final_in_features, num_classes)

# Loss function and optimizer (every parameter of the network is optimized).
# NOTE(review): lr=0.001 with Adam may be high for fine-tuning pretrained
# weights; consider trying a smaller value such as 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg11_model.parameters(), lr=0.001)


In [11]:
# Baseline: score VGG-11 on the test split before any fine-tuning (the new final layer is still untrained)
evaluate_model(vgg11_model, test_loader)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:03<00:00,  3.85s/it]

Accuracy: 0.1115, F1-score: 0.0900, Precision: 0.1123, Recall: 0.1103






In [12]:
# train_model looks loaders up by phase name
dataloaders = dict(train=train_loader, test=test_loader)

num_epochs = 5

# Fine-tune VGG-11 with the train_model function defined earlier
trained_vgg_model, test_acc_history = train_model(
    model=vgg11_model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch 1/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [20:08<00:00,  9.67s/it]


train Loss: 2.2788 Acc: 0.1315


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:27<00:00,  2.74s/it]


test Loss: 2.2530 Acc: 0.1305

Epoch 2/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [15:59<00:00,  7.68s/it]


train Loss: 2.1432 Acc: 0.1950


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:27<00:00,  2.75s/it]


test Loss: 2.1340 Acc: 0.1920

Epoch 3/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [15:49<00:00,  7.59s/it]


train Loss: 2.0590 Acc: 0.2353


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:27<00:00,  2.73s/it]


test Loss: 2.0562 Acc: 0.2520

Epoch 4/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [15:56<00:00,  7.65s/it]


train Loss: 2.0135 Acc: 0.2628


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:27<00:00,  2.74s/it]


test Loss: 2.0411 Acc: 0.2400

Epoch 5/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [16:11<00:00,  7.77s/it]


train Loss: 1.9728 Acc: 0.2641


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:28<00:00,  2.75s/it]

test Loss: 1.9690 Acc: 0.2540

Training complete in 91m 25s
Best test Acc: 0.254000





In [13]:
# Score the fine-tuned VGG-11 (best-epoch weights returned by train_model) on the test split
evaluate_model(trained_vgg_model, test_loader)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [01:27<00:00,  2.74s/it]

Accuracy: 0.2540, F1-score: 0.2220, Precision: 0.2549, Recall: 0.2586






## Try a bigger model: VGG-16

In [17]:
# Load the pre-trained VGG-16 model (default weights) to fine-tune on the custom dataset
vgg16_model = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)

# Replace the last classifier layer so its output size matches the number of dataset classes
num_classes = len(set(dataset.classes))
vgg16_model.classifier[-1] = nn.Linear(vgg16_model.classifier[-1].in_features, num_classes)

# Define loss and optimizer (these rebind the `criterion` / `optimizer` names used for VGG-11)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16_model.parameters(), lr=0.001)

# Baseline: score VGG-16 on the test split before fine-tuning
evaluate_model(vgg16_model, test_loader)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:39<00:00,  4.99s/it]

Accuracy: 0.0810, F1-score: 0.0649, Precision: 0.0761, Recall: 0.0821






In [18]:
# train_model looks loaders up by the phase names "train" and "test"
dataloaders = {"train": train_loader, "test": test_loader}

num_epochs = 5

# Train VGG-16 with the train_model function defined earlier.
# NOTE: this rebinds `test_acc_history`, so the VGG-11 history from the earlier
# run is no longer reachable under that name after this cell executes.
trained_vgg16_model, test_acc_history = train_model(vgg16_model, dataloaders, criterion, optimizer, device, num_epochs)


Epoch 1/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [36:48<00:00, 17.67s/it]


train Loss: 2.2811 Acc: 0.1439


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:34<00:00,  4.83s/it]


test Loss: 2.2048 Acc: 0.1550

Epoch 2/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [37:53<00:00, 18.19s/it]


train Loss: 2.1532 Acc: 0.1734


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:35<00:00,  4.87s/it]


test Loss: 2.1579 Acc: 0.1680

Epoch 3/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [39:43<00:00, 19.07s/it]


train Loss: 2.1245 Acc: 0.1998


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:54<00:00,  5.46s/it]


test Loss: 2.1417 Acc: 0.1850

Epoch 4/5
----------


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [1:49:09<00:00, 52.40s/it]


train Loss: 2.0741 Acc: 0.2279


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:56<00:00,  5.51s/it]


test Loss: 2.0507 Acc: 0.2415

Epoch 5/5
----------


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 125/125 [41:34<00:00, 19.96s/it]


train Loss: 2.0053 Acc: 0.2638


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:35<00:00,  4.87s/it]

test Loss: 1.9648 Acc: 0.2850

Training complete in 278m 49s
Best test Acc: 0.285000





In [19]:
# Score the fine-tuned VGG-16 (best-epoch weights returned by train_model) on the test split
evaluate_model(trained_vgg16_model, test_loader)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:36<00:00,  4.90s/it]

Accuracy: 0.2850, F1-score: 0.2417, Precision: 0.2500, Recall: 0.2916






In [20]:
import torch

# Save only the fine-tuned VGG-16 weights (its state_dict), not the module object itself.
# Note that the file name says "vgg" without a size; the contents are the VGG-16 weights.
torch.save(trained_vgg16_model.state_dict(), "../data/trained_vgg_model.pt")


In [21]:
# try a bigger vgg to see difference and ease of trying

# BERT

In [22]:
import pandas as pd
from nlp import Dataset
import numpy as np

In [23]:
# Step 1: Load the CSV at ../data/disaster.csv into a DataFrame (preprocessing happens in later cells)
csv_path = '../data/disaster.csv'
tweets = pd.read_csv(csv_path)


In [24]:
# Peek at the first two rows to check the columns that were loaded
tweets.head(2)

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1


In [25]:
# The Trainer expects the target column to be called 'label' (see the model's
# forward method in the docs). rename() returns a new frame and silently skips
# a column that is already gone, so this cell can be re-run without the
# KeyError that an add-then-delete of 'target' raises on a second run.
tweets = tweets.rename(columns={'target': 'label'})

# `Dataset` here is the class imported from `nlp`, not torch.utils.data.Dataset
tweet_dataset = Dataset.from_pandas(tweets)

In [26]:
# Checkpoint name used for both the tokenizer here and the classification model loaded later
BERT_MODEL = 'bert-base-uncased'  # uncased will lowercase everything and remove accents
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the BERT_MODEL checkpoint
bert_tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL)

In [27]:
def preprocess_function(examples):
    """Tokenize the "text" field of a batch with ``bert_tokenizer``, with truncation enabled."""
    texts = examples["text"]
    encoded = bert_tokenizer(texts, truncation=True)
    return encoded

# Apply preprocess_function to the dataset in batches (batched=True) and keep the result under the same name
tweet_dataset = tweet_dataset.map(preprocess_function, batched=True)


  0%|          | 0/8 [00:00<?, ?it/s]

In [28]:
# Hold out 20% of the tweets for evaluation; the result is indexed by 'train' / 'test' in later cells.
# NOTE(review): no seed is passed here, so the split presumably differs between runs. Confirm and pin one if so.
tweet_dataset = tweet_dataset.train_test_split(test_size=0.2)


  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [29]:
from transformers import DataCollatorWithPadding

# DataCollatorWithPadding builds batches of examples and dynamically pads each batch
# to the length of its longest element, so every sequence in a batch has the same length.
# Padding could instead be done in the tokenizer call with padding=True, but dynamic
# per-batch padding is more efficient.
data_collator = DataCollatorWithPadding(tokenizer=bert_tokenizer)

In [30]:
# Inspect one tokenized training example (original columns plus the tokenizer's output fields)
tweet_dataset['train'][0]


{'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1],
 'id': 8000,
 'input_ids': [101,
  1996,
  6745,
  1024,
  2062,
  5014,
  10958,
  5422,
  2011,
  2642,
  2662,
  3748,
  10273,
  1011,
  5925,
  2739,
  8299,
  1024,
  1013,
  1013,
  1056,
  1012,
  2522,
  1013,
  16731,
  2243,
  2595,
  3501,
  2620,
  13765,
  25509,
  102],
 'keyword': 'razed',
 'label': 1,
 'location': None,
 'text': 'The Latest: More Homes Razed by Northern California Wildfire - ABC News http://t.co/hCKxJ8eukt',
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0]}

In [31]:
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification

# Load the BERT_MODEL checkpoint with a 2-label sequence-classification head
sequence_classification_model = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL, num_labels=2,
    output_attentions = False, # Whether the model returns attention weights.
    output_hidden_states = False # Whether the model returns all hidden states.
)

# Human-readable names for the two class ids
sequence_classification_model.config.id2label = {0: 'NOT DISASTER', 1: 'DISASTER'}

# Display the module structure
sequence_classification_model

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [32]:
# Define the compute_metrics function to calculate accuracy, f1, precision, and recall
def compute_metrics(p):
    """Compute accuracy and weighted F1 / precision / recall for the Trainer.

    Args:
        p: object exposing ``predictions`` (per-class scores; the argmax over
            axis 1 is taken as the predicted class) and ``label_ids``.

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    # zero_division=0 scores an undefined metric (e.g. a class that is never
    # predicted) as 0 without emitting a warning, in line with what
    # evaluate_model_metrics already passes to the same sklearn functions.
    weighted_options = {"average": "weighted", "zero_division": 0}
    return {
        'accuracy': np.mean(preds == labels),
        'f1': f1_score(labels, preds, **weighted_options),
        'precision': precision_score(labels, preds, **weighted_options),
        'recall': recall_score(labels, preds, **weighted_options),
    }


In [33]:
# TODO show gradient accumulation and freezing maybe?

In [34]:
# Per-device batch size (used for both training and evaluation) and number of training epochs
batch_size = 8
epochs = 5

training_args = TrainingArguments(
    output_dir='./clf/results',
    logging_dir='./clf/logs',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    logging_strategy='steps',
    logging_first_step=True,
    load_best_model_at_end=True,
    # Logging every single step; the reported training loss is therefore a per-step value
    logging_steps=1,
    evaluation_strategy='epoch',
    # NOTE(review): eval_steps presumably has no effect while evaluation_strategy='epoch'; confirm
    eval_steps=1,
    save_strategy='epoch',
    report_to="wandb",  # enable logging to W&B
)

# Define the trainer: model, arguments, train/eval splits, metric function and padding collator

trainer = Trainer(
    model=sequence_classification_model,
    args=training_args,
    train_dataset=tweet_dataset['train'],
    eval_dataset=tweet_dataset['test'],
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

In [35]:
# Baseline: evaluate on the held-out split before any training has been run
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 1523
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, keyword, id, location. If text, keyword, id, location are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  _warn_prf(average, modifier, msg_start, len(result))
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[34m[1mwandb[0m: Currently logged in as: [33mprofoz[0m. Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


{'eval_loss': 0.7463948130607605,
 'eval_accuracy': 0.42547603414313856,
 'eval_f1': 0.25399214198503345,
 'eval_precision': 0.18102985563017318,
 'eval_recall': 0.42547603414313856,
 'eval_runtime': 25.2601,
 'eval_samples_per_second': 60.293,
 'eval_steps_per_second': 7.561}

In [36]:
# Fine-tune the model; evaluation and checkpointing happen once per epoch per the TrainingArguments above
trainer.train()

***** Running training *****
  Num examples = 6090
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3810
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, keyword, id, location. If text, keyword, id, location are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0539,0.66432,0.811556,0.812295,0.814857,0.811556
2,0.0506,0.503177,0.835194,0.834566,0.834742,0.835194
3,0.062,0.576744,0.839133,0.838164,0.838988,0.839133
4,0.0359,0.759813,0.822718,0.822822,0.822951,0.822718
5,0.0031,0.795127,0.83585,0.835491,0.835426,0.83585


***** Running Evaluation *****
  Num examples = 1523
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, keyword, id, location. If text, keyword, id, location are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
Saving model checkpoint to ./clf/results/checkpoint-762
Configuration saved in ./clf/results/checkpoint-762/config.json
Model weights saved in ./clf/results/checkpoint-762/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1523
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, keyword, id, location. If text, keyword, id, location are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
Saving model checkpoint to ./clf/results/checkpoint-1524
Confi

TrainOutput(global_step=3810, training_loss=0.32493502880043895, metrics={'train_runtime': 2224.9904, 'train_samples_per_second': 13.685, 'train_steps_per_second': 1.712, 'total_flos': 787984721921280.0, 'train_loss': 0.32493502880043895, 'epoch': 5.0})

In [37]:
# Re-evaluate after training. With load_best_model_at_end=True the reported eval_loss
# equals the epoch-2 validation loss from the training log, not the last epoch's.
trainer.evaluate()  # sanity check

***** Running Evaluation *****
  Num examples = 1523
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, keyword, id, location. If text, keyword, id, location are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.5031771659851074,
 'eval_accuracy': 0.835193696651346,
 'eval_f1': 0.8345657736205881,
 'eval_precision': 0.8347419648481531,
 'eval_recall': 0.835193696651346,
 'eval_runtime': 23.4894,
 'eval_samples_per_second': 64.838,
 'eval_steps_per_second': 8.131,
 'epoch': 5.0}