## Install the necessary libraries


In [2]:
%%capture
! pip install tqdm boto3 requests regex sentencepiece sacremoses
! pip install transformers

## BERT Features

In this part, you will use BERT features to classify DBPedia articles.
The data is already pre-processed, and the data loader is implemented below.

In [3]:
# Basics: dataset, data loaders, Classifier
import collections
import json
import torch
import torch.nn as nn
import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel


SPLITS = ['train', 'dev', 'test']

class DBPediaDataset(Dataset):
  '''DBPedia dataset.
    Args:
      path[str]: path to the original data.
  '''
  def __init__(self, path):
    with open(path) as fin:
      self._data = [json.loads(l) for l in fin]
    self._n_classes = len(set([datum['label'] for datum in self._data]))

  def __getitem__(self, index):
    return self._data[index]

  def __len__(self):
    return len(self._data)

  @property
  def n_classes(self):
    return self._n_classes

  @staticmethod
  def collate_fn(tokenizer, device, batch):
    '''The collate function that compresses a training batch.
      Args:
        batch[list[dict[str, Any]]]: data in the batch.
      Returns:
        labels[torch.LongTensor]: the labels in the batch.
        sentences[dict[str, torch.Tensor]]: sentences converted by tokenizers.
    '''
    labels = torch.tensor([datum['label'] for datum in batch]).long().to(device)
    sentences = tokenizer(
        [datum['sentence'] for datum in batch],
        return_tensors='pt',  # pt = pytorch style tensor
        padding=True)
    for key in sentences:
      sentences[key] = sentences[key].to(device)
    return labels, sentences

def construct_datasets(prefix, batch_size, tokenizer, device):
  '''Constructs datasets and data loaders.
    Args:
      prefix[str]: prefix of the dataset (e.g., dbpedia_).
      batch_size[int]: maximum number of examples in a batch.
      tokenizer: model tokenizer that converts sentences to integer tensors.
      device[torch.device]: the device (cpu/gpu) that the tensor should be on.
    Returns:
      datasets[dict[str, Dataset]]: a dict of constructed datasets.
      dataloaders[dict[str, DataLoader]]: a dict of constructed data loaders.
  '''
  datasets = collections.defaultdict()
  dataloaders = collections.defaultdict()
  for split in SPLITS:
    datasets[split] = DBPediaDataset(f'{prefix}{split}.json')
    dataloaders[split] = DataLoader(
        datasets[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
        collate_fn=lambda x:DBPediaDataset.collate_fn(tokenizer, device, x))
  return datasets, dataloaders

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import numpy as np

In [4]:
class Classifier(nn.Module):
    def __init__(self, input_dim, hidden_size, n_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, n_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Training and Evaluation

# **1.1**

In [5]:
print(torch.cuda.is_available())

True


In [6]:
def set_seed(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def train_model(dataloaders, classifier, optimizer, loss_func):
    pbar = tqdm.tqdm(dataloaders['train'])
    for labels, sentences in pbar:
        with torch.no_grad():
            unpooled_features = bert_model(**sentences)['last_hidden_state']
        cls_token = unpooled_features[:, 0, :]
        outputs = classifier(cls_token)
        loss = loss_func(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_description(f"Loss: {loss.item():.4f}")

In [9]:
def evaluate(model, dataloaders, bert_model, is_test=False):
    model.eval()
    dataloader = dataloaders['test'] if is_test else dataloaders['dev']

    total, correct, total_loss = 0, 0, 0.0

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for labels, sentences in dataloader:
            unpooled_features = bert_model(**sentences)['last_hidden_state']
            cls_token = unpooled_features[:, 0, :]
            outputs = model(cls_token)

            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    average_loss = total_loss / len(dataloader)
    accuracy = 100 * correct / total
    return accuracy, average_loss

In [10]:
# Hyperparameters
batch_size = 32
classifier_hidden_size = 32

tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
bert_model = AutoModel.from_pretrained('bert-base-cased')
if torch.cuda.is_available():
    bert_model = bert_model.cuda()

datasets, dataloaders = construct_datasets(prefix='dbpedia_', batch_size=batch_size, tokenizer=tokenizer, device=bert_model.device)

In [14]:
dev_accuracies = []
best_model = None
best_loss = float('inf')

for seed in range(5):
    set_seed(seed)
    classifier = Classifier(bert_model.config.hidden_size, classifier_hidden_size, datasets['train'].n_classes).to(bert_model.device)
    optimizer = torch.optim.Adam(classifier.parameters(), lr=5e-4)
    loss_func = nn.CrossEntropyLoss()

    train_model(dataloaders, classifier, optimizer, loss_func)
    accuracy, loss = evaluate(classifier, dataloaders, bert_model)
    dev_accuracies.append(accuracy)

    if loss < best_loss:
        best_loss = loss
        best_model = classifier

# Evaluate the best model on the test set
test_accuracy, test_loss = evaluate(best_model, dataloaders, bert_model, is_test=True)

# Calculate mean and standard deviation
mean_accuracy = np.mean(dev_accuracies)
std_dev_accuracy = np.std(dev_accuracies)

print(f"Mean Dev Accuracy: {mean_accuracy}")
print(f"Standard Deviation: {std_dev_accuracy}")
print(f"Test Set Accuracy of Best Model: {test_accuracy}")

Loss: 0.3805: 100%|██████████| 313/313 [00:14<00:00, 22.04it/s]
Loss: 0.4824: 100%|██████████| 313/313 [00:14<00:00, 22.15it/s]
Loss: 0.6758: 100%|██████████| 313/313 [00:14<00:00, 22.27it/s]
Loss: 0.3257: 100%|██████████| 313/313 [00:14<00:00, 21.76it/s]
Loss: 0.4815: 100%|██████████| 313/313 [00:14<00:00, 21.59it/s]


Mean Dev Accuracy: 96.16
Standard Deviation: 0.952050418832951
Test Set Accuracy of Best Model: 97.7


# **1.2**

1. Define Function

In [15]:
def mean_pooling(token_embeddings, attention_mask):
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    return sum_embeddings / sum_mask

def max_pooling(token_embeddings, attention_mask):
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    max_values = torch.max(token_embeddings, 1).values  # Extracting only the values
    return max_values

In [16]:
def train_model_pooling(dataloaders, model, optimizer, loss_func, bert_model, pooling_method):
    model.train()
    for labels, data in tqdm.tqdm(dataloaders['train']):
        # Since data is already tokenized, extract the necessary fields directly
        input_ids = data['input_ids'].to(bert_model.device)
        attention_mask = data['attention_mask'].to(bert_model.device)

        # Forward pass with BERT
        with torch.no_grad():
            outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state

        # Apply the specified pooling method
        if pooling_method == 'mean':
            pooled_outputs = mean_pooling(outputs, attention_mask)
        elif pooling_method == 'max':
            pooled_outputs = max_pooling(outputs, attention_mask)

        # Classifier forward pass
        logits = model(pooled_outputs)
        loss = loss_func(logits, labels.to(bert_model.device))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [17]:
def evaluate_pooling(model, dataloaders, bert_model, pooling_method, is_test=False):
    model.eval()
    device = next(model.parameters()).device  # 모델 파라미터에서 디바이스 추출
    total, correct, total_loss = 0, 0, 0.0

    # 교차 엔트로피 손실 함수 초기화
    criterion = nn.CrossEntropyLoss()

    dataloader = dataloaders['test'] if is_test else dataloaders['dev']
    with torch.no_grad():
        for labels, data in dataloader:
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)

            # BERT 포워드 패스
            outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state

            # 지정된 풀링 방법 적용
            if pooling_method == 'mean':
                pooled_outputs = mean_pooling(outputs, attention_mask)
            elif pooling_method == 'max':
                pooled_outputs = max_pooling(outputs, attention_mask)

            # 분류기 포워드 패스
            logits = model(pooled_outputs)

            # 손실 계산
            loss = criterion(logits, labels.to(device))
            total_loss += loss.item()

            _, predicted = torch.max(logits.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.to(device)).sum().item()

    accuracy = correct / total * 100
    average_loss = total_loss / len(dataloader)
    return accuracy, average_loss

2. Mean Pooling

In [18]:
dev_accuracies = []
best_model = None
best_loss = float('inf')

for seed in range(5):
    set_seed(seed)
    classifier = Classifier(bert_model.config.hidden_size, classifier_hidden_size, datasets['train'].n_classes).to(bert_model.device)
    optimizer = torch.optim.Adam(classifier.parameters(), lr=5e-4)
    loss_func = nn.CrossEntropyLoss()

    train_model_pooling(dataloaders, classifier, optimizer, loss_func, bert_model, 'mean')
    accuracy, loss = evaluate_pooling(classifier, dataloaders, bert_model, 'mean')
    dev_accuracies.append(accuracy)

    if loss < best_loss:
        best_loss = loss
        best_model = classifier

# Evaluate the best model on the test set
test_accuracy, test_loss = evaluate_pooling(best_model, dataloaders, bert_model, 'mean', is_test=True)

# Calculate mean and standard deviation
mean_accuracy = np.mean(dev_accuracies)
std_dev_accuracy = np.std(dev_accuracies)

print(f"Mean Dev Accuracy: {mean_accuracy}")
print(f"Standard Deviation: {std_dev_accuracy}")
print(f"Test Set Accuracy of Best Model: {test_accuracy}")

100%|██████████| 313/313 [00:13<00:00, 23.47it/s]
100%|██████████| 313/313 [00:13<00:00, 23.59it/s]
100%|██████████| 313/313 [00:13<00:00, 23.56it/s]
100%|██████████| 313/313 [00:13<00:00, 23.28it/s]
100%|██████████| 313/313 [00:13<00:00, 23.54it/s]


Mean Dev Accuracy: 96.96000000000001
Standard Deviation: 0.21540659228537873
Test Set Accuracy of Best Model: 96.7


3. Max Pooling

In [19]:
dev_accuracies = []
best_model = None
best_loss = float('inf')

for seed in range(5):
    set_seed(seed)
    classifier = Classifier(bert_model.config.hidden_size, classifier_hidden_size, datasets['train'].n_classes).to(bert_model.device)
    optimizer = torch.optim.Adam(classifier.parameters(), lr=5e-4)
    loss_func = nn.CrossEntropyLoss()

    train_model_pooling(dataloaders, classifier, optimizer, loss_func, bert_model, 'max')
    accuracy, loss = evaluate_pooling(classifier, dataloaders, bert_model, 'max')
    dev_accuracies.append(accuracy)

    if loss < best_loss:
        best_loss = loss
        best_model = classifier

# Evaluate the best model on the test set
test_accuracy, test_loss = evaluate_pooling(best_model, dataloaders, bert_model, 'max', is_test=True)

# Calculate mean and standard deviation
mean_accuracy = np.mean(dev_accuracies)
std_dev_accuracy = np.std(dev_accuracies)

print(f"Mean Dev Accuracy: {mean_accuracy}")
print(f"Standard Deviation: {std_dev_accuracy}")
print(f"Test Set Accuracy of Best Model: {test_accuracy}")

100%|██████████| 313/313 [00:13<00:00, 23.51it/s]
100%|██████████| 313/313 [00:13<00:00, 23.58it/s]
100%|██████████| 313/313 [00:13<00:00, 23.50it/s]
100%|██████████| 313/313 [00:13<00:00, 23.31it/s]
100%|██████████| 313/313 [00:13<00:00, 23.57it/s]


Mean Dev Accuracy: 65.17999999999999
Standard Deviation: 4.676494413553813
Test Set Accuracy of Best Model: 68.2


# **1.3**

1. **First-Token Pooling**:

Mean Dev Accuracy: 96.16%

Standard Deviation: 0.952

Test Set Accuracy: 97.7%

2. **Mean Pooling**:

Mean Dev Accuracy: 96.96%

Standard Deviation: 0.215

Test Set Accuracy: 96.7%

3. **Max Pooling**:

Mean Dev Accuracy: 65.18%

Standard Deviation: 4.676

Test Set Accuracy: 68.2%

4. **Analysis:**

1) **Mean Development Accuracy:** This metric indicates how well the model performs on average on the development set. Higher is better. In this aspect, **mean pooling performs the best (96.96%)**, closely followed by first-token pooling (96.16%). Max pooling lags significantly behind (65.18%).

2) **Standard Deviation:** This metric measures the model's development set performance variability across different runs. Lower is better, as it indicates more consistency. Again, **mean pooling** shows the best performance with the lowest standard deviation (0.215), suggesting it is the most stable and consistent across runs.

3) **Test Set Accuracy:** This metric evaluates how well the model generalizes to unseen data. Higher is better. **First-token pooling leads (97.7%)**, with mean pooling slightly behind (96.7%). Max pooling has a much lower performance (68.2%).


5. **Conclusion:**

1) **Best Overall:** **Mean pooling** is the most effective method, considering it has the highest mean development accuracy and the lowest standard deviation, indicating high performance and consistency. Although its test set accuracy is slightly lower than that of the first-token pooling, the differences are minimal and mean pooling's superior performance in the other two metrics makes it the preferable choice.

2) **Least Effective:** **Max pooling** is the least effective in this scenario, with significantly lower development and test accuracies and higher standard deviation.

# **1.4**

In [23]:
def train_model_layer(dataloaders, classifier, optimizer, loss_func):
    pbar = tqdm.tqdm(dataloaders['train'])
    for labels, sentences in pbar:
        unpooled_features = bert_model(**sentences)['last_hidden_state']
        cls_token = unpooled_features[:, 0, :]
        outputs = classifier(cls_token)
        loss = loss_func(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_description(f"Loss: {loss.item():.4f}")

In [24]:
def evaluate_layer(model, dataloaders, bert_model, is_test=False):
    model.eval()
    dataloader = dataloaders['test'] if is_test else dataloaders['dev']
    total, correct, total_loss = 0, 0, 0.0

    # 교차 엔트로피 손실 함수 초기화
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for labels, sentences in dataloader:
            unpooled_features = bert_model(**sentences)['last_hidden_state']
            cls_token = unpooled_features[:, 0, :]
            outputs = model(cls_token)

            # 손실 계산
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    average_loss = total_loss / len(dataloader)
    return accuracy, average_loss

In [25]:
dev_accuracies = []
best_model = None
best_loss = float('inf')

for seed in range(5):
    set_seed(seed)
    classifier = Classifier(bert_model.config.hidden_size, classifier_hidden_size, datasets['train'].n_classes).to(bert_model.device)
    params = list()
    for name, param in bert_model.named_parameters():
      if name.startswith(('encoder.layer.10', 'encoder.layer.11')):
        param.requires_grad = True
        params.append(param)
      else:
        param.requires_grad = False
    optimizer = torch.optim.Adam(params + list(classifier.parameters()), lr=5e-4)
    loss_func = nn.CrossEntropyLoss()

    train_model_layer(dataloaders, classifier, optimizer, loss_func)
    accuracy, loss = evaluate_layer(classifier, dataloaders, bert_model)
    dev_accuracies.append(accuracy)

    if loss < best_loss:
        best_loss = loss
        best_model = classifier

# Evaluate the best model on the test set
test_accuracy, test_loss = evaluate(best_model, dataloaders, bert_model, is_test=True)

# Calculate mean and standard deviation
mean_accuracy = np.mean(dev_accuracies)
std_dev_accuracy = np.std(dev_accuracies)

print(f"Mean Dev Accuracy: {mean_accuracy}")
print(f"Standard Deviation: {std_dev_accuracy}")
print(f"Test Set Accuracy of Best Model: {test_accuracy}")

Loss: 0.0203: 100%|██████████| 313/313 [00:17<00:00, 17.42it/s]
Loss: 0.0090: 100%|██████████| 313/313 [00:18<00:00, 17.38it/s]
Loss: 0.0087: 100%|██████████| 313/313 [00:17<00:00, 17.42it/s]
Loss: 0.0044: 100%|██████████| 313/313 [00:18<00:00, 17.29it/s]
Loss: 0.0090: 100%|██████████| 313/313 [00:18<00:00, 17.30it/s]


Mean Dev Accuracy: 99.11999999999999
Standard Deviation: 0.3544009029333898
Test Set Accuracy of Best Model: 98.4


# **1.5**

In [49]:
from transformers import GPT2Model, GPT2Tokenizer
import copy

gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt_model = GPT2Model.from_pretrained('gpt2')
gpt_tokenizer.pad_token = gpt_tokenizer.eos_token

In [51]:
if torch.cuda.is_available():
    gpt_model = gpt_model.cuda()

In [28]:
gpt_model.config.n_embd

768

In [42]:
class GPTClassifier(nn.Module):
    def __init__(self, gpt_hidden_size, classifier_hidden_size, num_classes, dropout_rate=0.2):
        super(GPTClassifier, self).__init__()
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)  # Dropout layer
        self.linear1 = nn.Linear(gpt_hidden_size, classifier_hidden_size)

        # Additional intermediate layer
        self.intermediate_size = classifier_hidden_size // 2
        self.linear_intermediate = nn.Linear(classifier_hidden_size, self.intermediate_size)

        self.linear2 = nn.Linear(self.intermediate_size, num_classes)

    def forward(self, unpooled_features, attention_mask):
        # Selecting the last non-padded element based on the attention mask
        last_non_padded_idx = attention_mask.sum(dim=1) - 1
        pooled_features = unpooled_features[torch.arange(unpooled_features.size(0)), last_non_padded_idx]

        # Pass through the first linear layer, activation, and dropout
        x = self.linear1(pooled_features)
        x = self.activation(x)
        x = self.dropout(x)

        # Pass through the additional intermediate layer, activation, and dropout
        x = self.linear_intermediate(x)
        x = self.activation(x)
        x = self.dropout(x)

        # Final linear layer
        return self.linear2(x)

In [8]:
def train_model_gpt(dataloaders, classifier, optimizer, loss_func, gpt_model):
    pbar = tqdm.tqdm(dataloaders['train'])
    for labels, sentences in pbar:
        # Assuming sentences is a dictionary with 'input_ids' and 'attention_mask'
        input_ids = sentences['input_ids'].to(gpt_model.device)
        attention_mask = sentences['attention_mask'].to(gpt_model.device)
        labels = labels.to(gpt_model.device)

        optimizer.zero_grad()

        # Forward pass through GPT model
        with torch.no_grad():
            outputs = gpt_model(input_ids, attention_mask=attention_mask)
            unpooled_features = outputs.last_hidden_state

        # Forward pass through the classifier
        logits = classifier(unpooled_features, attention_mask)

        loss = loss_func(logits, labels)
        loss.backward()
        optimizer.step()

        pbar.set_description(f"Loss: {loss.item():.4f}")

In [9]:
def evaluate_gpt(model, dataloaders, gpt_model, is_test=False):
    model.eval()  # Set the model to evaluation mode
    dataloader = dataloaders['test'] if is_test else dataloaders['dev']
    total, correct, total_loss = 0, 0, 0.0

    # Initialize the cross-entropy loss function
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for batch in dataloader:
            labels, sentences = batch
            input_ids = sentences['input_ids'].to(gpt_model.device)
            attention_mask = sentences['attention_mask'].to(gpt_model.device)
            labels = labels.to(gpt_model.device)

            # Forward pass through GPT model
            outputs = gpt_model(input_ids, attention_mask=attention_mask)
            unpooled_features = outputs.last_hidden_state

            # Forward pass through the classifier
            logits = model(unpooled_features, attention_mask)

            # Calculate loss
            loss = criterion(logits, labels)
            total_loss += loss.item()

            _, predicted = torch.max(logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    average_loss = total_loss / len(dataloader)
    accuracy = 100 * correct / total
    return accuracy, average_loss

In [10]:
class DBPediaDataset(Dataset):
  '''DBPedia dataset.
    Args:
      path[str]: path to the original data.
  '''
  def __init__(self, path):
    with open(path) as fin:
      self._data = [json.loads(l) for l in fin]
    self._n_classes = len(set([datum['label'] for datum in self._data]))

  def __getitem__(self, index):
    return self._data[index]

  def __len__(self):
    return len(self._data)

  @property
  def n_classes(self):
    return self._n_classes

  @staticmethod
  def collate_fn(tokenizer, device, batch):
    labels = torch.tensor([datum['label'] for datum in batch]).long().to(device)
    sentences = tokenizer(
        [datum['sentence'] for datum in batch],
        return_tensors='pt',  # pt = pytorch style tensor
        padding='longest',  # Pad to the longest sequence in the batch
        truncation=True  # Truncate to the model's max input length
    )
    for key in sentences:
        sentences[key] = sentences[key].to(device)
    return labels, sentences

def construct_datasets(prefix, batch_size, tokenizer, device):
  '''Constructs datasets and data loaders.
    Args:
      prefix[str]: prefix of the dataset (e.g., dbpedia_).
      batch_size[int]: maximum number of examples in a batch.
      tokenizer: model tokenizer that converts sentences to integer tensors.
      device[torch.device]: the device (cpu/gpu) that the tensor should be on.
    Returns:
      datasets[dict[str, Dataset]]: a dict of constructed datasets.
      dataloaders[dict[str, DataLoader]]: a dict of constructed data loaders.
  '''
  datasets = collections.defaultdict()
  dataloaders = collections.defaultdict()
  for split in SPLITS:
    datasets[split] = DBPediaDataset(f'{prefix}{split}.json')
    dataloaders[split] = DataLoader(
        datasets[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
        collate_fn=lambda x:DBPediaDataset.collate_fn(tokenizer, device, x))
  return datasets, dataloaders

In [56]:
batch_size = 32
datasets, dataloaders = construct_datasets(prefix='dbpedia_', batch_size=batch_size, tokenizer=gpt_tokenizer, device=gpt_model.device)

In [57]:
classifier_hidden_size = 256

In [58]:
import torch
torch.cuda.empty_cache()

In [61]:
# Hyperparameters
num_classes = 14
learning_rate = 5e-4
num_seed = 5

# Initialize and Train the Model
dev_accuracies = []
best_model = None
best_loss = 100


for seed in range(num_seed):
    classifier = GPTClassifier(gpt_model.config.hidden_size, classifier_hidden_size, num_classes).to(gpt_model.device)
    optimizer = torch.optim.Adam(classifier.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    train_model_gpt(dataloaders, classifier, optimizer, loss_func, gpt_model)

    accuracy, loss = evaluate_gpt(classifier, dataloaders, gpt_model)
    print(accuracy, loss)
    dev_accuracies.append(accuracy)

    if loss < best_loss:
        best_loss = loss
        best_model = classifier
        print('best')

# Evaluate the Best Model on the Test Set
test_accuracy, test_loss = evaluate_gpt(best_model, dataloaders, gpt_model, is_test=True)

# Results
mean_accuracy = np.mean(dev_accuracies)
std_dev_accuracy = np.std(dev_accuracies)

print(f"Mean Dev Accuracy: {mean_accuracy:.2f}%")
print(f"Standard Deviation: {std_dev_accuracy:.2f}")
print(f"Test Set Accuracy of Best Model: {test_accuracy:.2f}%")

Loss: 0.1343: 100%|██████████| 313/313 [00:16<00:00, 18.58it/s]


95.9 0.18426849134266376
best


Loss: 0.3300: 100%|██████████| 313/313 [00:16<00:00, 18.72it/s]


94.3 0.19865347375161946


Loss: 0.0780: 100%|██████████| 313/313 [00:16<00:00, 18.62it/s]


95.2 0.1793770119547844
best


Loss: 0.1635: 100%|██████████| 313/313 [00:16<00:00, 18.73it/s]


94.7 0.19335326505824924


Loss: 0.2603: 100%|██████████| 313/313 [00:16<00:00, 18.71it/s]


95.2 0.1700316604692489
best
Mean Dev Accuracy: 95.06%
Standard Deviation: 0.54
Test Set Accuracy of Best Model: 96.50%


# **Exporting the Notebook**

In [62]:
!apt-get install texlive texlive-xetex texlive-latex-extra pandoc
!pip install pypandoc

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
pandoc is already the newest version (2.9.2.1-3ubuntu2).
pandoc set to manually installed.
The following additional packages will be installed:
  dvisvgm fonts-droid-fallback fonts-lato fonts-lmodern fonts-noto-mono fonts-texgyre
  fonts-urw-base35 libapache-pom-java libcommons-logging-java libcommons-parent-java
  libfontbox-java libfontenc1 libgs9 libgs9-common libidn12 libijs-0.35 libjbig2dec0 libkpathsea6
  libpdfbox-java libptexenc1 libruby3.0 libsynctex2 libteckit0 libtexlua53 libtexluajit2 libwoff1
  libzzip-0-13 lmodern poppler-data preview-latex-style rake ruby ruby-net-telnet ruby-rubygems
  ruby-webrick ruby-xmlrpc ruby3.0 rubygems-integration t1utils teckit tex-common tex-gyre
  texlive-base texlive-binaries texlive-fonts-recommended texlive-latex-base
  texlive-latex-recommended texlive-pictures texlive-plain-generic tipa xfonts-encodings
  xfonts-utils
Suggested packages:
  fo

In [63]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [64]:
!jupyter nbconvert --to PDF '/content/drive/MyDrive/Colab Notebooks/NLP_HW3_MinjiPark.ipynb'

[NbConvertApp] Converting notebook /content/drive/MyDrive/Colab Notebooks/NLP_HW3_MinjiPark.ipynb to PDF
[NbConvertApp] Writing 118913 bytes to notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', 'notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: ['bibtex', 'notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 106329 bytes to /content/drive/MyDrive/Colab Notebooks/NLP_HW3_MinjiPark.pdf
