In [1]:
import torch
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
import gc
import itertools
from sklearn.model_selection import KFold

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load data
df = pd.read_csv('Data Training.csv')

# Preprocess
tokenizer = BertTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
le = LabelEncoder()
# Encode labels
df['sentimen_encoded'] = le.fit_transform(df['Sentimen'])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

In [4]:
max_length = 128

encoded_data = tokenizer.batch_encode_plus(
    df['Ulasan'].tolist(),
    add_special_tokens=True,
    return_attention_mask=True,
    padding='max_length',
    truncation=True,
    max_length=max_length,
    return_tensors='pt'
)

In [5]:
input_ids = encoded_data['input_ids']
attention_masks = encoded_data['attention_mask']
labels = torch.tensor(df['sentimen_encoded'].tolist())

In [6]:
train_inputs, val_inputs, train_labels, val_labels = train_test_split(input_ids, labels, test_size=0.2, random_state=42)
train_masks, val_masks, _, _ = train_test_split(attention_masks, labels, test_size=0.2, random_state=42)

In [7]:
def create_model_and_optimizer(learning_rate):
    model = BertForSequenceClassification.from_pretrained(
        'indobenchmark/indobert-base-p2',
        num_labels=len(le.classes_),
        output_attentions=False,
        output_hidden_states=False
    )
    optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-8)
    return model, optimizer

In [8]:
# Hyperparameter grid (tanpa epoch)
param_grid = {
    'batch_size': [16, 32],
    'learning_rate': [5e-5, 3e-5, 2e-5]
}

# Parameter early stopping
patience = 5
min_delta = 0.001

# DataFrame untuk menyimpan hasil
results_df = pd.DataFrame(columns=['batch_size', 'learning_rate', 'epochs', 'validation_accuracy', 'validation_f1', 'training_loss'])

# Modified train_and_evaluate function with early stopping
def train_and_evaluate(model, optimizer, train_dataloader, val_dataloader, device, patience, min_delta):
    total_steps = len(train_dataloader) * patience
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    model.to(device)

    best_val_f1 = 0
    best_val_accuracy = 0
    epochs_no_improve = 0
    avg_train_loss = 0

    for epoch in range(100):
        print(f'Epoch {epoch+1}')
        print('-' * 10)

        model.train()
        total_loss = 0
        for batch in tqdm(train_dataloader, desc="Training", leave=False):
            batch = tuple(t.to(device) for t in batch)
            inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}

            model.zero_grad()
            outputs = model(**inputs)
            loss = outputs.loss
            total_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_loss / len(train_dataloader)
        print(f"Average training loss: {avg_train_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0
        predictions, true_labels = [], []
        for batch in tqdm(val_dataloader, desc="Validation", leave=False):
            batch = tuple(t.to(device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
                outputs = model(**inputs)
            val_loss += outputs.loss.item()
            logits = outputs.logits

            predictions.extend(torch.argmax(logits, dim=1).cpu().numpy())
            true_labels.extend(inputs['labels'].cpu().numpy())

        avg_val_loss = val_loss / len(val_dataloader)
        val_accuracy = accuracy_score(true_labels, predictions)
        val_f1 = f1_score(true_labels, predictions, average='weighted')

        print(f"Validation Loss: {avg_val_loss:.4f}")
        print(f"Validation Accuracy: {val_accuracy:.4f}")
        print(f"Validation F1 Score: {val_f1:.4f}")

        # Early stopping logic
        if val_f1 - best_val_f1 > min_delta:
            best_val_f1 = val_f1
            best_val_accuracy = val_accuracy
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            print(f"Validation F1 did not improve. Patience: {epochs_no_improve}/{patience}")

        if epochs_no_improve >= patience:
            print("Early stopping triggered!")
            break

    return best_val_f1, best_val_accuracy, avg_val_loss, avg_train_loss, epoch + 1

# Hyperparameter optimization
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_params = None
best_f1 = 0

for batch_size, learning_rate in itertools.product(param_grid['batch_size'], param_grid['learning_rate']):
    print(f"\nTrying batch_size: {batch_size}, learning_rate: {learning_rate}")

    # Create DataLoader
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    val_data = TensorDataset(val_inputs, val_masks, val_labels)
    val_dataloader = DataLoader(val_data, batch_size=batch_size)

    # Create model and optimizer
    model, optimizer = create_model_and_optimizer(learning_rate)

    # Train and evaluate with early stopping
    val_f1, val_accuracy, val_loss, train_loss, epochs_used = train_and_evaluate(model, optimizer, train_dataloader, val_dataloader, device, patience, min_delta)

    # Save results to DataFrame
    new_row = pd.DataFrame([{
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'epochs': epochs_used,
        'validation_accuracy': val_accuracy,
        'validation_f1': val_f1,
        'training_loss': train_loss
    }])

    results_df = pd.concat([results_df, new_row], ignore_index=True)

    if val_f1 > best_f1:
        best_f1 = val_f1
        best_params = {'batch_size': batch_size, 'learning_rate': learning_rate}

    # Clear cache
    torch.cuda.empty_cache()
    gc.collect()

# Save the DataFrame to a CSV file
results_df.to_csv('training_results.csv', index=False)

print("\nBest Hyperparameters:")
print(best_params)
print(f"Best Validation F1 Score: {best_f1:.4f}")


Trying batch_size: 16, learning_rate: 5e-05


pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1
----------




Average training loss: 0.2330




Validation Loss: 0.2308
Validation Accuracy: 0.9327
Validation F1 Score: 0.9331
Epoch 2
----------




Average training loss: 0.0809




Validation Loss: 0.3403
Validation Accuracy: 0.9377
Validation F1 Score: 0.9381
Epoch 3
----------




Average training loss: 0.0286




Validation Loss: 0.2309
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Epoch 4
----------




Average training loss: 0.0225




Validation Loss: 0.2341
Validation Accuracy: 0.9526
Validation F1 Score: 0.9527
Validation F1 did not improve. Patience: 1/5
Epoch 5
----------




Average training loss: 0.0009




Validation Loss: 0.2480
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 2/5
Epoch 6
----------




Average training loss: 0.0002




Validation Loss: 0.2480
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 3/5
Epoch 7
----------




Average training loss: 0.0003




Validation Loss: 0.2480
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 4/5
Epoch 8
----------




Average training loss: 0.0001


  results_df = pd.concat([results_df, new_row], ignore_index=True)


Validation Loss: 0.2480
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!

Trying batch_size: 16, learning_rate: 3e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1
----------




Average training loss: 0.2223




Validation Loss: 0.2668
Validation Accuracy: 0.9277
Validation F1 Score: 0.9281
Epoch 2
----------




Average training loss: 0.0770




Validation Loss: 0.1470
Validation Accuracy: 0.9576
Validation F1 Score: 0.9576
Epoch 3
----------




Average training loss: 0.0179




Validation Loss: 0.2168
Validation Accuracy: 0.9526
Validation F1 Score: 0.9526
Validation F1 did not improve. Patience: 1/5
Epoch 4
----------




Average training loss: 0.0064




Validation Loss: 0.2140
Validation Accuracy: 0.9626
Validation F1 Score: 0.9626
Epoch 5
----------




Average training loss: 0.0002




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Epoch 6
----------




Average training loss: 0.0001




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Validation F1 did not improve. Patience: 1/5
Epoch 7
----------




Average training loss: 0.0002




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Validation F1 did not improve. Patience: 2/5
Epoch 8
----------




Average training loss: 0.0001




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Validation F1 did not improve. Patience: 3/5
Epoch 9
----------




Average training loss: 0.0001




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Validation F1 did not improve. Patience: 4/5
Epoch 10
----------




Average training loss: 0.0001




Validation Loss: 0.2100
Validation Accuracy: 0.9651
Validation F1 Score: 0.9652
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!

Trying batch_size: 16, learning_rate: 2e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1
----------




Average training loss: 0.2200




Validation Loss: 0.1695
Validation Accuracy: 0.9401
Validation F1 Score: 0.9405
Epoch 2
----------




Average training loss: 0.0659




Validation Loss: 0.1871
Validation Accuracy: 0.9476
Validation F1 Score: 0.9479
Epoch 3
----------




Average training loss: 0.0191




Validation Loss: 0.2149
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Epoch 4
----------




Average training loss: 0.0013




Validation Loss: 0.2106
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Epoch 5
----------




Average training loss: 0.0002




Validation Loss: 0.2166
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 1/5
Epoch 6
----------




Average training loss: 0.0006




Validation Loss: 0.2166
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 2/5
Epoch 7
----------




Average training loss: 0.0003




Validation Loss: 0.2166
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 3/5
Epoch 8
----------




Average training loss: 0.0002




Validation Loss: 0.2166
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 4/5
Epoch 9
----------




Average training loss: 0.0002




Validation Loss: 0.2166
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!

Trying batch_size: 32, learning_rate: 5e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1
----------




Average training loss: 0.2819




Validation Loss: 0.1359
Validation Accuracy: 0.9526
Validation F1 Score: 0.9526
Epoch 2
----------




Average training loss: 0.0937




Validation Loss: 0.0855
Validation Accuracy: 0.9751
Validation F1 Score: 0.9751
Epoch 3
----------




Average training loss: 0.0309




Validation Loss: 0.1482
Validation Accuracy: 0.9601
Validation F1 Score: 0.9601
Validation F1 did not improve. Patience: 1/5
Epoch 4
----------




Average training loss: 0.0050




Validation Loss: 0.1625
Validation Accuracy: 0.9601
Validation F1 Score: 0.9601
Validation F1 did not improve. Patience: 2/5
Epoch 5
----------




Average training loss: 0.0040




Validation Loss: 0.1595
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 3/5
Epoch 6
----------




Average training loss: 0.0023




Validation Loss: 0.1595
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 4/5
Epoch 7
----------




Average training loss: 0.0004




Validation Loss: 0.1595
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!

Trying batch_size: 32, learning_rate: 3e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1
----------




Average training loss: 0.2240




Validation Loss: 0.1082
Validation Accuracy: 0.9526
Validation F1 Score: 0.9527
Epoch 2
----------




Average training loss: 0.0770




Validation Loss: 0.1865
Validation Accuracy: 0.9501
Validation F1 Score: 0.9504
Validation F1 did not improve. Patience: 1/5
Epoch 3
----------




Average training loss: 0.0209




Validation Loss: 0.1825
Validation Accuracy: 0.9551
Validation F1 Score: 0.9554
Epoch 4
----------




Average training loss: 0.0038




Validation Loss: 0.1541
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Epoch 5
----------




Average training loss: 0.0005




Validation Loss: 0.1680
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 1/5
Epoch 6
----------




Average training loss: 0.0004




Validation Loss: 0.1680
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 2/5
Epoch 7
----------




Average training loss: 0.0004




Validation Loss: 0.1680
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 3/5
Epoch 8
----------




Average training loss: 0.0004




Validation Loss: 0.1680
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 4/5
Epoch 9
----------




Average training loss: 0.0004




Validation Loss: 0.1680
Validation Accuracy: 0.9576
Validation F1 Score: 0.9578
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Trying batch_size: 32, learning_rate: 2e-05
Epoch 1
----------




Average training loss: 0.2269




Validation Loss: 0.1065
Validation Accuracy: 0.9551
Validation F1 Score: 0.9551
Epoch 2
----------




Average training loss: 0.0615




Validation Loss: 0.2535
Validation Accuracy: 0.9352
Validation F1 Score: 0.9356
Validation F1 did not improve. Patience: 1/5
Epoch 3
----------




Average training loss: 0.0215




Validation Loss: 0.2421
Validation Accuracy: 0.9526
Validation F1 Score: 0.9529
Validation F1 did not improve. Patience: 2/5
Epoch 4
----------




Average training loss: 0.0062




Validation Loss: 0.1340
Validation Accuracy: 0.9726
Validation F1 Score: 0.9726
Epoch 5
----------




Average training loss: 0.0008




Validation Loss: 0.1807
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 1/5
Epoch 6
----------




Average training loss: 0.0006




Validation Loss: 0.1807
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 2/5
Epoch 7
----------




Average training loss: 0.0006




Validation Loss: 0.1807
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 3/5
Epoch 8
----------




Average training loss: 0.0019




Validation Loss: 0.1807
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 4/5
Epoch 9
----------




Average training loss: 0.0006




Validation Loss: 0.1807
Validation Accuracy: 0.9626
Validation F1 Score: 0.9627
Validation F1 did not improve. Patience: 5/5
Early stopping triggered!

Best Hyperparameters:
{'batch_size': 32, 'learning_rate': 5e-05}
Best Validation F1 Score: 0.9751


In [10]:
#single_train

best_params = {
    'batch_size': 32,
    'learning_rate': 5e-5,
    'epochs': 4
}

# Training function
def train_and_evaluate(model, optimizer, train_dataloader, val_dataloader, epochs, device):
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    model.to(device)

    for epoch in range(epochs):
        print(f'Epoch {epoch + 1}/{epochs}')
        print('-' * 10)

        model.train()
        total_loss = 0
        progress_bar = tqdm(train_dataloader, desc="Training", leave=False)

        for batch in progress_bar:
            batch = tuple(t.to(device) for t in batch)
            inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}

            model.zero_grad()
            outputs = model(**inputs)
            loss = outputs.loss
            total_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

            progress_bar.set_postfix({'loss': f'{total_loss / (progress_bar.n + 1):.4f}'})

        avg_train_loss = total_loss / len(train_dataloader)
        print(f"Average training loss: {avg_train_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0
        predictions, true_labels = [], []
        for batch in tqdm(val_dataloader, desc="Validation", leave=False):
            batch = tuple(t.to(device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
                outputs = model(**inputs)
            val_loss += outputs.loss.item()
            logits = outputs.logits

            predictions.extend(torch.argmax(logits, dim=1).cpu().numpy())
            true_labels.extend(inputs['labels'].cpu().numpy())

        avg_val_loss = val_loss / len(val_dataloader)
        val_accuracy = accuracy_score(true_labels, predictions)
        val_f1 = f1_score(true_labels, predictions, average='weighted')
        val_recall = recall_score(true_labels, predictions, average='weighted')
        val_precision = precision_score(true_labels, predictions, average='weighted')

        print(f"Validation Loss: {avg_val_loss:.4f}")
        print(f"Validation Accuracy: {val_accuracy:.4f}")
        print(f"Validation F1 Score: {val_f1:.4f}")
        print(f"Validation Recall: {val_recall:.4f}")
        print(f"Validation Precision: {val_precision:.4f}")

    return val_accuracy, val_f1, avg_val_loss

# Single training and validation split
def single_train_validate(input_ids, attention_masks, labels):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Split the data into training and validation (80-20 split for example)
    train_size = int(0.8 * len(input_ids))
    val_size = len(input_ids) - train_size
    train_inputs, val_inputs = input_ids[:train_size], input_ids[train_size:]
    train_masks, val_masks = attention_masks[:train_size], attention_masks[train_size:]
    train_labels, val_labels = labels[:train_size], labels[train_size:]

    # Create DataLoader
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_dataloader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)

    val_data = TensorDataset(val_inputs, val_masks, val_labels)
    val_dataloader = DataLoader(val_data, batch_size=best_params['batch_size'])

    # Create model and optimizer
    model, optimizer = create_model_and_optimizer(best_params['learning_rate'])

    # Train and evaluate
    val_accuracy, val_f1, val_loss = train_and_evaluate(model, optimizer, train_dataloader, val_dataloader, best_params['epochs'], device)

    return {
        'val_accuracy': val_accuracy,
        'val_f1': val_f1,
        'val_loss': val_loss
    }

# Perform training and validation
results = single_train_validate(input_ids, attention_masks, labels)

# Display results
print("\nSingle Training and Validation Results:")
print(f"Validation Accuracy: {results['val_accuracy']:.4f}")
print(f"Validation F1 Score: {results['val_f1']:.4f}")
print(f"Validation Loss: {results['val_loss']:.4f}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4
----------




Average training loss: 0.2667




Validation Loss: 0.2469
Validation Accuracy: 0.9077
Validation F1 Score: 0.9076
Validation Recall: 0.9077
Validation Precision: 0.9108
Epoch 2/4
----------




Average training loss: 0.0902




Validation Loss: 0.1615
Validation Accuracy: 0.9601
Validation F1 Score: 0.9600
Validation Recall: 0.9601
Validation Precision: 0.9623
Epoch 3/4
----------




Average training loss: 0.0305




Validation Loss: 0.1428
Validation Accuracy: 0.9651
Validation F1 Score: 0.9651
Validation Recall: 0.9651
Validation Precision: 0.9653
Epoch 4/4
----------




Average training loss: 0.0023


                                                           

Validation Loss: 0.1519
Validation Accuracy: 0.9601
Validation F1 Score: 0.9601
Validation Recall: 0.9601
Validation Precision: 0.9605

Single Training and Validation Results:
Validation Accuracy: 0.9601
Validation F1 Score: 0.9601
Validation Loss: 0.1519




In [12]:
torch.save(model.state_dict(), "fine_tuned_bert.bin")