In [1]:
import pandas as pd
import os
import torch
import numpy as np
import re
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import LongformerForSequenceClassification, LongformerTokenizerFast
from transformers import LongformerTokenizer, AdamW, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.utils.data import DataLoader
from datasets import Dataset
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
import time
from sklearn.model_selection import train_test_split
import random
from sklearn.metrics import make_scorer, f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

In [2]:
data = pd.read_json('/kaggle/input/action-final-data/action_240307_sequence.json')

In [3]:
# data split function
def split_data(data, sample_SEED, fixSEED=0):
  # normal bot split
  normal = data[data['restrict']==0].reset_index(drop=True)
  bot = data[data['restrict']==1].reset_index(drop=True)
  # normal underampling
  random.seed(sample_SEED)
  sample = random.sample(list(range(len(normal))), len(bot))
  normal = normal.loc[sample].reset_index(drop=True)
  data = pd.concat([normal, bot], axis=0)
  data['restrict'].value_counts()
  # train valid test split
  train, test = train_test_split(data, test_size=0.3, random_state=fixSEED, stratify = data['restrict'])
  train, valid = train_test_split(train, test_size=0.3, random_state=fixSEED, stratify = train['restrict'])
  train = train.reset_index(drop=True)
  valid = valid.reset_index(drop=True)
  test = test.reset_index(drop=True)
  # 1:1 fix
  random.seed(fixSEED)
  move = random.sample(list(valid[valid['restrict']==1].index), 1)
  test = pd.concat([test, valid.loc[move]], axis=0).reset_index(drop=True)
  valid = valid.drop(move, axis=0).reset_index(drop=True)
  return train, valid, test

def extract_first_two_sentences(text):
    sentences = re.split(r'(?<=\.)\s+', text)
    return ' '.join(sentences[:2])

In [4]:
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup

In [5]:
# Seed 설정
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Training function
def train(model, train_loader, optimizer, scheduler):
    model.train()
    total_loss = 0
    scaler = torch.cuda.amp.GradScaler()

    for step, batch in enumerate(tqdm(train_loader, desc="Training")):
        with torch.cuda.amp.autocast():
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['restrict'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss / gradient_accumulation_steps
            total_loss += loss.item()

        scaler.scale(loss).backward()

        if (step + 1) % gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            optimizer.zero_grad()

    return total_loss / len(train_loader)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    preds, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['restrict'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds.extend(torch.argmax(logits, axis=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return preds, true_labels



In [6]:
# Tokenization function
def tokenize_function(examples, key, max_length):
    return tokenizer(examples[key], padding='max_length', truncation=True, max_length=max_length)

# Preprocessing function
def prepare_data(df, key, max_length):
    data = {key: df[key].tolist(), 'restrict': df['restrict'].tolist()}
    dataset = Dataset.from_dict(data)
    dataset = dataset.map(lambda x: tokenize_function(x, key, max_length), batched=True)
    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'restrict'])
    return dataset

# Dataloader function
def dataloader(df_train, df_valid, df_test, variable):
    # Define the variable-to-key mapping
    key_map = {
        'text': ('text', 512),
        'longtext': ('longtext', 512),
        'longlongtext': ('longlongtext', 512),
        'base': ('base', 31)
    }
    
    # Retrieve the appropriate key and max_length for the variable
    key, max_length = key_map[variable]
    
    # Prepare datasets
    train_dataset = prepare_data(df_train, key, max_length)
    val_dataset = prepare_data(df_valid, key, max_length)
    test_dataset = prepare_data(df_test, key, max_length)
    
    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8)
    test_loader = DataLoader(test_dataset, batch_size=8)
    
    return train_loader, val_loader, test_loader



# SEED = 5

In [11]:
# train valid test split
df_train, df_valid, df_test = split_data(data, 5)

# Apply the function to the text_column and create a new column
df_train['base'] = df_train['longlongtext'].apply(extract_first_two_sentences)
df_valid['base'] = df_valid['longlongtext'].apply(extract_first_two_sentences)
df_test['base'] = df_test['longlongtext'].apply(extract_first_two_sentences)

## text5

In [12]:
# Bert model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

train_loader, val_loader, test_loader = dataloader(df_train, df_valid, df_test, 'text')

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)
num_epochs = 5

# Learning rate scheduler
num_training_steps = num_epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
gradient_accumulation_steps = 8
seed = 0
set_seed(seed)


# 훈련 시간 측정 시작
start_time = time.time()

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    train_loss = train(model, train_loader, optimizer, scheduler)
    epoch_end_time = time.time()

    print(f"Epoch {epoch + 1}, Loss: {train_loss}")
    print(f"Epoch {epoch + 1} Training Time: {epoch_end_time - epoch_start_time:.2f} seconds")

    # validation data 평가
    predicted_labels, true_labels = evaluate(model, val_loader)
    f1 = f1_score(true_labels, predicted_labels, zero_division=1)
    precision = precision_score(true_labels, predicted_labels, zero_division=1)
    recall = recall_score(true_labels, predicted_labels, zero_division=1)
    accuracy = accuracy_score(true_labels, predicted_labels)
    conf_matrix = confusion_matrix(true_labels, predicted_labels)

    print(f"Epoch {epoch + 1}, Validation F1 Score: {f1}")
    print(f"Validation Precision: {precision}")
    print(f"Validation Recall: {recall}")
    print(f"Validation Accuracy: {accuracy}")
    print("Validation Confusion Matrix:\n", conf_matrix)


# 훈련 시간 측정 종료
end_time = time.time()
total_training_time = end_time - start_time # 훈련시간

# Final prediction
predicted_labels, true_labels = evaluate(model, test_loader)
f1 = f1_score(true_labels, predicted_labels, zero_division=1)
precision = precision_score(true_labels, predicted_labels, zero_division=1)
recall = recall_score(true_labels, predicted_labels, zero_division=1)
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

print("Final Test Results:")
print(f"Test F1 Score: {f1}")
print(f"Test Precision: {precision}")
print(f"Test Recall: {recall}")
print(f"Test Accuracy: {accuracy}")
print("Test Confusion Matrix:\n", conf_matrix)
print(f"Total Training Time: {total_training_time:.2f} seconds")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5642 [00:00<?, ? examples/s]

Map:   0%|          | 0/2418 [00:00<?, ? examples/s]

Map:   0%|          | 0/3456 [00:00<?, ? examples/s]

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 1, Loss: 0.08022137066460196
Epoch 1 Training Time: 281.75 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 1, Validation F1 Score: 0.7211815561959656
Validation Precision: 0.6388002552648373
Validation Recall: 0.8279569892473119
Validation Accuracy: 0.6799007444168734
Validation Confusion Matrix:
 [[ 643  566]
 [ 208 1001]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 2, Loss: 0.07427663303299599
Epoch 2 Training Time: 281.60 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.38it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 2, Validation F1 Score: 0.7114467408585056
Validation Precision: 0.6847742922723795
Validation Recall: 0.7402812241521919
Validation Accuracy: 0.6997518610421837
Validation Confusion Matrix:
 [[797 412]
 [314 895]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 3, Loss: 0.07333799529683489
Epoch 3 Training Time: 281.69 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 3, Validation F1 Score: 0.7322920450417726
Validation Precision: 0.6528497409326425
Validation Recall: 0.8337468982630273
Validation Accuracy: 0.69520264681555
Validation Confusion Matrix:
 [[ 673  536]
 [ 201 1008]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 4, Loss: 0.07131375874743583
Epoch 4 Training Time: 281.67 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 4, Validation F1 Score: 0.7405611361274679
Validation Precision: 0.6370679380214541
Validation Recall: 0.8842018196856907
Validation Accuracy: 0.6902398676592225
Validation Confusion Matrix:
 [[ 600  609]
 [ 140 1069]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 5, Loss: 0.07083960633102288
Epoch 5 Training Time: 281.60 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]


Epoch 5, Validation F1 Score: 0.7393364928909952
Validation Precision: 0.6610169491525424
Validation Recall: 0.8387096774193549
Validation Accuracy: 0.7043010752688172
Validation Confusion Matrix:
 [[ 689  520]
 [ 195 1014]]


Evaluating: 100%|██████████| 432/432 [00:51<00:00,  8.39it/s]

Final Test Results:
Test F1 Score: 0.7377731529656606
Test Precision: 0.6701323251417769
Test Recall: 0.8206018518518519
Test Accuracy: 0.7083333333333334
Test Confusion Matrix:
 [[1030  698]
 [ 310 1418]]
Total Training Time: 1589.33 seconds





## longtext5

In [13]:
# Bert model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

train_loader, val_loader, test_loader = dataloader(df_train, df_valid, df_test, 'longtext')

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)
num_epochs = 5

# Learning rate scheduler
num_training_steps = num_epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
gradient_accumulation_steps = 8
seed = 0
set_seed(seed)


# 훈련 시간 측정 시작
start_time = time.time()

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    train_loss = train(model, train_loader, optimizer, scheduler)
    epoch_end_time = time.time()

    print(f"Epoch {epoch + 1}, Loss: {train_loss}")
    print(f"Epoch {epoch + 1} Training Time: {epoch_end_time - epoch_start_time:.2f} seconds")

    # validation data 평가
    predicted_labels, true_labels = evaluate(model, val_loader)
    f1 = f1_score(true_labels, predicted_labels, zero_division=1)
    precision = precision_score(true_labels, predicted_labels, zero_division=1)
    recall = recall_score(true_labels, predicted_labels, zero_division=1)
    accuracy = accuracy_score(true_labels, predicted_labels)
    conf_matrix = confusion_matrix(true_labels, predicted_labels)

    print(f"Epoch {epoch + 1}, Validation F1 Score: {f1}")
    print(f"Validation Precision: {precision}")
    print(f"Validation Recall: {recall}")
    print(f"Validation Accuracy: {accuracy}")
    print("Validation Confusion Matrix:\n", conf_matrix)


# 훈련 시간 측정 종료
end_time = time.time()
total_training_time = end_time - start_time # 훈련시간

# Final prediction
predicted_labels, true_labels = evaluate(model, test_loader)
f1 = f1_score(true_labels, predicted_labels, zero_division=1)
precision = precision_score(true_labels, predicted_labels, zero_division=1)
recall = recall_score(true_labels, predicted_labels, zero_division=1)
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

print("Final Test Results:")
print(f"Test F1 Score: {f1}")
print(f"Test Precision: {precision}")
print(f"Test Recall: {recall}")
print(f"Test Accuracy: {accuracy}")
print("Test Confusion Matrix:\n", conf_matrix)
print(f"Total Training Time: {total_training_time:.2f} seconds")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5642 [00:00<?, ? examples/s]

Map:   0%|          | 0/2418 [00:00<?, ? examples/s]

Map:   0%|          | 0/3456 [00:00<?, ? examples/s]

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 1, Loss: 0.08056996091567085
Epoch 1 Training Time: 281.67 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.38it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 1, Validation F1 Score: 0.7258123402701716
Validation Precision: 0.6496732026143791
Validation Recall: 0.8221670802315963
Validation Accuracy: 0.6894127377998346
Validation Confusion Matrix:
 [[673 536]
 [215 994]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 2, Loss: 0.0742372595217005
Epoch 2 Training Time: 281.67 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.38it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 2, Validation F1 Score: 0.7008750994431185
Validation Precision: 0.675095785440613
Validation Recall: 0.728701406120761
Validation Accuracy: 0.6889991728701406
Validation Confusion Matrix:
 [[785 424]
 [328 881]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 3, Loss: 0.07307075846296532
Epoch 3 Training Time: 281.68 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 3, Validation F1 Score: 0.738037865748709
Validation Precision: 0.6320754716981132
Validation Recall: 0.8866832092638545
Validation Accuracy: 0.6852770885028949
Validation Confusion Matrix:
 [[ 585  624]
 [ 137 1072]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 4, Loss: 0.07095441196863105
Epoch 4 Training Time: 281.80 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 4, Validation F1 Score: 0.7344064386317909
Validation Precision: 0.6175972927241963
Validation Recall: 0.9057071960297767
Validation Accuracy: 0.6724565756823822
Validation Confusion Matrix:
 [[ 531  678]
 [ 114 1095]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 5, Loss: 0.06992039329269452
Epoch 5 Training Time: 281.81 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.39it/s]


Epoch 5, Validation F1 Score: 0.7390988372093025
Validation Precision: 0.6591056383668179
Validation Recall: 0.8411910669975186
Validation Accuracy: 0.7030603804797353
Validation Confusion Matrix:
 [[ 683  526]
 [ 192 1017]]


Evaluating: 100%|██████████| 432/432 [00:51<00:00,  8.38it/s]

Final Test Results:
Test F1 Score: 0.7335406946604458
Test Precision: 0.6643192488262911
Test Recall: 0.8188657407407407
Test Accuracy: 0.7025462962962963
Test Confusion Matrix:
 [[1013  715]
 [ 313 1415]]
Total Training Time: 1589.56 seconds





## longlongtext5

In [14]:
# Bert model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

train_loader, val_loader, test_loader = dataloader(df_train, df_valid, df_test, 'longlongtext')

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)
num_epochs = 5

# Learning rate scheduler
num_training_steps = num_epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
gradient_accumulation_steps = 8
seed = 0
set_seed(seed)


# 훈련 시간 측정 시작
start_time = time.time()

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    train_loss = train(model, train_loader, optimizer, scheduler)
    epoch_end_time = time.time()

    print(f"Epoch {epoch + 1}, Loss: {train_loss}")
    print(f"Epoch {epoch + 1} Training Time: {epoch_end_time - epoch_start_time:.2f} seconds")

    # validation data 평가
    predicted_labels, true_labels = evaluate(model, val_loader)
    f1 = f1_score(true_labels, predicted_labels, zero_division=1)
    precision = precision_score(true_labels, predicted_labels, zero_division=1)
    recall = recall_score(true_labels, predicted_labels, zero_division=1)
    accuracy = accuracy_score(true_labels, predicted_labels)
    conf_matrix = confusion_matrix(true_labels, predicted_labels)

    print(f"Epoch {epoch + 1}, Validation F1 Score: {f1}")
    print(f"Validation Precision: {precision}")
    print(f"Validation Recall: {recall}")
    print(f"Validation Accuracy: {accuracy}")
    print("Validation Confusion Matrix:\n", conf_matrix)


# 훈련 시간 측정 종료
end_time = time.time()
total_training_time = end_time - start_time # 훈련시간

# Final prediction
predicted_labels, true_labels = evaluate(model, test_loader)
f1 = f1_score(true_labels, predicted_labels, zero_division=1)
precision = precision_score(true_labels, predicted_labels, zero_division=1)
recall = recall_score(true_labels, predicted_labels, zero_division=1)
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

print("Final Test Results:")
print(f"Test F1 Score: {f1}")
print(f"Test Precision: {precision}")
print(f"Test Recall: {recall}")
print(f"Test Accuracy: {accuracy}")
print("Test Confusion Matrix:\n", conf_matrix)
print(f"Total Training Time: {total_training_time:.2f} seconds")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5642 [00:00<?, ? examples/s]

Map:   0%|          | 0/2418 [00:00<?, ? examples/s]

Map:   0%|          | 0/3456 [00:00<?, ? examples/s]

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 1, Loss: 0.08068086607935746
Epoch 1 Training Time: 281.74 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.38it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 1, Validation F1 Score: 0.7117516629711751
Validation Precision: 0.6432865731462926
Validation Recall: 0.7965260545905707
Validation Accuracy: 0.6774193548387096
Validation Confusion Matrix:
 [[675 534]
 [246 963]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 2, Loss: 0.0750864031632291
Epoch 2 Training Time: 281.76 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 2, Validation F1 Score: 0.6921241050119331
Validation Precision: 0.6666666666666666
Validation Recall: 0.7196029776674938
Validation Accuracy: 0.6799007444168734
Validation Confusion Matrix:
 [[774 435]
 [339 870]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 3, Loss: 0.07395072294024503
Epoch 3 Training Time: 281.78 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.36it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 3, Validation F1 Score: 0.7339514978601998
Validation Precision: 0.645141065830721
Validation Recall: 0.8511166253101737
Validation Accuracy: 0.6914805624483044
Validation Confusion Matrix:
 [[ 643  566]
 [ 180 1029]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 4, Loss: 0.0715086487129795
Epoch 4 Training Time: 281.79 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.41it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 4, Validation F1 Score: 0.7352941176470588
Validation Precision: 0.6375227686703097
Validation Recall: 0.8684863523573201
Validation Accuracy: 0.6873449131513648
Validation Confusion Matrix:
 [[ 612  597]
 [ 159 1050]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [04:41<00:00,  2.51it/s]


Epoch 5, Loss: 0.07081844448705571
Epoch 5 Training Time: 281.53 seconds


Evaluating: 100%|██████████| 303/303 [00:36<00:00,  8.39it/s]


Epoch 5, Validation F1 Score: 0.7292048369366069
Validation Precision: 0.6546052631578947
Validation Recall: 0.8229942100909843
Validation Accuracy: 0.6943755169561621
Validation Confusion Matrix:
 [[684 525]
 [214 995]]


Evaluating: 100%|██████████| 432/432 [00:51<00:00,  8.39it/s]

Final Test Results:
Test F1 Score: 0.7313120752744381
Test Precision: 0.6668255481410867
Test Recall: 0.8096064814814815
Test Accuracy: 0.7025462962962963
Test Confusion Matrix:
 [[1029  699]
 [ 329 1399]]
Total Training Time: 1589.47 seconds





## base5

In [15]:
# Bert model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

train_loader, val_loader, test_loader = dataloader(df_train, df_valid, df_test, 'base')

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)
num_epochs = 5

# Learning rate scheduler
num_training_steps = num_epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
gradient_accumulation_steps = 8
seed = 0
set_seed(seed)


# 훈련 시간 측정 시작
start_time = time.time()

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    train_loss = train(model, train_loader, optimizer, scheduler)
    epoch_end_time = time.time()

    print(f"Epoch {epoch + 1}, Loss: {train_loss}")
    print(f"Epoch {epoch + 1} Training Time: {epoch_end_time - epoch_start_time:.2f} seconds")

    # validation data 평가
    predicted_labels, true_labels = evaluate(model, val_loader)
    f1 = f1_score(true_labels, predicted_labels, zero_division=1)
    precision = precision_score(true_labels, predicted_labels, zero_division=1)
    recall = recall_score(true_labels, predicted_labels, zero_division=1)
    accuracy = accuracy_score(true_labels, predicted_labels)
    conf_matrix = confusion_matrix(true_labels, predicted_labels)

    print(f"Epoch {epoch + 1}, Validation F1 Score: {f1}")
    print(f"Validation Precision: {precision}")
    print(f"Validation Recall: {recall}")
    print(f"Validation Accuracy: {accuracy}")
    print("Validation Confusion Matrix:\n", conf_matrix)


# 훈련 시간 측정 종료
end_time = time.time()
total_training_time = end_time - start_time # 훈련시간

# Final prediction
predicted_labels, true_labels = evaluate(model, test_loader)
f1 = f1_score(true_labels, predicted_labels, zero_division=1)
precision = precision_score(true_labels, predicted_labels, zero_division=1)
recall = recall_score(true_labels, predicted_labels, zero_division=1)
accuracy = accuracy_score(true_labels, predicted_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

print("Final Test Results:")
print(f"Test F1 Score: {f1}")
print(f"Test Precision: {precision}")
print(f"Test Recall: {recall}")
print(f"Test Accuracy: {accuracy}")
print("Test Confusion Matrix:\n", conf_matrix)
print(f"Total Training Time: {total_training_time:.2f} seconds")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5642 [00:00<?, ? examples/s]

Map:   0%|          | 0/2418 [00:00<?, ? examples/s]

Map:   0%|          | 0/3456 [00:00<?, ? examples/s]

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [00:30<00:00, 23.07it/s]


Epoch 1, Loss: 0.08318476096091122
Epoch 1 Training Time: 30.61 seconds


Evaluating: 100%|██████████| 303/303 [00:04<00:00, 74.85it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 1, Validation F1 Score: 0.6820208023774145
Validation Precision: 0.6190155091031693
Validation Recall: 0.7593052109181141
Validation Accuracy: 0.6459884201819686
Validation Confusion Matrix:
 [[644 565]
 [291 918]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [00:30<00:00, 23.02it/s]


Epoch 2, Loss: 0.07960184032450993
Epoch 2 Training Time: 30.68 seconds


Evaluating: 100%|██████████| 303/303 [00:04<00:00, 75.37it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 2, Validation F1 Score: 0.7122736418511066
Validation Precision: 0.5989847715736041
Validation Recall: 0.8784119106699751
Validation Accuracy: 0.6451612903225806
Validation Confusion Matrix:
 [[ 498  711]
 [ 147 1062]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [00:30<00:00, 22.91it/s]


Epoch 3, Loss: 0.0788536058269209
Epoch 3 Training Time: 30.82 seconds


Evaluating: 100%|██████████| 303/303 [00:04<00:00, 74.53it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 3, Validation F1 Score: 0.7124492557510149
Validation Precision: 0.6027475672581568
Validation Recall: 0.8709677419354839
Validation Accuracy: 0.6484698097601324
Validation Confusion Matrix:
 [[ 515  694]
 [ 156 1053]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [00:30<00:00, 22.91it/s]


Epoch 4, Loss: 0.07846196447485905
Epoch 4 Training Time: 30.82 seconds


Evaluating: 100%|██████████| 303/303 [00:04<00:00, 75.74it/s]
  scaler = torch.cuda.amp.GradScaler()


Epoch 4, Validation F1 Score: 0.7142857142857143
Validation Precision: 0.5987688864017907
Validation Recall: 0.8850289495450786
Validation Accuracy: 0.6459884201819686
Validation Confusion Matrix:
 [[ 492  717]
 [ 139 1070]]


  with torch.cuda.amp.autocast():
Training: 100%|██████████| 706/706 [00:30<00:00, 22.95it/s]


Epoch 5, Loss: 0.07819248123817336
Epoch 5 Training Time: 30.77 seconds


Evaluating: 100%|██████████| 303/303 [00:04<00:00, 75.09it/s]


Epoch 5, Validation F1 Score: 0.7131886477462437
Validation Precision: 0.5979843225083986
Validation Recall: 0.8833746898263027
Validation Accuracy: 0.6447477253928867
Validation Confusion Matrix:
 [[ 491  718]
 [ 141 1068]]


Evaluating: 100%|██████████| 432/432 [00:05<00:00, 75.35it/s]

Final Test Results:
Test F1 Score: 0.7113772455089821
Test Precision: 0.6068655496526358
Test Recall: 0.859375
Test Accuracy: 0.6513310185185185
Test Confusion Matrix:
 [[ 766  962]
 [ 243 1485]]
Total Training Time: 173.98 seconds



