In [28]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from pytorch_lightning import seed_everything


# 한글 자연어 처리 데이터셋
# from Korpora import Korpora

# 토크나이저 관련 경고 무시하기 위하여 설정
os.environ["TOKENIZERS_PARALLELISM"] = 'true'

# device 지정
# device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda')
print(f'사용 디바이스: {device}')

사용 디바이스: cuda


In [29]:
num_seed = 42
seed_everything(num_seed, workers=True)

df = pd.read_csv("/home/son/ml/hanyang/datasets/final_data.csv")

Seed set to 42


In [30]:
df

Unnamed: 0,text,label1
0,활동지에 보면은 스텝1 질문이 나와 있는데 혹시 다들 확인하셨을까요?,0
1,그러면은 확인 아직 못하신분 없는 지 다 확인하셨네요.\n그러면 지금부터 스텝 1의...,0
2,이제 먼저 본 자료에 따르면 김한양 씨가 지금 20분 정도 듣고 있는 수업을 운영을...,1
3,이렇게 있는 상황인데 일단은 이 상황이 문제가 될 수밖에 없었던 이유에는 일단은 개...,2
4,제가 이어서 해보겠습니다. 저도 이번 문제 상황을 보면서 분명 A1님처럼 비슷한 생...,2
...,...,...
3761,아 얘를... 이미지를 LMS 거라도 넣을까요?,0
3762,좋아요,8
3763,이론적 근거…,0
3764,교안을 참고했다고 하면…,2


In [31]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.3, random_state=num_seed, stratify=df['label1'])
# val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=config["seed"], stratify=temp_df['label1'])  # val 15%, test 15%

In [32]:
train_df

Unnamed: 0,text,label1
1456,사실 자료에 대한 부분이 목적이나 기대 효과 적은 거죠?,4
2091,또 생각해 보면 평가 같은 것도 목표랑 안 맞는 평가가 이루어질 수도 있잖아요.\n...,2
2330,교수학습 이론으로서의 개별화 학습을 생각을 하고 있었는데 당연히 에듀테크와 접목된....,11
66,그러면은 제가 지금까지 얘기들을 한번 종합을 해보면요.\n일단은 전체적으로 학습자의...,0
604,저희가 얘기했던 게 목표를 수정을 해야 되냐 말아야 되냐,5
...,...,...
2522,그런 의미에서의 수업 전중후으로 전개되는 건 어떤가 저희가 플립러닝을 취하는 만큼 ...,2
1628,네 이 정도면 돼 네 뭔가 제가 지금 어느 정도,8
944,그러니까 단위로 피드백하는 것도 어느 정도 이제 업무 부담을 줄이기 위한 개별화 학...,8
1325,먼저 시작해 주시면 좋을 것 같습니다. 먼저 좀 진행을,0


In [33]:
CHECKPOINT_NAME = 'kykim/bert-kor-base'

In [34]:
import torch
from transformers import BertTokenizerFast
from torch.utils.data import Dataset, DataLoader


class TokenDataset(Dataset):
  
    def __init__(self, dataframe, tokenizer_pretrained):
        # sentence, label 컬럼으로 구성된 데이터프레임 전달
        self.data = dataframe        
        # Huggingface 토크나이저 생성
        self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_pretrained)
  
    def __len__(self):
        return len(self.data)
  
    def __getitem__(self, idx):
        sentence = self.data.iloc[idx]['text']
        label = self.data.iloc[idx]['label1']

        # 토큰화 처리
        tokens = self.tokenizer(
            sentence,                # 1개 문장 
            return_tensors='pt',     # 텐서로 반환
            truncation=True,         # 잘라내기 적용
            padding='max_length',    # 패딩 적용
            add_special_tokens=True  # 스페셜 토큰 적용
        )

        input_ids = tokens['input_ids'].squeeze(0)           # 2D -> 1D
        attention_mask = tokens['attention_mask'].squeeze(0) # 2D -> 1D
        token_type_ids = torch.zeros_like(attention_mask)

        # input_ids, attention_mask, token_type_ids 이렇게 3가지 요소를 반환하도록 합니다.
        # input_ids: 토큰
        # attention_mask: 실제 단어가 존재하면 1, 패딩이면 0 (패딩은 0이 아닐 수 있습니다)
        # token_type_ids: 문장을 구분하는 id. 단일 문장인 경우에는 전부 0
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask, 
            'token_type_ids': token_type_ids,
        }, torch.tensor(label)
    

In [35]:
# 토크나이저 지정
tokenizer_pretrained = CHECKPOINT_NAME

# train, test 데이터셋 생성
train_data = TokenDataset(train_df, tokenizer_pretrained)
test_data = TokenDataset(test_df, tokenizer_pretrained)

# DataLoader로 이전에 생성한 Dataset를 지정하여, batch 구성, shuffle, num_workers 등을 설정합니다.
train_loader = DataLoader(train_data, batch_size=8, shuffle=True, num_workers=8)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False, num_workers=8)

In [36]:
train_df

Unnamed: 0,text,label1
1456,사실 자료에 대한 부분이 목적이나 기대 효과 적은 거죠?,4
2091,또 생각해 보면 평가 같은 것도 목표랑 안 맞는 평가가 이루어질 수도 있잖아요.\n...,2
2330,교수학습 이론으로서의 개별화 학습을 생각을 하고 있었는데 당연히 에듀테크와 접목된....,11
66,그러면은 제가 지금까지 얘기들을 한번 종합을 해보면요.\n일단은 전체적으로 학습자의...,0
604,저희가 얘기했던 게 목표를 수정을 해야 되냐 말아야 되냐,5
...,...,...
2522,그런 의미에서의 수업 전중후으로 전개되는 건 어떤가 저희가 플립러닝을 취하는 만큼 ...,2
1628,네 이 정도면 돼 네 뭔가 제가 지금 어느 정도,8
944,그러니까 단위로 피드백하는 것도 어느 정도 이제 업무 부담을 줄이기 위한 개별화 학...,8
1325,먼저 시작해 주시면 좋을 것 같습니다. 먼저 좀 진행을,0


In [37]:
# 1개의 batch 꺼내기
inputs, labels = next(iter(train_loader))

# 데이터셋을 device 설정
inputs = {k: v.to(device) for k, v in inputs.items()}
labels.to(device)

tensor([ 0,  0,  4,  8,  5, 11,  0,  9], device='cuda:0')

In [38]:
inputs.keys()

dict_keys(['input_ids', 'attention_mask', 'token_type_ids'])

In [39]:
# key 별 shape 확인
inputs['input_ids'].shape, inputs['attention_mask'].shape, inputs['token_type_ids'].shape

(torch.Size([8, 512]), torch.Size([8, 512]), torch.Size([8, 512]))

In [40]:
from transformers import BertConfig

config = BertConfig.from_pretrained(CHECKPOINT_NAME)

In [41]:
from transformers import BertModel

# 모델 생성
model_bert = BertModel.from_pretrained(CHECKPOINT_NAME).to(device)

In [42]:
output = model_bert(**inputs)
output.keys()

odict_keys(['last_hidden_state', 'pooler_output'])

In [43]:
output['last_hidden_state'].shape, output['pooler_output'].shape

(torch.Size([8, 512, 768]), torch.Size([8, 768]))

In [44]:
# last_hidden_state 출력
last_hidden_state = output['last_hidden_state']
print(last_hidden_state.shape)
print(last_hidden_state[:, 0, :])

torch.Size([8, 512, 768])
tensor([[-5.5146e-01, -5.6928e-01, -2.4631e+00,  ..., -4.3594e-01,
         -7.0296e-01, -6.7802e-01],
        [ 3.6580e-02,  2.0433e-01,  6.0096e-01,  ..., -3.3282e-01,
          4.6959e-01,  5.1616e-01],
        [ 6.1061e-03, -1.5451e-01,  1.1625e+00,  ...,  3.1391e-01,
         -6.1737e-03,  6.8747e-01],
        ...,
        [ 3.0435e-02,  8.4460e-01, -2.4790e+00,  ...,  3.5709e-01,
         -5.0359e-01, -2.1547e-01],
        [ 1.5444e-01,  3.7095e-02, -1.8522e+00,  ...,  4.8192e-01,
         -4.2783e-02, -2.2267e-01],
        [-1.6724e-01, -1.7641e-01,  7.6846e-01,  ...,  1.0581e-01,
         -2.9410e-01, -7.2378e-04]], device='cuda:0', grad_fn=<SliceBackward0>)


In [45]:
# pooler_output 출력
pooler_output = output['pooler_output']
print(pooler_output.shape)
print(pooler_output)

torch.Size([8, 768])
tensor([[ 0.4619, -0.1234, -0.9953,  ..., -0.8513,  0.7774,  0.3498],
        [-0.1993,  0.0541, -0.9373,  ..., -0.7232,  0.2479,  0.7904],
        [-0.9171,  0.2437, -0.8902,  ...,  0.0717, -0.4788,  0.5831],
        ...,
        [ 0.2636,  0.5777, -0.1181,  ..., -0.9775,  0.4298,  0.6413],
        [-0.1838,  0.5343,  0.9997,  ..., -0.9611,  0.3863,  0.2239],
        [-0.5162,  0.2542, -0.5982,  ..., -0.3634,  0.1225,  0.8352]],
       device='cuda:0', grad_fn=<TanhBackward0>)


In [46]:
fc = nn.Linear(768, 10)
fc.to(device)
fc_output = fc(last_hidden_state[:, 0, :])
print(fc_output.shape)
print(fc_output.argmax(dim=1))

torch.Size([8, 10])
tensor([0, 7, 7, 9, 9, 0, 9, 7], device='cuda:0')


In [47]:
class CustomBertModel(nn.Module):
    def __init__(self, bert_pretrained, dropout_rate=0.5):
        # 부모클래스 초기화
        super(CustomBertModel, self).__init__()
        # 사전학습 모델 지정
        self.bert = BertModel.from_pretrained(bert_pretrained)
        # dropout 설정
        self.dr = nn.Dropout(p=dropout_rate)
        # 최종 출력층 정의
        self.fc = nn.Linear(768, 10)
    
    def forward(self, input_ids, attention_mask, token_type_ids):
        # 입력을 pre-trained bert model 로 대입
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        # 결과의 last_hidden_state 가져옴
        last_hidden_state = output['last_hidden_state']
        # last_hidden_state[:, 0, :]는 [CLS] 토큰을 가져옴
        x = self.dr(last_hidden_state[:, 0, :])
        # FC 을 거쳐 최종 출력
        x = self.fc(x)
        return x

In [48]:
bert = CustomBertModel(CHECKPOINT_NAME)
bert.to(device)

CustomBertModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(42000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [49]:
# loss 정의: CrossEntropyLoss
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저 정의: bert.paramters()와 learning_rate 설정
optimizer = optim.Adam(bert.parameters(), lr=1e-5)

In [50]:
from tqdm import tqdm
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

def model_train(model, data_loader, loss_fn, optimizer, device):
    model.train()
    
    running_loss = 0
    corr = 0
    counts = 0
    
    prograss_bar = tqdm(data_loader, unit='batch', total=len(data_loader), mininterval=1)
    
    # mini-batch 학습을 시작합니다.
    for idx, (inputs, labels) in enumerate(prograss_bar):
        inputs = {k:v.to(device) for k, v in inputs.items()}
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        output = model(**inputs)
        
        loss = loss_fn(output, labels)
        
        loss.backward()
        
        optimizer.step()
        
        _, pred = output.max(dim=1)

        corr += pred.eq(labels).sum().item()
        counts += len(labels)
        
        running_loss += loss.item() * labels.size(0)
        
        prograss_bar.set_description(f"training loss: {running_loss/(idx+1):.5f}, training accuracy: {corr / counts:.5f}")
        
    acc = corr / len(data_loader.dataset)
    
    return running_loss / len(data_loader.dataset), acc

In [51]:
def model_evaluate(model, data_loader, loss_fn, device):
    model.eval()
    
    with torch.no_grad():
        corr = 0
        running_loss = 0
        
        for inputs, labels in data_loader:
            inputs = {k:v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            
            output = model(**inputs)
            
            _, pred = output.max(dim=1)

            corr += torch.sum(pred.eq(labels)).item()
            
            running_loss += loss_fn(output, labels).item() * labels.size(0)
        
        acc = corr / len(data_loader.dataset)
        
        return running_loss / len(data_loader.dataset), acc

In [52]:
num_epochs = 10

model_name = 'bert-kor-base'

min_loss = np.inf

for epoch in range(num_epochs):
    train_loss, train_acc = model_train(bert, train_loader, loss_fn, optimizer, device)

    val_loss, val_acc = model_evaluate(bert, test_loader, loss_fn, device)   
    
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss
        torch.save(bert.state_dict(), f'{model_name}.pth')
    
    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')

training loss: 20.38883, training accuracy: 0.12500:   0%|          | 1/330 [00:01<06:33,  1.20s/batch]../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [7,0,0] Assertion `t >= 0 && t < n_classes` failed.
training loss: 20.38883, training accuracy: 0.12500:   0%|          | 1/330 [00:01<10:30,  1.92s/batch]


RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [53]:
# 통합
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from pytorch_lightning import seed_everything


# 한글 자연어 처리 데이터셋
# from Korpora import Korpora

# 토크나이저 관련 경고 무시하기 위하여 설정
os.environ["TOKENIZERS_PARALLELISM"] = 'true'

# device 지정
# device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda')
print(f'사용 디바이스: {device}')

num_seed = 42
seed_everything(num_seed, workers=True)

df = pd.read_csv("/home/son/ml/hanyang/datasets/final_data.csv")

from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.3, random_state=num_seed, stratify=df['label1'])
# val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=config["seed"], stratify=temp_df['label1'])  # val 15%, test 15%

CHECKPOINT_NAME = 'kykim/bert-kor-base'

import torch
from transformers import BertTokenizerFast
from torch.utils.data import Dataset, DataLoader


class TokenDataset(Dataset):
  
    def __init__(self, dataframe, tokenizer_pretrained):
        # sentence, label 컬럼으로 구성된 데이터프레임 전달
        self.data = dataframe        
        # Huggingface 토크나이저 생성
        self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_pretrained)
  
    def __len__(self):
        return len(self.data)
  
    def __getitem__(self, idx):
        sentence = self.data.iloc[idx]['text']
        label = self.data.iloc[idx]['label1']

        # 토큰화 처리
        tokens = self.tokenizer(
            sentence,                # 1개 문장 
            return_tensors='pt',     # 텐서로 반환
            truncation=True,         # 잘라내기 적용
            padding='max_length',    # 패딩 적용
            add_special_tokens=True  # 스페셜 토큰 적용
        )

        input_ids = tokens['input_ids'].squeeze(0)           # 2D -> 1D
        attention_mask = tokens['attention_mask'].squeeze(0) # 2D -> 1D
        token_type_ids = torch.zeros_like(attention_mask)

        # input_ids, attention_mask, token_type_ids 이렇게 3가지 요소를 반환하도록 합니다.
        # input_ids: 토큰
        # attention_mask: 실제 단어가 존재하면 1, 패딩이면 0 (패딩은 0이 아닐 수 있습니다)
        # token_type_ids: 문장을 구분하는 id. 단일 문장인 경우에는 전부 0
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask, 
            'token_type_ids': token_type_ids,
        }, torch.tensor(label)
    
# 토크나이저 지정
tokenizer_pretrained = CHECKPOINT_NAME

# train, test 데이터셋 생성
train_data = TokenDataset(train_df, tokenizer_pretrained)
test_data = TokenDataset(test_df, tokenizer_pretrained)

# DataLoader로 이전에 생성한 Dataset를 지정하여, batch 구성, shuffle, num_workers 등을 설정합니다.
train_loader = DataLoader(train_data, batch_size=8, shuffle=True, num_workers=8)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False, num_workers=8)

# 1개의 batch 꺼내기
inputs, labels = next(iter(train_loader))

# 데이터셋을 device 설정
inputs = {k: v.to(device) for k, v in inputs.items()}
labels.to(device)

from transformers import BertConfig

config = BertConfig.from_pretrained(CHECKPOINT_NAME)

from transformers import BertModel

# 모델 생성
model_bert = BertModel.from_pretrained(CHECKPOINT_NAME).to(device)

output = model_bert(**inputs)
output.keys()

# last_hidden_state 출력
last_hidden_state = output['last_hidden_state']

pooler_output = output['pooler_output']
fc = nn.Linear(768, 10)
fc.to(device)
fc_output = fc(last_hidden_state[:, 0, :]) 

class CustomBertModel(nn.Module):
    def __init__(self, bert_pretrained, dropout_rate=0.5):
        # 부모클래스 초기화
        super(CustomBertModel, self).__init__()
        # 사전학습 모델 지정
        self.bert = BertModel.from_pretrained(bert_pretrained)
        # dropout 설정
        self.dr = nn.Dropout(p=dropout_rate)
        # 최종 출력층 정의
        self.fc = nn.Linear(768, 10)
    
    def forward(self, input_ids, attention_mask, token_type_ids):
        # 입력을 pre-trained bert model 로 대입
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        # 결과의 last_hidden_state 가져옴
        last_hidden_state = output['last_hidden_state']
        # last_hidden_state[:, 0, :]는 [CLS] 토큰을 가져옴
        x = self.dr(last_hidden_state[:, 0, :])
        # FC 을 거쳐 최종 출력
        x = self.fc(x)
        return x

bert = CustomBertModel(CHECKPOINT_NAME)
bert.to(device)


loss_fn = nn.CrossEntropyLoss()


optimizer = optim.Adam(bert.parameters(), lr=1e-5)

from tqdm import tqdm
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

def model_train(model, data_loader, loss_fn, optimizer, device):
    model.train()
    
    running_loss = 0
    corr = 0
    counts = 0
    
    prograss_bar = tqdm(data_loader, unit='batch', total=len(data_loader), mininterval=1)
    
    # mini-batch 학습을 시작합니다.
    for idx, (inputs, labels) in enumerate(prograss_bar):
        inputs = {k:v.to(device) for k, v in inputs.items()}
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        output = model(**inputs)
        
        loss = loss_fn(output, labels)
        
        loss.backward()
        
        optimizer.step()
        
        _, pred = output.max(dim=1)

        corr += pred.eq(labels).sum().item()
        counts += len(labels)
        
        running_loss += loss.item() * labels.size(0)
        
        prograss_bar.set_description(f"training loss: {running_loss/(idx+1):.5f}, training accuracy: {corr / counts:.5f}")
        
    acc = corr / len(data_loader.dataset)
    
    return running_loss / len(data_loader.dataset), acc

def model_evaluate(model, data_loader, loss_fn, device):
    model.eval()
    
    with torch.no_grad():
        corr = 0
        running_loss = 0
        
        for inputs, labels in data_loader:
            inputs = {k:v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            
            output = model(**inputs)
            
            _, pred = output.max(dim=1)

            corr += torch.sum(pred.eq(labels)).item()
            
            running_loss += loss_fn(output, labels).item() * labels.size(0)
        
        acc = corr / len(data_loader.dataset)
        
        return running_loss / len(data_loader.dataset), acc

num_epochs = 10

model_name = 'bert-kor-base'

min_loss = np.inf

for epoch in range(num_epochs):
    train_loss, train_acc = model_train(bert, train_loader, loss_fn, optimizer, device)

    val_loss, val_acc = model_evaluate(bert, test_loader, loss_fn, device)   
    
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss
        torch.save(bert.state_dict(), f'{model_name}.pth')
    
    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')

Seed set to 42


사용 디바이스: cuda


RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [77]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from pytorch_lightning import LightningModule, Trainer
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizerFast, BertModel
from sklearn.model_selection import train_test_split

num_seed = 42
device = torch.device("cpu")

# Manually set the seed for CPU operations
torch.manual_seed(num_seed)
np.random.seed(num_seed)
random.seed(num_seed)

# 데이터 로딩 및 전처리
df = pd.read_csv("/home/son/ml/hanyang/datasets/final_data.csv")
df['label1'] = df['label1'].astype(int)

print(df['label1'].unique())
print(df['label1'].min(), df['label1'].max())

train_df, test_df = train_test_split(df, test_size=0.3, random_state=num_seed, stratify=df['label1'])

CHECKPOINT_NAME = 'kykim/bert-kor-base'

class TokenDataset(Dataset):
    def __init__(self, dataframe, tokenizer_pretrained):
        self.data = dataframe        
        self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_pretrained)
  
    def __len__(self):
        return len(self.data)
  
    def __getitem__(self, idx):
        sentence = self.data.iloc[idx]['text']
        label = self.data.iloc[idx]['label1']
        tokens = self.tokenizer(
            sentence, return_tensors='pt', truncation=True, padding='max_length', add_special_tokens=True
        )
        input_ids = tokens['input_ids'].squeeze(0)
        attention_mask = tokens['attention_mask'].squeeze(0)
        token_type_ids = torch.zeros_like(attention_mask)
        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}, torch.tensor(label)

# train, test 데이터셋 생성
train_data = TokenDataset(train_df, CHECKPOINT_NAME)
test_data = TokenDataset(test_df, CHECKPOINT_NAME)

train_loader = DataLoader(train_data, batch_size=8, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False, num_workers=4)

# Lightning 모듈 정의
class BertLightningModel(LightningModule):
    def __init__(self, bert_pretrained, num_labels=12, lr=1e-5):
        super(BertLightningModel, self).__init__()
        self.save_hyperparameters()
        self.bert = BertModel.from_pretrained(bert_pretrained)
        self.fc = nn.Linear(768, num_labels)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask, token_type_ids):
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        last_hidden_state = output['last_hidden_state']
        cls_output = last_hidden_state[:, 0, :]
        logits = self.fc(cls_output)
        return logits
    
    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(**inputs)
        print("Outputs shape:", outputs.shape)
        print("Labels shape:", labels.shape)
        loss = self.loss_fn(outputs, labels)
        return loss


    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(**inputs)
        loss = self.loss_fn(outputs, labels)
        _, preds = torch.max(outputs, dim=1)
        acc = torch.sum(preds == labels).item() / len(labels)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self.hparams.lr)
        return optimizer

bert_model = BertLightningModel(CHECKPOINT_NAME)
trainer = Trainer(max_epochs=10, gpus=0)
trainer.fit(bert_model, train_loader, test_loader)


RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [68]:
print(df['label1'].unique())
print(df['label1'].min(), df['label1'].max())

[ 0  1  2  3  4  5  6  7  8  9 10 11]
0 11
