In [1]:
import torch
from torch.utils.cpp_extension import CUDA_HOME

print("PyTorch 버전:", torch.__version__)
print("torch.version.cuda:", torch.version.cuda)
print("torch.cuda.is_available():", torch.cuda.is_available())
print("torch.backends.cudnn.enabled:", torch.backends.cudnn.enabled)
print("CUDA device count:", torch.cuda.device_count())
print("CUDA device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A")
print("CUDA_HOME:", CUDA_HOME)


PyTorch 버전: 2.5.1+cu121
torch.version.cuda: 12.1
torch.cuda.is_available(): True
torch.backends.cudnn.enabled: True
CUDA device count: 1
CUDA device name: NVIDIA GeForce RTX 4050 Laptop GPU
CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9


In [2]:
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.normalizers import NFKC
from tokenizers.processors import TemplateProcessing

# 1. 초기 토크나이저 구성
tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
tokenizer.normalizer = NFKC()
tokenizer.pre_tokenizer = Whitespace()

# 2. 학습자 정의
trainer = BpeTrainer(
    vocab_size=3000,
    special_tokens=["[UNK]", "[PAD]", "[BOS]", "[EOS]"]
)

# 3. 학습 실행
tokenizer.train(["./create-dataset/dataset/train.txt"], trainer)

# 4. 후처리 설정 (BOS, EOS)
tokenizer.post_processor = TemplateProcessing(
    single="[BOS] $A [EOS]",
    pair="[BOS] $A [EOS] $B:1 [EOS]:1",
    special_tokens=[
        ("[BOS]", tokenizer.token_to_id("[BOS]")),
        ("[EOS]", tokenizer.token_to_id("[EOS]")),
    ],
)

# 5. 저장 (Hugging Face 호환 json 파일)
tokenizer.save("tokenizer/kojson-tokenizer.json")


In [25]:
from transformers import PreTrainedTokenizerFast

tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer/kojson-tokenizer.json",
    unk_token="[UNK]",
    pad_token="[PAD]",
    bos_token="[BOS]",
    eos_token="[EOS]"
)

print(tokenizer.encode("22,9는 우회로개방 상태야", add_special_tokens=True))


[2, 305, 6, 1202, 727, 322, 3]


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CausalSelfAttention(nn.Module):
    def __init__(self, d_model, n_heads, dropout=0.1):
        super().__init__()
        self.n_heads = n_heads
        self.head_dim = d_model // n_heads
        self.qkv = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, D = x.size()
        qkv = self.qkv(x).reshape(B, T, 3, self.n_heads, self.head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn_scores = torch.matmul(q, k.transpose(-2, -1)) / (self.head_dim ** 0.5)
        # 🔥 Causal mask
        mask = torch.triu(torch.ones(T, T, device=x.device), diagonal=1).bool()
        attn_scores = attn_scores.masked_fill(mask, float('-inf'))

        attn_weights = F.softmax(attn_scores, dim=-1)
        attn_output = torch.matmul(self.dropout(attn_weights), v)

        attn_output = attn_output.transpose(1, 2).reshape(B, T, D)
        return self.out_proj(attn_output)

class FeedForward(nn.Module):
    def __init__(self, d_model, hidden_dim, dropout=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_model, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, d_model),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class TransformerBlock(nn.Module):
    def __init__(self, d_model, n_heads, dropout=0.1):
        super().__init__()
        self.ln1 = nn.LayerNorm(d_model)
        self.attn = CausalSelfAttention(d_model, n_heads, dropout)
        self.ln2 = nn.LayerNorm(d_model)
        self.ffn = FeedForward(d_model, hidden_dim=4*d_model, dropout=dropout)

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.ffn(self.ln2(x))
        return x

class MiniGPT(nn.Module):
    def __init__(self, vocab_size, d_model=512, n_layers=6, n_heads=8, max_len=512, dropout=0.1):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, d_model)
        self.pos_emb = nn.Embedding(max_len, d_model)
        self.blocks = nn.Sequential(*[
            TransformerBlock(d_model, n_heads, dropout) for _ in range(n_layers)
        ])
        self.ln_f = nn.LayerNorm(d_model)
        self.head = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        B, T = x.size()
        pos = torch.arange(T, device=x.device).unsqueeze(0)  # (1, T)
        x = self.token_emb(x) + self.pos_emb(pos)
        x = self.blocks(x)
        x = self.ln_f(x)
        return self.head(x)


In [39]:
from torch.utils.data import Dataset
class PromptCompletionDataset(Dataset):
    def __init__(self, path, tokenizer, max_len=128):
        import json
        self.samples = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                data = json.loads(line)
                text = f"{data['prompt']} -> {data['completion']}"
                tokenized = tokenizer(text, padding='max_length', truncation=True, max_length=max_len)
                self.samples.append(torch.tensor(tokenized["input_ids"]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


In [47]:
from torch.utils.data import DataLoader
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MiniGPT(vocab_size=tokenizer.vocab_size).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4)
dataset = PromptCompletionDataset("./create-dataset/dataset/train.jsonl", tokenizer)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
print(device)

for epoch in range(10):
    model.train()
    total_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        output = model(batch[:, :-1])
        loss = F.cross_entropy(output.reshape(-1, tokenizer.vocab_size), batch[:, 1:].reshape(-1), ignore_index=tokenizer.pad_token_id)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} | Loss: {total_loss / len(loader):.4f}")
    torch.save(model.state_dict(), f"models/mini_gpt_epoch{epoch+1}.pth")

cuda
Epoch 1 | Loss: 0.8873
Epoch 2 | Loss: 0.1277
Epoch 3 | Loss: 0.0095
Epoch 4 | Loss: 0.0036
Epoch 5 | Loss: 0.0063
Epoch 6 | Loss: 0.0019
Epoch 7 | Loss: 0.0009
Epoch 8 | Loss: 0.0005
Epoch 9 | Loss: 0.0005
Epoch 10 | Loss: 0.0016


In [44]:
import torch
import torch.nn.functional as F

def apply_repetition_penalty(logits, input_ids, penalty=1.2):
    # 입력 토큰들에 반복 패널티 적용
    for i in range(logits.size(0)):
        token_ids = input_ids[i].unique()
        logits[i, token_ids] /= penalty
    return logits

def generate(model, input_ids, tokenizer, max_new_tokens=64, top_k=10, penalty=1.2, stop_token="}"):
    model.eval()
    device = input_ids.device
    stop_ids = tokenizer.encode(stop_token, add_special_tokens=False)

    for _ in range(max_new_tokens):
        with torch.no_grad():
            logits = model(input_ids)[:, -1, :]  # (B, vocab)
            logits = apply_repetition_penalty(logits, input_ids, penalty=penalty)

            # Top-k sampling
            top_k = min(top_k, logits.size(-1))
            values, indices = torch.topk(logits, top_k, dim=-1)
            probs = F.softmax(values, dim=-1)
            next_token = indices.gather(-1, torch.multinomial(probs, num_samples=1))

            input_ids = torch.cat([input_ids, next_token], dim=1)

            # stop_token 또는 eos_token 발견 시 종료
            if any(t.item() in stop_ids for t in next_token[0]):
                break

            if input_ids.size(1) >= 128:
                break

    return input_ids


In [53]:

# 모델 및 토크나이저 준비
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MiniGPT(vocab_size=tokenizer.vocab_size).to(device)
model.load_state_dict(torch.load("models/mini_gpt_epoch9.pth", map_location=device))
model.eval()

# 입력 텍스트
test_text = "(22,9)는 '우회로개방' 상태야"
input_ids = tokenizer(test_text, return_tensors="pt")["input_ids"].to(device)

# 텍스트 생성
output_ids = generate(model, input_ids, tokenizer, max_new_tokens=64, top_k=10, penalty=1.2, stop_token="}")
decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# JSON만 추출
start = decoded.find("{")
end = decoded.find("}") + 1
if start != -1 and end > start:
    json_part = decoded[start:end]
    print("⤷ 모델 출력 (JSON):")
    print(json_part)
else:
    print("⤷ 모델 출력:")
    print(decoded)

⤷ 모델 출력 (JSON):
{ " action ": " change_traffic_status ", " target ": [ 1 , 25 ], " status ": " detour_opened " }


  model.load_state_dict(torch.load("models/mini_gpt_epoch9.pth", map_location=device))


In [43]:
for i in range(10):
    # 입력 텍스트
    test_text = "(22,9)는 '우회로개방' 상태야"
    input_ids = tokenizer(test_text, return_tensors="pt")["input_ids"].to(device)

    # 텍스트 생성
    output_ids = generate(model, input_ids, tokenizer, max_new_tokens=64, top_k=10, penalty=1.2, stop_token="}")
    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # JSON만 추출
    start = decoded.find("{")
    end = decoded.find("}") + 1
    if start != -1 and end > start:
        json_part = decoded[start:end]
        print("⤷ 모델 출력 (JSON):")
        print(json_part)
    else:
        print("⤷ 모델 출력:")
        print(decoded)

⤷ 모델 출력:
22 , 9 는 ' 우회로개방 ' 상태야 po ' 상황 처리 ' 차량소통 : : sto 어 20는 ": " action ": " target ": [ 9 , 기록해줘 ], " status ": 줘 " traffic_moving_well " detour_opened 6 road strong_wind_alert }
⤷ 모델 출력:
22 , 9 는 ' 우회로개방 ' 상태야 맞춰 제어가 change_traffic_status ", " service_suspended 변경 2야 28 , > { " action ": " change_traffic_status ", " target ": [ 28 , 14에 foggy ], " status ": _snow 29 4야 분류됐어 block 을 chec 화 구역을 갱신 : 22에 로 22야 8는 입 phoon_impact 11에 29 al_ 역 처리 부탁해 ail led_ active 운행 갱 " 향 상태니까 반영해
⤷ 모델 출력:
22 , 9 는 ' 우회로개방 ' 상태야 업데이 16의 상태야 . 반영하자 구간 : rea 긴급 상태를 reli " action ": " change_traffic_status ", " target ": [ 3 , 2 ], 16의 ], " detour_opened imp 차선통제 급 우려 emergency_control }
⤷ 모델 출력:
22 , 9 는 ' 우회로개방 ' 상태야 21는 ' as , 29야 delay 30에 신호조정중 차량진입 17 상황은 20에 low { " action ": " sch cle_ 27에서 쪽 교통사고처리중 상태는 0 , 12 ], 그 ], " detour_opened 도로붕괴위 iority_pa blocked 정상화중 진행중 g_w 필요해 . 식 이므로 새로 지정해줘 ' 해제됨 하는 sta 의 방향은 진입금지 16야 5는 work relieving_congestion 속도유지 : 29에 도로에 22를 됐 m 적용이 우주의
⤷ 모델 출력 (JSON):
{