In [1]:
# Install required libraries
!pip install -q -U torch transformers peft bitsandbytes accelerate pandas datasets trl

[0m

In [2]:
import os
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

# Set random seed
def seed_everything(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

  warn(


Using device: cuda


## 1. Configuration

In [3]:
CONF = {
    "model_name": "upstage/SOLAR-10.7B-Instruct-v1.0",
    "data_path": "./data/",
    "output_dir": "./results_solar",
    "lora_r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "lr": 1e-4,
    "batch_size": 2,
    "grad_accum": 4,
    "epochs": 1,
    "max_seq_length": 1024,  # Adjust based on VRAM availability, 1024 is safe for 24GB with QLoRA
}

## 2. Data Preparation
Formatting data into SOLAR Instruct format:
```
### User:
{dialogue}

### Assistant:
{summary}
```

In [4]:
def load_data(data_path):
    train_df = pd.read_csv(os.path.join(data_path, 'train.csv'))
    dev_df = pd.read_csv(os.path.join(data_path, 'dev.csv'))
    return train_df, dev_df

def format_instruction(row):
    # SOLAR Instruct format
    prompt = f"### User:\nSummarize the following dialogue:\n\n{row['dialogue']}\n\n### Assistant:\n"
    if 'summary' in row:
        prompt += f"{row['summary']}"
    return prompt

train_df, dev_df = load_data(CONF['data_path'])

# Create datasets
train_dataset = Dataset.from_pandas(train_df)
dev_dataset = Dataset.from_pandas(dev_df)

# Apply formatting
train_dataset = train_dataset.map(lambda x: {'text': format_instruction(x)})
dev_dataset = dev_dataset.map(lambda x: {'text': format_instruction(x)})

print(f"Train samples: {len(train_dataset)}")
print(f"Sample text:\n{train_dataset[0]['text'][:500]}...")

Map:   0%|          | 0/12457 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Train samples: 12457
Sample text:
### User:
Summarize the following dialogue:

#Person1#: 안녕하세요, Mr. Smith. 저는 Dr. Hawkins입니다. 오늘 무슨 일로 오셨어요? #Person2#: 건강검진을 받으려고 왔어요. #Person1#: 네, 5년 동안 검진을 안 받으셨네요. 매년 한 번씩 받으셔야 해요. #Person2#: 알죠. 특별히 아픈 데가 없으면 굳이 갈 필요가 없다고 생각했어요. #Person1#: 음, 심각한 질병을 피하려면 미리 발견하는 게 제일 좋거든요. 본인을 위해서라도 매년 한 번은 오세요. #Person2#: 알겠습니다. #Person1#: 여기 좀 볼까요. 눈과 귀는 괜찮으시네요. 깊게 숨 한 번 쉬어보세요. Mr. Smith, 담배 피우세요? #Person2#: 네. #Person1#: 담배가 폐암하고 심장병의 주된 원인인 거 아시죠? 끊으셔야 해요. #Person2#: 수백 번 시도했는데, 도저히 습관이 안 끊어져요. #Person...


## 3. Model & Tokenizer Loading (QLoRA)

In [5]:
# Quantization Config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load Model
model = AutoModelForCausalLM.from_pretrained(
    CONF['model_name'],
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model.config.use_cache = False
model.config.pretraining_tp = 1

# Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(CONF['model_name'], trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

## 4. LoRA Configuration

In [6]:
peft_config = LoraConfig(
    lora_alpha=CONF['lora_alpha'],
    lora_dropout=CONF['lora_dropout'],
    r=CONF['lora_r'],
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] # Target all linear layers for better performance
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 31,457,280 || all params: 10,762,981,376 || trainable%: 0.2923


## 5. Training

In [7]:
from trl import SFTTrainer, SFTConfig
import trl
print(f"TRL Version: {trl.__version__}")

# Prepare arguments
training_args_dict = {
    "output_dir": CONF['output_dir'],
    "num_train_epochs": CONF['epochs'],
    "per_device_train_batch_size": CONF['batch_size'],
    "gradient_accumulation_steps": CONF['grad_accum'],
    "optim": "paged_adamw_32bit",
    "save_steps": 100,
    "logging_steps": 10,
    "learning_rate": CONF['lr'],
    "weight_decay": 0.001,
    "fp16": True,
    "bf16": False,
    "max_grad_norm": 0.3,
    "warmup_ratio": 0.03,
    "group_by_length": True,
    "lr_scheduler_type": "constant",
    "report_to": "none",
    "gradient_checkpointing": True,
}

# Handle SFTConfig/SFTTrainer version compatibility
try:
    # Try initializing SFTConfig with max_seq_length (Newer trl versions)
    training_args = SFTConfig(
        **training_args_dict,
        max_seq_length=CONF['max_seq_length'],
        packing=False
    )
    trainer_kwargs = {}
except TypeError:
    # Fallback: If SFTConfig rejects max_seq_length, try setting it as attribute or passing to Trainer
    print("SFTConfig rejected max_seq_length. Attempting fallback configuration...")
    training_args = SFTConfig(**training_args_dict)
    
    # Manually set attributes if they are missing from __init__ but expected in args
    training_args.max_seq_length = CONF['max_seq_length']
    training_args.packing = False
    
    trainer_kwargs = {}

# Using SFTTrainer
try:
    # Try initializing with processing_class (Newer trl versions renamed tokenizer to processing_class)
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        peft_config=peft_config,
        formatting_func=lambda x: x['text'],
        processing_class=tokenizer,
        args=training_args,
        **trainer_kwargs
    )
except TypeError as e:
    if "unexpected keyword argument 'processing_class'" in str(e):
        # Fallback for older versions using tokenizer
        trainer = SFTTrainer(
            model=model,
            train_dataset=train_dataset,
            peft_config=peft_config,
            formatting_func=lambda x: x['text'],
            tokenizer=tokenizer,
            args=training_args,
            **trainer_kwargs
        )
    else:
        # If the error is 'tokenizer' unexpected (which is what happened), it means we should have used processing_class.
        # But we tried processing_class first in this block.
        # So if we are here, it means processing_class failed with something else, OR the first try raised something else.
        # Wait, if the user's code failed with 'tokenizer' unexpected, it means they were using the OLD code.
        # My NEW code tries processing_class first.
        raise e

print("Starting training...")
trainer.train()

trainer.model.save_pretrained(CONF['output_dir'])
print(f"Model saved to {CONF['output_dir']}")

TRL Version: 0.25.1
SFTConfig rejected max_seq_length. Attempting fallback configuration...




Applying formatting function to train dataset:   0%|          | 0/12457 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/12457 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/12457 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/12457 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Starting training...


  return fn(*args, **kwargs)


Step,Training Loss
10,1.2876
20,0.9851
30,0.9471
40,0.8972
50,0.8324
60,0.9682
70,0.9037
80,0.8469
90,0.8266
100,0.7759


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Model saved to ./results_solar


## 6. Inference

In [8]:
from tqdm import tqdm

def generate_summary(model, tokenizer, dialogue, max_new_tokens=128):
    prompt = f"### User:\nSummarize the following dialogue:\n\n{dialogue}\n\n### Assistant:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False, # Greedy generation
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.2
        )
    
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract summary part
    try:
        summary = generated_text.split("### Assistant:\n")[1].strip()
    except:
        summary = generated_text
        
    return summary

# Load Test Data
test_df = pd.read_csv(os.path.join(CONF['data_path'], 'test.csv'))

print("Starting inference...")
summaries = []

# Inference Loop
model.eval()
for dialogue in tqdm(test_df['dialogue']):
    summary = generate_summary(model, tokenizer, dialogue)
    summaries.append(summary)

# Save Results
submission = pd.DataFrame({
    'fname': test_df['fname'],
    'summary': summaries
})

submission.to_csv("./prediction/submit_solar_qlora.csv", index=False)
print("Inference complete. Saved to ./prediction/submit_solar_qlora.csv")

Starting inference...


100%|██████████| 499/499 [3:44:48<00:00, 27.03s/it]  

Inference complete. Saved to ./prediction/submit_solar_qlora.csv





In [None]:
# Check sample output
print(submission.head())

    fname                                            summary
0  test_0  Mr. Smith는 Ms. Dawson에게 새로운 규정을 설명하고, 이 규정은 모든...
1  test_1  #Person2#는 출퇴근 시간에 교통체증으로 인한 불편함과 자신의 차가 환경에 부...
2  test_2  Masha와 Hero의 갑작스러운 이혼에 관한 소식과 양육권 및 재산 분할 상황에 ...
3  test_3     Brian의 생일 파티에서 #Person1#과 Brian은 함께 춤추며 대화합니다.
4  test_4            #Person1#과 #Person2#가 올림픽 공원을 걷며 설명합니다.


: 