In [4]:
!pip install -q transformers datasets accelerate bitsandbytes wandb peft trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.[0m[31m
[0m

In [2]:

import huggingface_hub
huggingface_hub.login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import os
import time
import wandb
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM, SFTConfig
from peft import LoraConfig, get_peft_model, LoraConfig, TaskType

# wandb 초기화
wandb.init(project="Hanghae99_week8_advanced", name="gpt2_plain_experiment")

data_path = "corpus.json"

with open(data_path, "r", encoding="utf-8") as f:
    corpus = json.load(f)

dataset = Dataset.from_dict({
    "question": [item["question"] for item in corpus],
    "answer": [item["answer"] for item in corpus]
})

train_split = dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
train_dataset = train_split["train"]
eval_dataset = train_split["test"]

# ===== 모델과 토크나이저 로드 (경량화 기법 없이) =====
MODEL_NAME = "openai-community/gpt2"
print(f"Loading model {MODEL_NAME} normally (without lightweight techniques)...")
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = 1024

# 모델 파라미터 수 로깅
n_params = sum(p.numel() for p in model.parameters())
wandb.log({f"{MODEL_NAME}_num_params": n_params})
print(f"{MODEL_NAME} parameters: {n_params}")

# ===== 평가용 함수 정의 =====
def evaluate_inference_speed(model, tokenizer, prompt, num_runs=3):
    device = 0 if torch.cuda.is_available() else -1
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    times = []
    for i in range(num_runs):
        start = time.time()
        _ = generator(prompt, max_length=100)
        elapsed = time.time() - start
        times.append(elapsed)
        print(f"Run {i+1}: {elapsed:.3f} sec")
    avg_time = sum(times) / len(times)
    return avg_time

# ===== 미세조정 전 평가 (Pre fine-tuning evaluation) =====
instruction = "How does the VIX (Volatility Index) influence trading decisions in the stock market"

print("\n[Pre fine-tuning evaluation]")

# 정량적 평가: 추론 속도 측정
avg_time = evaluate_inference_speed(model, tokenizer, instruction)
wandb.log({f"{MODEL_NAME}_avg_inference_time_pre": avg_time})
print(f"Avg Inference Time (pre): {avg_time:.3f} sec")

# 정성적 평가: 동일 instruction에 대한 응답 생성
device = 0 if torch.cuda.is_available() else -1
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
output = generator(instruction, max_length=250)
response_text = output[0]['generated_text']
wandb.log({f"{MODEL_NAME}_response_pre": response_text})
print(f"Response (pre):\n{response_text}")
print("-" * 60)

# ===== 미세조정(Fine-tuning) 준비 =====
def formatting_prompts_func(data):
    output_texts = []
    for i in range(len(data['question'])):
        text = f"### Question: {data['question'][i]}\n ### Answer: {data['answer'][i]}"
        output_texts.append(text)
    return output_texts

response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

# ===== Fine-tuning 설정 및 실행 =====
training_config = SFTConfig(
    output_dir="./gpt2_2b_lt_output",
    save_total_limit=1,
    logging_steps=100,
    eval_steps=100,
    max_steps=1000,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    save_steps=100,
    save_strategy="steps",
    logging_strategy="steps",
    evaluation_strategy="steps",
)

trainer = SFTTrainer(
    model=model,
    args=training_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

print(f"\nStarting fine-tuning for {MODEL_NAME} ...")
train_start_time = time.time()
trainer.train()
train_end_time = time.time()
total_training_time = (train_end_time - train_start_time) / 60
wandb.log({"train/total_training_time_min": total_training_time})
print(f"Fine-tuning for {MODEL_NAME} completed.")

# ===== 7. 미세조정 후 평가 (Post fine-tuning evaluation) =====
print("\n[Post fine-tuning evaluation]")

avg_time_post = evaluate_inference_speed(model, tokenizer, instruction)
wandb.log({f"{MODEL_NAME}_avg_inference_time_post": avg_time_post})
print(f"Avg Inference Time (post): {avg_time_post:.3f} sec")

# 정성 평가: 동일 instruction에 대한 응답 생성
output_post = generator(instruction, max_length=250)
response_text_post = output_post[0]['generated_text']
wandb.log({f"{MODEL_NAME}_response_post": response_text_post})
print(f"Response (post):\n{response_text_post}")
print("-" * 60)

# ===== 8. 마무리 =====
wandb.finish()


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcode-sugartoast[0m ([33mcode-sugartoast-sugartoast[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Loading model openai-community/gpt2 normally (without lightweight techniques)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


openai-community/gpt2 parameters: 124439808

[Pre fine-tuning evaluation]


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Run 1: 1.912 sec
Run 2: 0.818 sec


Device set to use cuda:0


Run 3: 0.817 sec
Avg Inference Time (pre): 1.182 sec
Response (pre):
How does the VIX (Volatility Index) influence trading decisions in the stock market? How does the VIX impact the trading of stocks and other investments in the stock market?

To develop, understand, and assess VIX fundamentals, we asked for participants from several fields, including:

Market researchers, trade analysts, financial analysts, retail investors, investment planners, retail investors, forecasters, and investment analysts

Market participants from other U.S. states, Canada, and U.K.

Other large U.S. cities and counties

Market participants from around the globe from over 5,000 research, business advisory, market analysis, and technical, scientific, and academic institutions in the United States, Canada and other countries.

Participants are asked to complete a VIX (vix) questionnaire or to provide their current financial details in our market research section.

In addition to the VIX question, participants



Applying formatting function to train dataset:   0%|          | 0/5732 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/5732 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5732 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5732 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/1433 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/1433 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1433 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1433 [00:00<?, ? examples/s]


Starting fine-tuning for openai-community/gpt2 ...




Step,Training Loss,Validation Loss
100,2.5954,2.272718
200,2.3665,2.161808
300,2.2549,2.093556
400,2.1971,2.04453
500,2.1748,2.013694
600,2.141,1.98466
700,2.1157,1.966273
800,2.0485,1.951562
900,2.0459,1.941659
1000,2.0971,1.93854


Device set to use cuda:0


Fine-tuning for openai-community/gpt2 completed.

[Post fine-tuning evaluation]
Run 1: 0.848 sec
Run 2: 0.841 sec
Run 3: 0.834 sec
Avg Inference Time (post): 0.841 sec
Response (post):
How does the VIX (Volatility Index) influence trading decisions in the stock market? During early trading volumes, investors can focus on asset performance, historical performance, and overall market conditions that predict the future price of VIX stocks. The VIX is driven by market optimism and sentiment, which can influence the trading decisions of trading traders.

VIX Traders often assess the impact of economic conditions, market sentiment, and economic forecasts. Factors such as GDP growth, the participation of large multinational corporations, and changing investor expectations can influence stock prices.

Historical Trends and Sentiment Factors

Historical trends and sentiment factors are crucial for trading decisions as they predict market trends, market sentiment—and consumer opinions—based on f

0,1
eval/loss,█▆▄▃▃▂▂▁▁▁
eval/mean_token_accuracy,▁▃▅▆▆▇▇███
eval/runtime,▃▁▁▂▁█▂▃▃▁
eval/samples_per_second,▆██▇█▁▇▆▆█
eval/steps_per_second,▆██▇█▁▇▆▆█
openai-community/gpt2_avg_inference_time_post,▁
openai-community/gpt2_avg_inference_time_pre,▁
openai-community/gpt2_num_params,▁
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██████

0,1
eval/loss,1.93854
eval/mean_token_accuracy,0.55998
eval/runtime,4.6288
eval/samples_per_second,309.581
eval/steps_per_second,77.557
openai-community/gpt2_avg_inference_time_post,0.84097
openai-community/gpt2_avg_inference_time_pre,1.18247
openai-community/gpt2_num_params,124439808
openai-community/gpt2_response_post,How does the VIX (Vo...
openai-community/gpt2_response_pre,How does the VIX (Vo...
