# Finetuning을 통해 분류 성능 올리기 - 데이터 학습하고 추론하기

In [1]:
from openai import OpenAI

In [2]:
client = OpenAI()

## Upload File

In [3]:
# 최소 10개 샘플 이상 필요
train_file = client.files.create(
  file=open("conv_sent_train.jsonl", "rb"),
  purpose="fine-tune"
)

In [4]:
train_file.id

'file-GeSKdjaiHQ7X6MpWPfTwPA'

In [5]:
valid_file = client.files.create(
  file=open("conv_sent_valid.jsonl", "rb"),
  purpose="fine-tune"
)

In [6]:
valid_file.id

'file-22UJQN3kPJ14Ubd2mfkXwJ'

## Finetuning

### Finetuning job 제출하기

In [7]:
job = client.fine_tuning.jobs.create(
  training_file=train_file.id,
  validation_file=valid_file.id, 
  model="gpt-3.5-turbo-1106",
  hyperparameters={
    "n_epochs": 3 # default: 3
  }
)

In [8]:
job.id

'ftjob-TWMZQDujEHH5wHGcB4uNlIQ0'

In [9]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-TWMZQDujEHH5wHGcB4uNlIQ0
Status: validating_files


### 현재 Finetuning 상태 가져오기


In [10]:
job = client.fine_tuning.jobs.retrieve(job.id)

In [11]:
job.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_37197/127413622.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  job.dict()


{'id': 'ftjob-TWMZQDujEHH5wHGcB4uNlIQ0',
 'created_at': 1744703039,
 'error': {'code': None, 'message': None, 'param': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'batch_size': 'auto',
  'learning_rate_multiplier': 'auto',
  'n_epochs': 3},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-qzBJNqx2R9Pz1HmKd4Zh0Dmj',
 'result_files': [],
 'seed': 1770077225,
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-GeSKdjaiHQ7X6MpWPfTwPA',
 'validation_file': 'file-22UJQN3kPJ14Ubd2mfkXwJ',
 'estimated_finish': None,
 'integrations': [],
 'metadata': None,
 'method': {'dpo': None,
  'supervised': {'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 3}},
  'type': 'supervised'},
 'user_provided_suffix': None}

In [12]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-TWMZQDujEHH5wHGcB4uNlIQ0
Status: validating_files


### 학습 과정 확인

In [13]:
# List up to 10 events from a fine-tuning job
response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=10)
events = response.data
events.reverse()

for event in events:
    print(event.message)

Created fine-tuning job: ftjob-TWMZQDujEHH5wHGcB4uNlIQ0
Validating training file: file-GeSKdjaiHQ7X6MpWPfTwPA and validation file: file-22UJQN3kPJ14Ubd2mfkXwJ


## Finetuning된 모델 Inference하기

In [14]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

In [15]:
infer_model = "ft:gpt-3.5-turbo-1106:personal::SOMETHING"

In [16]:
llm = ChatOpenAI(model=infer_model)

In [17]:
example_conv_text = """\
김민준: 안녕하세요, 처음 뵙겠습니다. 김민준이라고 해요.
이서연: 네, 안녕하세요. 이서연입니다. 
김민준: 요즘 어떻게 지내세요? 취미가 뭐에요?
이서연: 그냥 그래요. 별로 특별한 취미는 없고요, 여기 와서 할 이야기가 그게 전부인가요?
김민준: 아니, 그게... 좀 더 서로를 알아가는 과정이라 생각해서요.
이서연: 솔직히 말해서 여기 오는 것 자체가 별로였어요. 죄송하지만 저는 이만 가볼게요.
"""

In [18]:
sentiment_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "다음 대화의 내용에 대해 '긍정' 또는 '부정'으로 감성 분석해줘."),
        ("human", "{input}" )
    ]
)

In [19]:
sentiment_chain = sentiment_prompt_template | llm | StrOutputParser()

In [20]:
sentiment_chain.invoke({"input": example_conv_text})

'부정'