# Fine-tuning을 이용한 AI에게 인격 부여하기
## OpenAI Fine-tuning API로 학습하고 대화하기

In [1]:
from openai import OpenAI

In [2]:
client = OpenAI()

## Upload File

In [3]:
#data 업로드
train_file = client.files.create(
  file=open("train.jsonl", "rb"),
  purpose="fine-tune"
)

In [4]:
train_file

FileObject(id='file-Dd6GJhpxZ57m6VeZuEW79A', bytes=109549, created_at=1737729845, filename='train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [5]:
train_file.dict()

C:\Users\msh07\AppData\Local\Temp\ipykernel_8704\2243376447.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  train_file.dict()


{'id': 'file-Dd6GJhpxZ57m6VeZuEW79A',
 'bytes': 109549,
 'created_at': 1737729845,
 'filename': 'train.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'status_details': None}

In [6]:
train_file.id #file ID를 통해 AI를 학습시킬 예정.

'file-Dd6GJhpxZ57m6VeZuEW79A'

In [7]:
#손실값 계산
valid_file = client.files.create(
  file=open("valid.jsonl", "rb"),
  purpose="fine-tune"
)

In [8]:
valid_file

FileObject(id='file-26g197nEyLaJzB1tS5eQMH', bytes=32806, created_at=1737729859, filename='valid.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [9]:
valid_file.dict()

C:\Users\msh07\AppData\Local\Temp\ipykernel_8704\267935741.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  valid_file.dict()


{'id': 'file-26g197nEyLaJzB1tS5eQMH',
 'bytes': 32806,
 'created_at': 1737729859,
 'filename': 'valid.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'status_details': None}

In [10]:
valid_file.id

'file-26g197nEyLaJzB1tS5eQMH'

## Finetuning

### Finetuning job 제출하기

In [11]:
job = client.fine_tuning.jobs.create(
  training_file=train_file.id,
  validation_file=valid_file.id, 
  model="gpt-3.5-turbo-1106",
  hyperparameters={
    "n_epochs": 1 # default: 3 #에포크 설정. 기본값은 3. 3~7 사이 추천.
  }
)

In [12]:
job

FineTuningJob(id='ftjob-PZUMAr0mAHYn13GBKcLqFKXn', created_at=1737729870, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-ExjBXytxly5Vj8QSbdd7Gfnp', result_files=[], seed=794362733, status='validating_files', trained_tokens=None, training_file='file-Dd6GJhpxZ57m6VeZuEW79A', validation_file='file-26g197nEyLaJzB1tS5eQMH', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1)), type='supervised'), user_provided_suffix=None)

In [13]:
job.dict()

C:\Users\msh07\AppData\Local\Temp\ipykernel_8704\127413622.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  job.dict()


{'id': 'ftjob-PZUMAr0mAHYn13GBKcLqFKXn',
 'created_at': 1737729870,
 'error': {'code': None, 'message': None, 'param': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'batch_size': 'auto',
  'learning_rate_multiplier': 'auto',
  'n_epochs': 1},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-ExjBXytxly5Vj8QSbdd7Gfnp',
 'result_files': [],
 'seed': 794362733,
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-Dd6GJhpxZ57m6VeZuEW79A',
 'validation_file': 'file-26g197nEyLaJzB1tS5eQMH',
 'estimated_finish': None,
 'integrations': [],
 'method': {'dpo': None,
  'supervised': {'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 1}},
  'type': 'supervised'},
 'user_provided_suffix': None}

In [14]:
job.id

'ftjob-PZUMAr0mAHYn13GBKcLqFKXn'

In [15]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-WS9Drf8b0HNTU8HrrLkXsd6w
Status: validating_files


### Finetuning job들 가져오기

In [43]:
#학습중인 job 파일 조회
resp = client.fine_tuning.jobs.list(limit=10)

In [16]:
resp.dict()

NameError: name 'resp' is not defined

### 현재 Finetuning 상태 가져오기


In [45]:
job = client.fine_tuning.jobs.retrieve(job.id)

In [46]:
job.dict()

C:\Users\msh07\AppData\Local\Temp\ipykernel_27268\127413622.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  job.dict()


{'id': 'ftjob-M1oaDaIjB2gTWwDDiLJXhCGN',
 'created_at': 1737728395,
 'error': {'code': None, 'message': None, 'param': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'batch_size': 'auto',
  'learning_rate_multiplier': 'auto',
  'n_epochs': 1},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-ExjBXytxly5Vj8QSbdd7Gfnp',
 'result_files': [],
 'seed': 1545710231,
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-5aEb3ZhQMUULH7iJHL8JBr',
 'validation_file': 'file-Ko2npq85aG5nm7Vjr1DPPm',
 'estimated_finish': None,
 'integrations': [],
 'method': {'dpo': None,
  'supervised': {'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 1}},
  'type': 'supervised'},
 'user_provided_suffix': None}

In [17]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-WS9Drf8b0HNTU8HrrLkXsd6w
Status: validating_files


### Finetuning job 캔슬하기

In [None]:
# client.fine_tuning.jobs.cancel(job.id)

### 학습 과정 확인하기

In [18]:
# List up to 10 events from a fine-tuning job
response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=10)
events = response.data
events.reverse()

for event in events:
    print(event.message)

Created fine-tuning job: ftjob-WS9Drf8b0HNTU8HrrLkXsd6w
Validating training file: file-LLzGi3d8Ciz3owiopsEd5M and validation file: file-Ps8qhNqruG2w1YStAqY38D


### Finetuning 모델 삭제하기

In [None]:
# Delete a fine-tuned model (must be an owner of the org the model was created in)
client.models.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")

# Finetuning된 모델 Inference하기

In [19]:
import os
from openai import OpenAI
client = OpenAI()

In [20]:
system_prompt ="""\
- 너는 햄버거 가게의 직원이다.
- 아래의 단계로 질문을 한다.
1. 주문 할 메뉴 묻기
2. 더 주문 할 것이 없는지 묻기
3. 여기서 먹을지 가져가서 먹을지 질문한다.
4. 카드로 계산할지 현금으로 계산할지 질문한다.
4. 주문이 완료되면 인사를 하고 [END] 라고 이야기한다.
- 너는 영어로 답한다."""

In [21]:
model = "ft:gpt-3.5-turbo-1106:liam::8gr0Gkrs" #모델 이름은 노출되면 안됨!

In [22]:
response = client.chat.completions.create(
  model=model,
  messages=[
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "안녕"}
  ]
)
print(response.choices[0].message.content)

NotFoundError: Error code: 404 - {'error': {'message': 'The model `ft:gpt-3.5-turbo-1106:liam::8gr0Gkrs` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

In [7]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

In [8]:
llm = ChatOpenAI(model=model)

In [9]:
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}" )
    ]
)

In [10]:
chain = prompt_template | llm | StrOutputParser()

In [12]:
chain.invoke({"input": "안녕"})

'부정'