# Finetuning API를 통해 감성 분석 학습해보기

In [1]:
from openai import OpenAI

In [2]:
client = OpenAI()

## Upload File

In [3]:
train_file = client.files.create(
  file=open("nsmc_train.jsonl", "rb"),
  purpose="fine-tune"
)

In [4]:
train_file

FileObject(id='file-WCctoMYTp5nY9DMhBZpgKF', bytes=27351, created_at=1744632282, filename='nsmc_train.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [5]:
train_file.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_70467/2243376447.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  train_file.dict()


{'id': 'file-WCctoMYTp5nY9DMhBZpgKF',
 'bytes': 27351,
 'created_at': 1744632282,
 'filename': 'nsmc_train.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'expires_at': None,
 'status_details': None}

In [6]:
train_file.id

'file-WCctoMYTp5nY9DMhBZpgKF'

In [7]:
valid_file = client.files.create(
  file=open("nsmc_valid.jsonl", "rb"),
  purpose="fine-tune"
)

In [8]:
valid_file

FileObject(id='file-8idbvccPD3Lb1JXG7phjfY', bytes=8338, created_at=1744632288, filename='nsmc_valid.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [9]:
valid_file.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_70467/267935741.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  valid_file.dict()


{'id': 'file-8idbvccPD3Lb1JXG7phjfY',
 'bytes': 8338,
 'created_at': 1744632288,
 'filename': 'nsmc_valid.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'expires_at': None,
 'status_details': None}

In [10]:
valid_file.id

'file-8idbvccPD3Lb1JXG7phjfY'

## Finetuning

### Finetuning job 제출하기

In [11]:
job = client.fine_tuning.jobs.create(
  training_file=train_file.id,
  validation_file=valid_file.id, 
  model="gpt-3.5-turbo-1106",
  hyperparameters={
    "n_epochs": 1 # default: 3
  }
)

In [12]:
job

FineTuningJob(id='ftjob-zRnS7Vg3c06VvhCq6BEakRiW', created_at=1744632296, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-qzBJNqx2R9Pz1HmKd4Zh0Dmj', result_files=[], seed=845720967, status='validating_files', trained_tokens=None, training_file='file-WCctoMYTp5nY9DMhBZpgKF', validation_file='file-8idbvccPD3Lb1JXG7phjfY', estimated_finish=None, integrations=[], metadata=None, method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1)), type='supervised'), user_provided_suffix=None)

In [13]:
job.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_70467/127413622.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  job.dict()


{'id': 'ftjob-zRnS7Vg3c06VvhCq6BEakRiW',
 'created_at': 1744632296,
 'error': {'code': None, 'message': None, 'param': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'batch_size': 'auto',
  'learning_rate_multiplier': 'auto',
  'n_epochs': 1},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-qzBJNqx2R9Pz1HmKd4Zh0Dmj',
 'result_files': [],
 'seed': 845720967,
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-WCctoMYTp5nY9DMhBZpgKF',
 'validation_file': 'file-8idbvccPD3Lb1JXG7phjfY',
 'estimated_finish': None,
 'integrations': [],
 'metadata': None,
 'method': {'dpo': None,
  'supervised': {'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 1}},
  'type': 'supervised'},
 'user_provided_suffix': None}

In [14]:
job.id

'ftjob-zRnS7Vg3c06VvhCq6BEakRiW'

In [15]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-zRnS7Vg3c06VvhCq6BEakRiW
Status: validating_files


### Finetuning job들 가져오기

In [16]:
resp = client.fine_tuning.jobs.list(limit=10)

In [17]:
resp.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_70467/3483486788.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  resp.dict()


{'data': [{'id': 'ftjob-zRnS7Vg3c06VvhCq6BEakRiW',
   'created_at': 1744632296,
   'error': {'code': None, 'message': None, 'param': None},
   'fine_tuned_model': None,
   'finished_at': None,
   'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 1},
   'model': 'gpt-3.5-turbo-1106',
   'object': 'fine_tuning.job',
   'organization_id': 'org-qzBJNqx2R9Pz1HmKd4Zh0Dmj',
   'result_files': [],
   'seed': 845720967,
   'status': 'validating_files',
   'trained_tokens': None,
   'training_file': 'file-WCctoMYTp5nY9DMhBZpgKF',
   'validation_file': 'file-8idbvccPD3Lb1JXG7phjfY',
   'estimated_finish': None,
   'integrations': [],
   'metadata': None,
   'method': {'dpo': None,
    'supervised': {'hyperparameters': {'batch_size': 'auto',
      'learning_rate_multiplier': 'auto',
      'n_epochs': 1}},
    'type': 'supervised'},
   'user_provided_suffix': None}],
 'has_more': False,
 'object': 'list'}

### 현재 Finetuning 상태 가져오기


In [18]:
job = client.fine_tuning.jobs.retrieve(job.id)

In [19]:
job.dict()

/var/folders/1x/st3vh8xs6715dcgqc1gk2hhh0000gn/T/ipykernel_70467/127413622.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  job.dict()


{'id': 'ftjob-zRnS7Vg3c06VvhCq6BEakRiW',
 'created_at': 1744632296,
 'error': {'code': None, 'message': None, 'param': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'batch_size': 'auto',
  'learning_rate_multiplier': 'auto',
  'n_epochs': 1},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-qzBJNqx2R9Pz1HmKd4Zh0Dmj',
 'result_files': [],
 'seed': 845720967,
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-WCctoMYTp5nY9DMhBZpgKF',
 'validation_file': 'file-8idbvccPD3Lb1JXG7phjfY',
 'estimated_finish': None,
 'integrations': [],
 'metadata': None,
 'method': {'dpo': None,
  'supervised': {'hyperparameters': {'batch_size': 'auto',
    'learning_rate_multiplier': 'auto',
    'n_epochs': 1}},
  'type': 'supervised'},
 'user_provided_suffix': None}

In [20]:
print("Job ID:", job.id)
print("Status:", job.status)

Job ID: ftjob-zRnS7Vg3c06VvhCq6BEakRiW
Status: validating_files


### Finetuning job 캔슬하기

In [None]:
# client.fine_tuning.jobs.cancel(job.id)

### 학습 과정 확인하기

In [21]:
# List up to 10 events from a fine-tuning job
response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=10)
events = response.data
events.reverse()

for event in events:
    print(event.message)

Created fine-tuning job: ftjob-zRnS7Vg3c06VvhCq6BEakRiW
Validating training file: file-WCctoMYTp5nY9DMhBZpgKF and validation file: file-8idbvccPD3Lb1JXG7phjfY


### Finetuning 모델 삭제하기

In [22]:
# Delete a fine-tuned model (must be an owner of the org the model was created in)
client.models.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")

NotFoundError: Error code: 404 - {'error': {'message': "The model 'ft:gpt-3.5-turbo:acemeco:suffix:abc123' does not exist", 'type': 'invalid_request_error', 'param': 'model', 'code': 'model_not_found'}}