# Fine-tuning을 이용해 AI에게 인격 부여하기
## OpenAI Fine-tuning API로 학습하고 대화하기

In [1]:
from openai import OpenAI
import os

In [2]:
# "Your API Key" is a placeholder: replace it with a real key before running.
# OpenAI() is constructed without an explicit key, so it is expected to pick
# the key up from the OPENAI_API_KEY environment variable set here.
os.environ["OPENAI_API_KEY"] = "Your API Key"
client = OpenAI()

## Upload File

In [3]:
# Upload the training set for fine-tuning. The file is opened in a `with`
# block so the handle is closed as soon as the upload call returns.
with open("train.jsonl", "rb") as train_fp:
    train_file = client.files.create(
        file=train_fp,
        purpose="fine-tune",
    )

In [4]:
# Echo the FileObject returned by the upload call.
train_file

FileObject(id='file-II2lgL817TCYt51FFUBYQOnF', bytes=92059, created_at=1712789091, filename='train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [6]:
# Show the uploaded file's metadata as a plain dict.
# model_dump() replaces the Pydantic-v2-deprecated .dict().
train_file.model_dump()

{'id': 'file-II2lgL817TCYt51FFUBYQOnF',
 'bytes': 92059,
 'created_at': 1712789091,
 'filename': 'train.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'status_details': None}

In [7]:
# File id; this is the value passed as training_file when creating the job.
train_file.id

'file-II2lgL817TCYt51FFUBYQOnF'

In [8]:
# Upload the validation set for fine-tuning. The file is opened in a `with`
# block so the handle is closed as soon as the upload call returns.
with open("valid.jsonl", "rb") as valid_fp:
    valid_file = client.files.create(
        file=valid_fp,
        purpose="fine-tune",
    )

In [9]:
# Echo the FileObject returned by the upload call.
valid_file

FileObject(id='file-wBm3g3clvd4M2zFbN38TeNzW', bytes=26371, created_at=1712789135, filename='valid.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [10]:
# Show the uploaded file's metadata as a plain dict.
# model_dump() replaces the Pydantic-v2-deprecated .dict().
valid_file.model_dump()

{'id': 'file-wBm3g3clvd4M2zFbN38TeNzW',
 'bytes': 26371,
 'created_at': 1712789135,
 'filename': 'valid.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'status_details': None}

In [11]:
# File id; this is the value passed as validation_file when creating the job.
valid_file.id

'file-wBm3g3clvd4M2zFbN38TeNzW'

## Finetuning

### Finetuning job 제출하기

In [12]:
# Submit a fine-tuning job on top of gpt-3.5-turbo-1106 using the two
# uploaded files. Only the epoch count is overridden here.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo-1106",
    training_file=train_file.id,
    validation_file=valid_file.id,
    hyperparameters=dict(n_epochs=1),  # default: 3 (3-7 is a typical value)
)

In [13]:
# Echo the FineTuningJob object returned by the create call.
job

FineTuningJob(id='ftjob-7qQ4J4kI670e75dbjOrU2aqs', created_at=1712789366, error=Error(code=None, message=None, param=None, error=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=1, batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-DUZd3CNfS4SZyCNWQofP8MWw', result_files=[], status='validating_files', trained_tokens=None, training_file='file-II2lgL817TCYt51FFUBYQOnF', validation_file='file-wBm3g3clvd4M2zFbN38TeNzW', user_provided_suffix=None, seed=1140655138, integrations=[])

In [14]:
# Show the job's fields as a plain dict.
# model_dump() replaces the Pydantic-v2-deprecated .dict().
job.model_dump()

{'id': 'ftjob-7qQ4J4kI670e75dbjOrU2aqs',
 'created_at': 1712789366,
 'error': {'code': None, 'message': None, 'param': None, 'error': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'n_epochs': 1,
  'batch_size': 'auto',
  'learning_rate_multiplier': 'auto'},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-DUZd3CNfS4SZyCNWQofP8MWw',
 'result_files': [],
 'status': 'validating_files',
 'trained_tokens': None,
 'training_file': 'file-II2lgL817TCYt51FFUBYQOnF',
 'validation_file': 'file-wBm3g3clvd4M2zFbN38TeNzW',
 'user_provided_suffix': None,
 'seed': 1140655138,
 'integrations': []}

In [15]:
# Job id; used below to retrieve the job and list its events.
job.id

'ftjob-7qQ4J4kI670e75dbjOrU2aqs'

In [16]:
# Report the job's identifier and its status as of the last fetch.
print(f"Job ID: {job.id}")
print(f"Status: {job.status}")

Job ID: ftjob-7qQ4J4kI670e75dbjOrU2aqs
Status: validating_files


### Finetuning job들 가져오기

In [17]:
# Fetch a page of fine-tuning jobs, capped at 10 entries.
resp = client.fine_tuning.jobs.list(limit=10)

In [18]:
# Show the job listing as a plain dict.
# model_dump() replaces the Pydantic-v2-deprecated .dict().
resp.model_dump()

{'data': [{'id': 'ftjob-7qQ4J4kI670e75dbjOrU2aqs',
   'created_at': 1712789366,
   'error': {'code': None, 'message': None, 'param': None, 'error': None},
   'fine_tuned_model': None,
   'finished_at': None,
   'hyperparameters': {'n_epochs': 1,
    'batch_size': 1,
    'learning_rate_multiplier': 8},
   'model': 'gpt-3.5-turbo-1106',
   'object': 'fine_tuning.job',
   'organization_id': 'org-DUZd3CNfS4SZyCNWQofP8MWw',
   'result_files': [],
   'status': 'validating_files',
   'trained_tokens': None,
   'training_file': 'file-II2lgL817TCYt51FFUBYQOnF',
   'validation_file': 'file-wBm3g3clvd4M2zFbN38TeNzW',
   'user_provided_suffix': None,
   'seed': 1140655138,
   'integrations': []}],
 'object': 'list',
 'has_more': False}

### 현재 Finetuning 상태 가져오기


In [19]:
# Re-fetch the job by id and rebind `job`, so fields such as status reflect
# the server-side state at the time of this call.
job = client.fine_tuning.jobs.retrieve(job.id)

In [20]:
# Show the refreshed job's fields as a plain dict.
# model_dump() replaces the Pydantic-v2-deprecated .dict().
job.model_dump()

{'id': 'ftjob-7qQ4J4kI670e75dbjOrU2aqs',
 'created_at': 1712789366,
 'error': {'code': None, 'message': None, 'param': None, 'error': None},
 'fine_tuned_model': None,
 'finished_at': None,
 'hyperparameters': {'n_epochs': 1,
  'batch_size': 1,
  'learning_rate_multiplier': 8},
 'model': 'gpt-3.5-turbo-1106',
 'object': 'fine_tuning.job',
 'organization_id': 'org-DUZd3CNfS4SZyCNWQofP8MWw',
 'result_files': [],
 'status': 'running',
 'trained_tokens': None,
 'training_file': 'file-II2lgL817TCYt51FFUBYQOnF',
 'validation_file': 'file-wBm3g3clvd4M2zFbN38TeNzW',
 'user_provided_suffix': None,
 'seed': 1140655138,
 'integrations': []}

In [21]:
# Report the job's identifier and its status after the refresh.
print(f"Job ID: {job.id}")
print(f"Status: {job.status}")

Job ID: ftjob-7qQ4J4kI670e75dbjOrU2aqs
Status: running


### Finetuning job 캔슬하기

In [None]:
# client.fine_tuning.jobs.cancel(job.id)

### 학습 과정 확인하기

In [22]:
# List up to 10 events from a fine-tuning job and print their messages.
# The events are printed in the reverse of the order the API returned them.
# A reversed copy is built instead of calling .reverse() on response.data, so
# the response object is left untouched and re-running the loop is stable.
response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=10)
events = list(reversed(response.data))

for event in events:
    print(event.message)

Created fine-tuning job: ftjob-7qQ4J4kI670e75dbjOrU2aqs
Validating training file: file-II2lgL817TCYt51FFUBYQOnF and validation file: file-wBm3g3clvd4M2zFbN38TeNzW
Files validated, moving job to queued state
Fine-tuning job started


### Finetuning 모델 삭제하기

In [None]:
# Delete a fine-tuned model (must be an owner of the org the model was created in)
# NOTE(review): the model id below looks like a placeholder; substitute the id
# of the model you actually want to remove. This call is live (not commented
# out), so running the cell as-is sends the delete request.
client.models.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")

# Finetuning된 모델 Inference하기

In [3]:
# Self-contained setup for the inference section. `os` is imported here
# because this cell uses it and may be run in a fresh kernel.
import os

from openai import OpenAI

# "Your API Key" is a placeholder: replace it with a real key before running.
os.environ["OPENAI_API_KEY"] = "Your API Key"
client = OpenAI()

In [4]:
# System prompt used by the inference cells that follow. It scripts a
# hamburger-restaurant order flow and asks the model to emit "[END]" once the
# order is complete. The leading backslash suppresses the newline right after
# the opening triple quote.
system_prompt ="""\
- You are an employee at a hamburger restaurant.
- Ask the following questions in order.
1. Ask for the menu to order.
2. Ask if there's anything else to order.
3. Ask whether they will dine in or take away.
4. Ask if they will pay by card or cash.
5. Greet the customer and say "[END]" when the order is complete.
- You respond in English."""

In [5]:
# Identifier of the fine-tuned model to query.
# NOTE(review): presumably the fine_tuned_model value of a finished job;
# replace with your own model id.
model = "ft:gpt-3.5-turbo-1106:personal::9Cb9xPzW"

In [30]:
# Send the system prompt plus a plain greeting to the fine-tuned model and
# print the text of the first returned choice.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "Hello"},
]
response = client.chat.completions.create(model=model, messages=messages)
print(response.choices[0].message.content)

Welcome to our restaurant! 
- What can I get for you? [END]


In [6]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

In [7]:
# LangChain chat-model wrapper pointed at the fine-tuned model id.
llm = ChatOpenAI(model=model)

In [8]:
# Two-message template: the fixed system prompt followed by the user's text,
# which is filled in through the "input" variable.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [9]:
# Pipe the prompt template into the model and then into a string output parser.
chain = prompt_template | llm | StrOutputParser()

In [10]:
# Single-turn call: the dict key must match the "{input}" template variable.
chain.invoke({"input": "I'd like to order a cheese burger"})

'Great choice! Here is the menu:\n- Cheese Burger\nIs there anything else you would like to order?'

## Conversation

In [11]:
from operator import itemgetter 
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel


In [12]:
# LangChain chat-model wrapper pointed at the fine-tuned model id.
llm = ChatOpenAI(model=model)

In [13]:
# Conversation template: system prompt, then the messages supplied under
# "chat_history", then the new user message from "input".
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [14]:
# Conversation memory exposed under the "chat_history" key, matching the
# MessagesPlaceholder name in the prompt; return_messages=True requests
# message objects rather than a single string.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [15]:
# Step that reads the memory variables and keeps only the "chat_history" entry.
load_history = RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history")

# Add chat_history to the incoming dict, then run it through the prompt and model.
conv_chain = RunnablePassthrough.assign(chat_history=load_history) | prompt | llm

In [16]:
# Interactive chat loop. Entering 'q' or an empty line ends the session.
# Each reply is streamed to stdout chunk by chunk; the full text is then
# saved to memory together with the user's message so later turns see it.
quit_msg = {'q', ''}
while True:
    user_input = input('user: ')
    if user_input in quit_msg:
        break
    print('AI: ', end='')
    chunks = []
    for chunk in conv_chain.stream({"input": user_input}):
        # flush so each piece is shown as soon as it arrives
        print(chunk.content, end="", flush=True)
        chunks.append(chunk.content)
    # Terminate the streamed line so the next turn starts on a fresh line.
    print()
    response = "".join(chunks)

    memory.save_context({"input": user_input}, {"output": response})