# Giving an AI a Personality with Fine-tuning
## Creating a Dataset for Training with OpenAI's Fine-tuning API
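- The training file uses the JSON Lines (`.jsonl`) format: one JSON object per line, each holding one conversation under the `messages` key.
- Example records: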
```json
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
```

In [1]:
import getpass
import json
import os
from operator import itemgetter

from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI
from tqdm.notebook import tqdm

## Data Generation

In [2]:
# Do not paste a real key into the notebook: it would be saved with the file.
# A key already exported as OPENAI_API_KEY is kept as-is; only when none is set
# do we ask for one interactively (the input is not echoed).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# temperature controls the randomness of the output: the higher the value, the
# more varied the generated conversations.
model = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.8)

In [3]:
# System prompt for the "customer" agent. The text ends with a newline.
ai_1_system_prompt = (
    "- You are a person who came to a hamburger restaurant to place an order.\n"
    "- Order any hamburger and also order a random drink.\n"
    "- You respond in English.\n"
)

# System prompt for the "employee" agent. The literal "[END]" token requested in
# step 5 is what the generation loop looks for to stop a conversation.
# Unlike the customer prompt, this text has no trailing newline.
ai_2_system_prompt = (
    "- You are an employee at a hamburger restaurant.\n"
    "- Ask the following questions in order\n"
    "1. Ask for the menu to order.\n"
    "2. Ask if there's anything else to order.\n"
    "3. Ask whether they will dine in or take away.\n"
    "4. Ask if they will pay by card or cash.\n"
    '5. Greet the customer and say "[END]" when the order is complete.\n'
    "\n"
    "- You respond in English."
)


def _build_chain_with_memory(system_prompt):
    """Build one chat chain and the buffer memory that supplies its history.

    Args:
        system_prompt: Text used as the system message of the chain's prompt.

    Returns:
        A ``(chain, memory)`` tuple. The chain is invoked with ``{"input": ...}``.
        It only *reads* ``chat_history`` from ``memory``; the caller is
        responsible for recording each exchange with ``memory.save_context``.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
        ]
    )
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    chain = (
        # {"input": ...} -> {"input": ..., "chat_history": <messages held in memory>}
        RunnablePassthrough.assign(
            chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history")
        )
        | prompt
        | model
    )
    return chain, memory


def get_new_ai_chains():
    """Create a fresh customer chain and employee chain, each with its own memory.

    Every call builds new ``ConversationBufferMemory`` objects, so the history of
    one simulated conversation is not carried over into the next one.

    Returns:
        ``(ai_1_chain, ai_1_memory, ai_2_chain, ai_2_memory)`` where ``ai_1`` is
        built from ``ai_1_system_prompt`` (customer) and ``ai_2`` from
        ``ai_2_system_prompt`` (employee).
    """
    ai_1_chain, ai_1_memory = _build_chain_with_memory(ai_1_system_prompt)
    ai_2_chain, ai_2_memory = _build_chain_with_memory(ai_2_system_prompt)
    return ai_1_chain, ai_1_memory, ai_2_chain, ai_2_memory

In [4]:
# 130 conversations in total: the first 100 are later used for training and the
# remaining 30 for validation.
n_conversation = 130
# Upper bound on customer/employee exchanges within a single conversation.
n_max_turn = 20

conversation_list = []

for _conversation_idx in tqdm(range(n_conversation), total=n_conversation):
    ai_1_chain, ai_1_memory, ai_2_chain, ai_2_memory = get_new_ai_chains()

    # The opening line comes straight from the model rather than from ai_2_chain.
    # It is fed to the customer as input but never stored in ai_2_memory, so the
    # saved conversation starts with the customer's first message.
    greeting = model.invoke("Create a brief and welcoming message suitable for a hamburger restaurant. single sentence")
    ai_2_output = greeting.content

    print("*"*10 + "새로운 대화" + "*"*30)
    print(f"햄버거 가게 점원: {ai_2_output}")

    for _turn_idx in range(n_max_turn):
        # Customer replies to the employee's latest line.
        customer_reply = ai_1_chain.invoke({"input": ai_2_output})
        ai_1_output = customer_reply.content
        ai_1_memory.save_context({"input": ai_2_output}, {"output": ai_1_output})
        print(f"손님: {ai_1_output}")

        # Employee replies to the customer.
        employee_reply = ai_2_chain.invoke({"input": ai_1_output})
        ai_2_output = employee_reply.content
        ai_2_memory.save_context({"input": ai_1_output}, {"output": ai_2_output})
        print(f"햄버거 가게 점원: {ai_2_output}")

        # The employee prompt asks for "[END]" once the order is complete.
        if "[END]" in ai_2_output:
            break

    # NOTE(review): a conversation that reaches n_max_turn without "[END]" is
    # still appended here, so truncated dialogues can end up in the dataset.
    employee_side_history = ai_2_memory.chat_memory.dict()
    conversation_list.append(employee_side_history['messages'])

  0%|          | 0/130 [00:00<?, ?it/s]

**********새로운 대화******************************
햄버거 가게 점원: Welcome to our burger joint, where every bite is a taste of happiness!
손님: Thank you! I'll have a classic cheeseburger with a side of fries, please. And for my drink, I'll go with a vanilla milkshake.
햄버거 가게 점원: Sure! Here is our menu for you to order:
- Classic Cheeseburger
- Side of Fries
- Vanilla Milkshake

Is there anything else you would like to order?
손님: That will be all for now, thank you!
햄버거 가게 점원: Great! Will you be dining in or taking your order away?
손님: I'll be dining in, thank you.
햄버거 가게 점원: Will you be paying by card or cash today?
손님: I'll be paying by card.
햄버거 가게 점원: Great choice! Thank you for placing your order. Your Classic Cheeseburger with a side of fries, a vanilla milkshake, and dining in will be all set for you. Have a great meal! [END]
**********새로운 대화******************************
햄버거 가게 점원: Welcome to our hamburger haven, where juicy patties and tasty toppings await to satisfy your cravings!
손님: H

In [7]:
# Peek at the first generated conversation.
conversation_list[0]

[{'content': "Thank you! I'll have a classic cheeseburger with a side of fries, please. And for my drink, I'll go with a vanilla milkshake.",
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'human',
  'name': None,
  'id': None,
  'example': False},
 {'content': 'Sure! Here is our menu for you to order:\n- Classic Cheeseburger\n- Side of Fries\n- Vanilla Milkshake\n\nIs there anything else you would like to order?',
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'ai',
  'name': None,
  'id': None,
  'example': False},
 {'content': 'That will be all for now, thank you!',
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'human',
  'name': None,
  'id': None,
  'example': False},
 {'content': 'Great! Will you be dining in or taking your order away?',
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'ai',
  'name': None,
  'id': None,
  'example': False},
 {'content': "I'll be dining in, thank you.",
  'additional_kwargs': {},


In [5]:
conversation_list[0][:3] # first three messages of the first conversation

[{'content': "Thank you! I'll have a classic cheeseburger with a side of fries, please. And for my drink, I'll go with a vanilla milkshake.",
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'human',
  'name': None,
  'id': None,
  'example': False},
 {'content': 'Sure! Here is our menu for you to order:\n- Classic Cheeseburger\n- Side of Fries\n- Vanilla Milkshake\n\nIs there anything else you would like to order?',
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'ai',
  'name': None,
  'id': None,
  'example': False},
 {'content': 'That will be all for now, thank you!',
  'additional_kwargs': {},
  'response_metadata': {},
  'type': 'human',
  'name': None,
  'id': None,
  'example': False}]

## Converting the LangChain message format to the OpenAI format

In [8]:
def convert_message(msg):
    """Convert one LangChain message dict into an OpenAI chat message dict.

    Args:
        msg: Mapping with at least the keys ``'type'`` and ``'content'``.

    Returns:
        ``{"role": ..., "content": ...}`` where ``'human'`` becomes ``'user'``
        and ``'system'`` stays ``'system'``. Every other type (e.g. ``'ai'``)
        is labelled ``'assistant'``.
    """
    # Only the types that must NOT become 'assistant' are listed; previously a
    # 'system' message would have been mislabelled as an assistant turn.
    role_by_type = {"human": "user", "system": "system"}
    return {
        "role": role_by_type.get(msg["type"], "assistant"),
        "content": msg["content"],
    }

In [7]:
# new_conversation_list = []

# system_msg = {"role": "system", "content": ai_2_system_prompt}

# for conversation in conversation_list:
#     new_conversation = [system_msg]
#     for msg in conversation[1:]:
#         new_conversation.append(convert_message(msg))
#     new_conversation_list.append(new_conversation)

In [17]:
# One {"messages": [...]} record per conversation, with every LangChain message
# dict converted to the OpenAI role/content form.
new_conversation_list = [
    {"messages": [convert_message(message) for message in messages]}
    for messages in conversation_list
]

In [18]:
# Display the converted conversations.
new_conversation_list

[{'messages': [{'role': 'user',
    'content': "Thank you! I'll have a classic cheeseburger with a side of fries, please. And for my drink, I'll go with a vanilla milkshake."},
   {'role': 'assistant',
    'content': 'Sure! Here is our menu for you to order:\n- Classic Cheeseburger\n- Side of Fries\n- Vanilla Milkshake\n\nIs there anything else you would like to order?'},
   {'role': 'user', 'content': 'That will be all for now, thank you!'},
   {'role': 'assistant',
    'content': 'Great! Will you be dining in or taking your order away?'},
   {'role': 'user', 'content': "I'll be dining in, thank you."},
   {'role': 'assistant',
    'content': 'Will you be paying by card or cash today?'},
   {'role': 'user', 'content': "I'll be paying by card."},
   {'role': 'assistant',
    'content': 'Great choice! Thank you for placing your order. Your Classic Cheeseburger with a side of fries, a vanilla milkshake, and dining in will be all set for you. Have a great meal! [END]'}]},
 {'messages': [{

## Splitting the dataset

In [19]:
# The first n_train conversations form the training set; whatever remains is
# held out for validation.
n_train = 100
train_dataset, valid_dataset = (
    new_conversation_list[:n_train],
    new_conversation_list[n_train:],
)

## Saving the data

In [20]:
def save_as_json_lines(list_of_dicts, file_name):
    """Write each dictionary as one JSON document per line (JSON Lines).

    Args:
        list_of_dicts: Iterable of JSON-serialisable dictionaries.
        file_name: Path of the file to create; an existing file is overwritten.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
    # opened as UTF-8 explicitly. Otherwise the platform's locale encoding is
    # used, which can raise UnicodeEncodeError or produce a non-UTF-8 file.
    with open(file_name, 'w', encoding='utf-8') as file:
        for dictionary in list_of_dicts:
            json_line = json.dumps(dictionary, ensure_ascii=False)
            file.write(json_line + '\n')

In [21]:
# Write each split to its own JSON Lines file in the current directory.
for dataset, file_name in (
    (train_dataset, "./train.jsonl"),
    (valid_dataset, "./valid.jsonl"),
):
    save_as_json_lines(dataset, file_name)

## Further steps needed

- Clean the dataset
- Sample the data with a better model