In [7]:
import os
from openai import OpenAI
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the key once and hand it to the SDK client used by the upload and
# fine-tuning cells in this notebook.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [9]:
# Upload the training file.
# The context manager closes the file handle once the upload call returns,
# instead of leaving it open for the lifetime of the kernel.
with open("data.jsonl", "rb") as training_data:
    training_file = client.files.create(file=training_data, purpose="fine-tune")
print("Training file id: ", training_file.id)

Training file id:  file-Ru1c5UJwxwCNVKBkze8I3I6F


In [10]:

# Create the fine-tuning job from the file uploaded in the previous cell.
suffix_name = "llm"
response = client.fine_tuning.jobs.create(
    # Use the id returned by the upload rather than a pasted literal, so a
    # re-run of the upload cell cannot leave this job pointing at a stale file.
    training_file=training_file.id,
    model="gpt-3.5-turbo",
    suffix=suffix_name,
)
print(response)

FineTuningJob(id='ftjob-9Rp36wVMvyizIpqtaFurkCW1', created_at=1727878288, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-uXFhRQZlPYdmwJvwmyYmTROo', result_files=[], seed=534979875, status='validating_files', trained_tokens=None, training_file='file-Ru1c5UJwxwCNVKBkze8I3I6F', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix='llm')


Check fine-tuning job status

In [8]:
# Fetch the current state of the fine-tuning job by its id. The id is kept as
# a literal so the status can be checked from a fresh kernel without creating
# a new job.
job_id = "ftjob-9Rp36wVMvyizIpqtaFurkCW1"
response = client.fine_tuning.jobs.retrieve(job_id)
response

FineTuningJob(id='ftjob-9Rp36wVMvyizIpqtaFurkCW1', created_at=1727878288, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-3.5-turbo-0125:imutably-oy:llm:ADudBVBQ', finished_at=1727879315, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-uXFhRQZlPYdmwJvwmyYmTROo', result_files=['file-E8uZSc8IGReo70k5bIaSvbWJ'], seed=534979875, status='succeeded', trained_tokens=335838, training_file='file-Ru1c5UJwxwCNVKBkze8I3I6F', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix='llm')

In [9]:
# The retrieved job exposes the resulting model name as `fine_tuned_model`
# (it is None in the job object printed right after creation).
fine_tuned_model_id = response.fine_tuned_model
print("\nFine tuned model id: ", fine_tuned_model_id)


Fine tuned model id:  ft:gpt-3.5-turbo-0125:imutably-oy:llm:ADudBVBQ


Use the fine-tuned model

In [5]:
import json
import os
import aiohttp
from dotenv import load_dotenv

# HANDLER: helpers that replay dataset prompts against the fine-tuned model
class HANDLER:
    """Stateless helpers for sending dataset prompts to a chat-completions model.

    All methods are static; the class only serves as a namespace.
    """

    # Model used when a caller does not pass ``model`` explicitly.
    DEFAULT_MODEL = "ft:gpt-3.5-turbo-0125:imutably-oy:llm:ADudBVBQ"
    # Chat-completions endpoint every request is posted to.
    API_URL = "https://api.openai.com/v1/chat/completions"

    @staticmethod
    def process_json(json_file):
        """Load a dataset file and strip the assistant turns from every entry.

        The file is read as UTF-8 JSON and iterated as a sequence of entries;
        each entry's ``"messages"`` list (empty if the key is missing) is
        filtered down to the messages whose role is not ``"assistant"``.

        Args:
            json_file: Path of the JSON file to read.

        Returns:
            A list of prompts, each prompt being a list of
            ``{"role": ..., "content": ...}`` dicts. Entries left with no
            messages after filtering are skipped. If the file is missing or is
            not valid JSON, a human-readable error *string* is returned
            instead of a list (kept for backward compatibility; callers must
            check for ``str``).
        """
        try:
            with open(json_file, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            return "File not found. Please check the file path."
        except json.JSONDecodeError:
            return "Error decoding JSON. Please check the file format."

        # Build the prompts without any message whose role is "assistant".
        prompts = []
        for entry in data:
            prompt_parts = [
                {"role": message["role"], "content": message["content"]}
                for message in entry.get("messages", [])
                if message.get("role") != "assistant"
            ]
            if prompt_parts:
                prompts.append(prompt_parts)
        return prompts

    @staticmethod
    async def _request_completion(prompt, model=None):
        """Post one chat-completion request and report success explicitly.

        Args:
            prompt: List of message dicts sent as the ``"messages"`` field.
            model: Model name; ``HANDLER.DEFAULT_MODEL`` when omitted.

        Returns:
            ``(ok, text)``. On success ``text`` is the content of the first
            choice's message. On failure ``text`` is a description starting
            with ``"Error: "`` (the API returned an ``error`` object) or
            ``"Unexpected response: "`` (non-JSON body or unexpected shape).
        """
        headers = {
            "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",  # OpenAI API key from the environment
            "Content-Type": "application/json"
        }
        payload = json.dumps({
            "model": model or HANDLER.DEFAULT_MODEL,
            "temperature": 0.5,  # Optional: adjust the temperature for creativity
            "messages": prompt
        })

        async with aiohttp.ClientSession() as session:
            async with session.post(HANDLER.API_URL, headers=headers, data=payload) as response:
                # The header may carry parameters ("application/json; charset=utf-8")
                # or be absent altogether, so compare by prefix with a default.
                content_type = response.headers.get('Content-Type', '')
                if not content_type.lower().startswith('application/json'):
                    response_text = await response.text()
                    return False, f"Unexpected response: {response_text}"

                response_json = await response.json()
                if 'error' in response_json:
                    return False, f"Error: {response_json['error']}"

                try:
                    # The reply text lives in the first element of "choices".
                    return True, response_json["choices"][0]["message"]["content"]
                except (KeyError, IndexError, TypeError):
                    # JSON body without the expected choices/message structure.
                    return False, f"Unexpected response: {response_json}"

    @staticmethod
    async def call_api_teacher(prompt, model=None):
        """Send ``prompt`` to the model and return the reply as a string.

        Args:
            prompt: List of message dicts sent as the ``"messages"`` field.
            model: Optional model name; defaults to ``HANDLER.DEFAULT_MODEL``.

        Returns:
            The assistant's reply text, or a string starting with
            ``"Error: "`` / ``"Unexpected response: "`` when the request did
            not yield a completion.
        """
        _, text = await HANDLER._request_completion(prompt, model)
        return text

    @staticmethod
    async def loop_json(input_json_path, output_json_path, model=None):
        """Run every prompt in a dataset through the model and save the replies.

        Prompts are processed one after another. Each result is stored as
        ``{"prompt_index", "prompt", "response"}``, where ``response`` is the
        reply text or the literal ``"Error"`` when the request failed. The
        full list is written to ``output_json_path`` as UTF-8 JSON after the
        last prompt.

        Args:
            input_json_path: Dataset file passed to ``process_json``.
            output_json_path: Destination JSON file; its parent directory is
                created if needed.
            model: Optional model name; defaults to ``HANDLER.DEFAULT_MODEL``.

        Returns:
            None. If the input file cannot be loaded, the loader's error
            message is printed and nothing is requested or written.
        """
        # Read the JSON file and build the prompts.
        prompts = HANDLER.process_json(input_json_path)
        if isinstance(prompts, str):
            # process_json signals failure with a message string; iterating it
            # would send one API request per character.
            print(prompts)
            return

        results = []  # Collected result entries, in prompt order.

        for index, prompt in enumerate(prompts, start=1):
            print(f"\n_____________ Processing Prompt {index} _____________")

            # Call the API asynchronously; success is reported by an explicit
            # flag rather than by searching the reply text for "Error".
            ok, assistant_reply = await HANDLER._request_completion(prompt, model)

            results.append({
                "prompt_index": index,
                "prompt": prompt,
                "response": assistant_reply if ok else "Error"
            })

            if ok:
                print(f"~~~~~~~~~~~~~~ Success for Prompt {index} ~~~~~~~~~~~~~~\n")
            else:
                print(f"~~~~~~~~~~~~~~ Failed for Prompt {index}: {assistant_reply} ~~~~~~~~~~~~~~\n")

        # Make sure the destination directory exists before writing.
        output_dir = os.path.dirname(output_json_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save all results to the JSON file once every prompt is done.
        with open(output_json_path, 'w', encoding='utf-8') as output_file:
            json.dump(results, output_file, ensure_ascii=False, indent=4)


In [6]:
import asyncio
async def main():
    load_dotenv() 
    input_json_path = "../data/phase-02/100-test.json" 
    output_json_path = "../output/phase-01/output-teacher.json"
    await HANDLER().loop_json(input_json_path, output_json_path)

await main()


_____________ Processing Prompt 1 _____________
~~~~~~~~~~~~~~ Success for Prompt 1 ~~~~~~~~~~~~~~


_____________ Processing Prompt 2 _____________
~~~~~~~~~~~~~~ Success for Prompt 2 ~~~~~~~~~~~~~~


_____________ Processing Prompt 3 _____________
~~~~~~~~~~~~~~ Success for Prompt 3 ~~~~~~~~~~~~~~


_____________ Processing Prompt 4 _____________
~~~~~~~~~~~~~~ Success for Prompt 4 ~~~~~~~~~~~~~~


_____________ Processing Prompt 5 _____________
~~~~~~~~~~~~~~ Success for Prompt 5 ~~~~~~~~~~~~~~


_____________ Processing Prompt 6 _____________
~~~~~~~~~~~~~~ Success for Prompt 6 ~~~~~~~~~~~~~~


_____________ Processing Prompt 7 _____________
~~~~~~~~~~~~~~ Success for Prompt 7 ~~~~~~~~~~~~~~


_____________ Processing Prompt 8 _____________
~~~~~~~~~~~~~~ Success for Prompt 8 ~~~~~~~~~~~~~~


_____________ Processing Prompt 9 _____________
~~~~~~~~~~~~~~ Success for Prompt 9 ~~~~~~~~~~~~~~


_____________ Processing Prompt 10 _____________
~~~~~~~~~~~~~~ Success for Prompt 10 ~~~~