In [None]:
pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.5
    Uninstalling openai-1.54.5:
      Successfully uninstalled openai-1.54.5
Successfully installed openai-0.28.0


In [None]:
import getpass
import json
import os
import time

import openai
from google.colab import files

# Configure the OpenAI API key.
# Use OPENAI_API_KEY from the environment when it is already set; otherwise
# prompt for it without echo so the secret is never stored in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
openai.api_key = os.getenv("OPENAI_API_KEY")

# JSON 데이터 로드
def load_data(file_path):
    """
    Load (instruction, input) pairs from a JSON file.

    The file is expected to contain a JSON array of objects. Each object must
    have an "instruction" key; "input" is optional and defaults to "".

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list of (instruction, input) tuples, or None when the file is
        missing, is not valid JSON, or does not have the expected structure.
        An error message is printed in each failure case.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's default text encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
        # A record without "input" yields an empty input instead of aborting
        # the whole load.
        return [(item["instruction"], item.get("input", "")) for item in data]
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: Failed to parse JSON in '{file_path}'.")
        return None
    except Exception as e:
        # Covers structural problems such as a missing "instruction" key or a
        # top-level value that is not a list of objects.
        print(f"Unexpected error: {e}")
        return None

# GPT-3.5-turbo 응답 생성 함수
def generate_output(instruction, input_text, model="gpt-3.5-turbo", max_retries=5, retry_delay=10):
    """
    Generate a response for one instruction/input pair via the OpenAI chat API.

    Args:
        instruction: Instruction text placed in the user prompt.
        input_text: Input text placed in the user prompt.
        model: Name of the chat model to call.
        max_retries: How many times to retry after a rate-limit error before
            giving up.
        retry_delay: Seconds to wait before each rate-limit retry.

    Returns:
        The stripped text of the first choice, or None when the request fails
        (connection error, other API error, unexpected error, or the rate
        limit persisting after max_retries retries). A message is printed for
        every failure.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Instruction: {instruction}\nInput: {input_text}\n\nOutput:"}
    ]
    retries_left = max_retries
    # Retry in a bounded loop rather than by recursion, so a persistent rate
    # limit cannot retry forever or exhaust the call stack.
    while True:
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=150
            )
            # Extract the text of the first choice.
            return response["choices"][0]["message"]["content"].strip()
        except openai.error.RateLimitError:
            if retries_left <= 0:
                print(f"Rate limit exceeded. Giving up after {max_retries} retries.")
                return None
            retries_left -= 1
            print(f"Rate limit exceeded. Retrying after {retry_delay} seconds...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError:
            print("API connection error. Please check your network.")
            return None
        except openai.error.OpenAIError as e:
            print(f"OpenAI API Error: {e}")
            return None
        except Exception as e:
            print(f"Unexpected error: {e}")
            return None

# 중간 저장 함수
def save_progress(data, output_file="generated_data.json"):
    """
    Save the data generated so far to a JSON file.

    The data is first written to "<output_file>.tmp" and then moved over
    output_file, so a failure while writing leaves any previously saved
    checkpoint intact instead of truncating it.

    Args:
        data: JSON-serializable object to save.
        output_file: Destination path of the JSON file.

    Returns:
        None. Errors are reported with a printed message rather than raised.
    """
    temp_file = f"{output_file}.tmp"
    try:
        with open(temp_file, "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4)
        # Swap the finished file into place in a single step.
        os.replace(temp_file, output_file)
        print(f"Progress saved to '{output_file}'.")
    except Exception as e:
        print(f"Error saving progress: {e}")
        # Best-effort removal of a partially written temp file; it may not
        # exist if opening it was what failed.
        try:
            os.remove(temp_file)
        except OSError:
            pass

# 데이터 처리 및 결과 저장
def process_data(data, output_file="generated_data.json"):
    """
    Generate an output for every (instruction, input) pair and save the results.

    Each pair is sent to generate_output with the "gpt-3.5-turbo" model. Pairs
    that yield no output, or that raise, are reported and skipped. Results are
    checkpointed with save_progress after every 10th item and once more at the
    end, with a 0.5 second pause after each item.

    Args:
        data: Sequence of (instruction, input) pairs.
        output_file: Path handed to save_progress for the JSON results.
    """
    results = []
    for position, (instruction, input_text) in enumerate(data, start=1):
        print(f"Processing {position}/{len(data)}")
        try:
            output = generate_output(instruction, input_text, model="gpt-3.5-turbo")
            if not output:
                print(f"No output generated for item {position}. Skipping...")
            else:
                record = {
                    "instruction": instruction,
                    "input": input_text,
                    "output": output,
                }
                results.append(record)
                # Show only the start of the output as a progress check.
                print(f"Generated output: {output[:50]}...")
        except Exception as exc:
            print(f"Error processing item {position}: {exc}")

        # Checkpoint after every 10th item.
        if position % 10 == 0:
            save_progress(results, output_file)
        # Pause 0.5 seconds between API calls.
        time.sleep(0.5)

    # Final save.
    save_progress(results, output_file)
    print(f"Data generation complete. Results saved to '{output_file}'.")

# Upload the input file.
uploaded = files.upload()
# Take the first uploaded name; None when nothing was uploaded, instead of
# raising IndexError on an empty result.
input_file = next(iter(uploaded), None)

# Load the data and run generation.
if input_file is None:
    print("No file uploaded. Nothing to process.")
    data = None
else:
    data = load_data(input_file)
if data:
    process_data(data)

Saving train_data.json to train_data (4).json
Processing 1/610
Generated output: Based on your movie history, a great next movie fo...
Processing 2/610
Generated output: Based on the movies you have watched, you might en...
Processing 3/610
Generated output: Based on the movies you have seen, a must-watch fi...
Processing 4/610
Generated output: Based on the movies you have watched, you might li...
Processing 5/610
Generated output: Based on the movie titles provided, I would sugges...
Processing 6/610
Generated output: A great follow-up movie for you to continue your j...
Processing 7/610
Generated output: Based on your movie history, a good next movie for...
Processing 8/610
Generated output: Based on your movie history, the best next movie f...
Processing 9/610
Generated output: Based on the movies watched, the user might enjoy ...
Processing 10/610
Generated output: Based on the movies you have seen, the next must-w...
Progress saved to 'generated_data.json'.
Processing 11/610
Gene

In [None]:
# Download the generated dataset. Skip with a clear message when the file
# does not exist (for example, the generation cell has not been run).
if os.path.exists("generated_data.json"):
    files.download("generated_data.json")
else:
    print("'generated_data.json' not found. Run the generation cell first.")