In [None]:
import os

import openai
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from the .env file so OPENAI_API_KEY can be supplied there.
load_dotenv()

# Read the OpenAI API key from the environment and fail fast when it is absent,
# instead of letting a later API call fail with a less obvious error.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in the .env file."
    )

In [19]:
import pandas as pd

def process_data(df):
    """
    Render every row of a review DataFrame as one multi-line text block.

    :param df: DataFrame providing the columns ``review``, ``replyContent``,
        ``thumbsUpCount``, ``reviewCreatedVersion`` and ``timeToReply``
    :return: list of formatted strings, one per row, in row order
    """

    def render(record):
        # Pull the fields out first so the template below stays readable.
        review_text = record['review']
        reply_text = record['replyContent']
        thumbs_up = record['thumbsUpCount']
        app_version = record['reviewCreatedVersion']
        reply_delay = record['timeToReply']
        return (
            f"Review: {review_text}\n"
            f"thumbsUpCount: {thumbs_up}, reviewCreatedVersion: {app_version}\n"
            f"Reply: {reply_text}\n"
            f"timeToReply: {reply_delay}"
        )

    return [render(record) for _, record in df.iterrows()]

import pandas as pd

# Settings for the batch request file: where it is written and which
# fine-tuned model the requests target.
save_file = "test_formatted.jsonl"
model_name = "ft:gpt-4o-mini-2024-07-18:personal::9w9nRfyc"

# Read the test set and render each row as a text block with process_data.
test_df = pd.read_csv("../test.csv")
reviews = process_data(test_df)

# Empty frame; nothing in the visible cells reads it.
new_df = pd.DataFrame()

import json

# Instruction shared by every request; only the user message varies per review.
system_message = {
    "role": "system",
    "content": "You are an AI assistant specialized in analyzing detailed app reviews for BANKApp and predicting review scores. Given a review that includes the review text, thumbs up count, app version, official reply , and reply time, you will predict the likely star rating (0-4 stars) the reviewer would give. Consider all provided information in your analysis. Factors to consider include the sentiment of the review, the app version, the presence and quality of an official reply, and the response time. Provide only the numerical score without any explanation or additional commentary."
}

# Write one request object per line (JSONL).
with open(save_file, 'w', encoding='utf-8') as jsonl_out:
    # custom_id numbering starts at request-2.
    for request_number, review_text in enumerate(reviews, start=2):
        request = {
            "custom_id": f"request-{request_number}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model_name,
                "messages": [
                    system_message,
                    {"role": "user", "content": review_text},
                ],
                "max_tokens": 1000,  # adjust as needed
            },
        }
        # ensure_ascii=False keeps non-ASCII review text unescaped in the file.
        jsonl_out.write(json.dumps(request, ensure_ascii=False) + '\n')

print(f"JSONLファイルが作成されました: {save_file}")

from openai import OpenAI

client = OpenAI()

# Upload the request file written earlier in this cell. The path comes from
# save_file rather than a second hard-coded copy of the name, and the context
# manager closes the handle once the upload call returns.
with open(save_file, "rb") as request_file:
    test = client.files.create(
        file=request_file,
        purpose="batch",
    )

# Create the batch job over the uploaded file for the chat completions endpoint.
test_batch = client.batches.create(
    input_file_id=test.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(test_batch)

JSONLファイルが作成されました: test_formatted.jsonl
Batch(id='batch_21DNE8b1uVFnw9tBvQ6ZKDpJ', completion_window='24h', created_at=1723672217, endpoint='/v1/chat/completions', input_file_id='file-QvW8mdvuU0qwXS9QIyuZPdsi', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723758617, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


In [4]:
from openai import OpenAI

# ID of the batch job whose current state should be looked up.
batch_id_to_inspect = "batch_NvG7laxzQIyMCFpwolT9cCpU"

client = OpenAI()

# Keep the call as the final expression so the notebook renders the returned Batch object.
client.batches.retrieve(batch_id_to_inspect)

Batch(id='batch_NvG7laxzQIyMCFpwolT9cCpU', completion_window='24h', created_at=1723670958, endpoint='/v1/chat/completions', input_file_id='file-V0vdVB9nWJYNyRC07i3wHCr3', object='batch', status='cancelled', cancelled_at=1723673522, cancelling_at=1723672530, completed_at=None, error_file_id='file-SToCk9t7gMQNA4fvignVq79e', errors=None, expired_at=None, expires_at=1723757358, failed_at=None, finalizing_at=None, in_progress_at=1723670963, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=2181, total=5846))

In [3]:
from openai import OpenAI

# ID of the batch job that should be cancelled.
batch_id_to_cancel = "batch_ovI3kPD1Vo2j7YQCcM0rvQUj"

client = OpenAI()

# Keep the call as the final expression so the notebook renders the returned Batch object.
client.batches.cancel(batch_id_to_cancel)

Batch(id='batch_ovI3kPD1Vo2j7YQCcM0rvQUj', completion_window='24h', created_at=1723721154, endpoint='/v1/chat/completions', input_file_id='file-K8kzDiu6FSA63fBcHba2jAEH', object='batch', status='cancelling', cancelled_at=None, cancelling_at=1723722548, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723807554, failed_at=None, finalizing_at=None, in_progress_at=1723721160, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=6166, failed=0, total=8767))

In [10]:
import csv
import json
import re

from openai import OpenAI

# ID of the batch output file to download, the ID written for the first CSV
# row, and the CSV path.
OUTPUT_FILE_ID = "file-u5qVJt82VgzVmq9UfgDJxXV3"
FIRST_ROW_ID = 8767
OUTPUT_CSV = "output.csv"


def _request_number(custom_id):
    """Return N for a custom_id of the form "request-N", or None for anything else."""
    if not isinstance(custom_id, str):
        return None
    match = re.fullmatch(r"request-(\d+)", custom_id)
    return int(match.group(1)) if match else None


def _extract_content(record):
    """Return response.body.choices[0].message.content of one output record, or None.

    Every level is guarded, so a record whose "response" is null or whose
    "choices" list is empty yields None instead of raising.
    """
    response = record.get("response") or {}
    body = response.get("body") or {}
    choices = body.get("choices") or [{}]
    message = (choices[0] or {}).get("message") or {}
    return message.get("content")


client = OpenAI()
file_response = client.files.content(OUTPUT_FILE_ID)

# Split on "\n" only: str.splitlines() also breaks on characters such as
# U+2028 that may appear unescaped inside a JSON string. Blank lines are skipped.
records = [
    json.loads(line)
    for line in file_response.text.split("\n")
    if line.strip()
]

# The Batch API does not guarantee that output lines follow input order, so
# restore the order from the numeric suffix of custom_id. If any custom_id
# does not match "request-N", the file order is kept unchanged.
request_numbers = [_request_number(record.get("custom_id")) for record in records]
if all(number is not None for number in request_numbers):
    ordered_pairs = sorted(zip(request_numbers, records), key=lambda pair: pair[0])
    records = [record for _, record in ordered_pairs]

# Contents of the records that actually carry a prediction, in output order.
content_list = []
skipped = 0

# newline="" together with lineterminator="\n" makes csv.writer emit "\n" row endings.
with open(OUTPUT_CSV, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f, lineterminator="\n")
    for offset, record in enumerate(records):
        content = _extract_content(record)
        if content is None:
            # The row ID comes from the record's position, counted before
            # filtering, so skipping this record does not shift later IDs.
            # NOTE(review): requests absent from the output file altogether
            # (e.g. only present in the error file) still shift IDs - confirm.
            skipped += 1
            continue
        content_list.append(content)
        writer.writerow([FIRST_ROW_ID + offset, content])

if skipped:
    print(f"Warning: {skipped} record(s) had no content and were left out of {OUTPUT_CSV}")

print(f"CSVファイルが作成されました: {OUTPUT_CSV}")

CSVファイルが作成されました: output.csv
