In [None]:
import os

import openai
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# Set the OpenAI API key from the environment. `os` and the `openai` module
# itself must be imported for this line to run on a fresh kernel; importing
# only the `OpenAI` class does not bind the name `openai`.
# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [None]:
import pandas as pd

def process_data(df):
    """
    Render every row of a review DataFrame as one formatted text block.

    :param df: DataFrame with the columns 'review', 'replyContent',
               'thumbsUpCount', 'reviewCreatedVersion' and 'timeToReply'
    :return: list of strings, one per row, in row order
    """
    columns = ('review', 'replyContent', 'thumbsUpCount', 'reviewCreatedVersion', 'timeToReply')

    def _render(record):
        # Pull the fields out in a fixed order, then lay them out as
        # review / metadata / reply / reply delay.
        review_text, reply_text, thumbs_up, created_version, reply_delay = (
            record[column] for column in columns
        )
        return (
            f"Review: {review_text}\n"
            f"thumbsUpCount: {thumbs_up}, reviewCreatedVersion: {created_version}\n"
            f"Reply: {reply_text}\n"
            f"timeToReply: {reply_delay}"
        )

    return [_render(record) for _, record in df.iterrows()]

import pandas as pd

# Settings used when the batch request file is built further down.
model_name = "gpt-4o-mini"
save_file = "data_formatted.jsonl"

# Empty placeholder frame; none of the code visible in this notebook reads it.
new_df = pd.DataFrame()

# Load the review data and turn each row into one prompt string with the
# existing process_data function.
original_df = pd.read_csv('../test.csv')
reviews = process_data(original_df)

import json

# The instruction is the same for every request, so it is written out once.
system_prompt = "Analyze the review and determine its sentiment. Return a score from 0 to 100, where 0 is extremely negative, 100 is extremely positive, and 50 is neutral. Return only the numerical score."

# Write one batch request per line (JSONL).
with open(save_file, 'w', encoding='utf-8') as jsonl_file:
    # Request ids are numbered from request-2 upwards.
    for request_number, review_text in enumerate(reviews, start=2):
        request = {
            "custom_id": f"request-{request_number}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": review_text},
                ],
                "max_tokens": 1000,  # adjust as needed
            },
        }
        # Serialize the request and terminate it with a newline.
        jsonl_file.write(json.dumps(request, ensure_ascii=False) + '\n')

print(f"JSONLファイルが作成されました: {save_file}")

client = OpenAI()

# Upload the request file for batch processing. The handle is opened in a
# `with` block so it is closed as soon as the upload returns, and the path is
# taken from save_file so the uploaded file is the one this cell just wrote.
with open(save_file, "rb") as batch_input:
    posted_data = client.files.create(
        file=batch_input,
        purpose="batch"
    )

# Create the batch job that runs every request in the uploaded file against
# the chat completions endpoint within a 24h completion window.
batch = client.batches.create(
    input_file_id=posted_data.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch)

JSONLファイルが作成されました: data_formatted.jsonl
Batch(id='batch_1q92o02jrvnSMFrwefx5gQ6s', completion_window='24h', created_at=1723728295, endpoint='/v1/chat/completions', input_file_id='file-zXBsBZYQS5m3rL9Dz1rsjuh8', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723814695, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


In [6]:
from openai import OpenAI
# Id of the batch job whose current status is looked up.
SCORING_BATCH_ID = "batch_1q92o02jrvnSMFrwefx5gQ6s"

client = OpenAI()

# Last expression of the cell, so the returned Batch object is displayed.
client.batches.retrieve(SCORING_BATCH_ID)

Batch(id='batch_1q92o02jrvnSMFrwefx5gQ6s', completion_window='24h', created_at=1723728295, endpoint='/v1/chat/completions', input_file_id='file-zXBsBZYQS5m3rL9Dz1rsjuh8', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1723733365, error_file_id=None, errors=None, expired_at=None, expires_at=1723814695, failed_at=None, finalizing_at=1723732685, in_progress_at=1723728299, metadata=None, output_file_id='file-NXOe0f5QiBqKgumdBLvUuW2j', request_counts=BatchRequestCounts(completed=5846, failed=0, total=5846))

In [7]:
import json

import pandas as pd
from openai import OpenAI

client = OpenAI()

# Download the batch output file (one JSON object per line).
file_response = client.files.content("file-NXOe0f5QiBqKgumdBLvUuW2j")


def _extract_content(record):
    """Return the message content of one batch-output record, or None.

    Walks "response" -> "body" -> "choices" -> [0] -> "message" -> "content".
    A null "response", a missing level or an empty "choices" list yields None
    instead of raising.
    """
    body = (record.get("response") or {}).get("body") or {}
    choices = body.get("choices") or []
    if not choices:
        return None
    return (choices[0].get("message") or {}).get("content")


# Index the results by custom_id. The order of lines in a batch output file is
# not guaranteed to match the order of the input file, so results must be
# matched to their rows by id rather than by position.
content_by_id = {}
for line in file_response.text.splitlines():
    if not line.strip():
        continue  # skip blank lines
    data = json.loads(line)
    content_by_id[data.get("custom_id")] = _extract_content(data)

df = pd.read_csv('../test.csv')

# The request file was written with custom_id "request-{i+2}" for row i, so the
# same formula maps each row back to its result. Rows without a usable result
# get None, which keeps the column the same length as the frame and aligned.
content_list = [content_by_id.get(f"request-{i + 2}") for i in range(len(df))]

missing_count = sum(content is None for content in content_list)
if missing_count:
    print(f"Warning: {missing_count} of {len(df)} rows have no result; PN is left empty for them.")

df["PN"] = content_list
df.to_csv('added_test.csv', index=False)

In [23]:
from openai import OpenAI
# Id of the batch job to cancel.
BATCH_TO_CANCEL_ID = "batch_ovI3kPD1Vo2j7YQCcM0rvQUj"

client = OpenAI()

# Last expression of the cell, so the returned Batch object is displayed.
client.batches.cancel(BATCH_TO_CANCEL_ID)

Batch(id='batch_ovI3kPD1Vo2j7YQCcM0rvQUj', completion_window='24h', created_at=1723721154, endpoint='/v1/chat/completions', input_file_id='file-K8kzDiu6FSA63fBcHba2jAEH', object='batch', status='cancelled', cancelled_at=1723723681, cancelling_at=1723722548, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723807554, failed_at=None, finalizing_at=None, in_progress_at=1723721160, metadata=None, output_file_id='file-Xjw0kz0bZYe9yXGRLmpaAeMa', request_counts=BatchRequestCounts(completed=8767, failed=0, total=8767))

In [11]:
!pip install statsmodels

Collecting statsmodels
  Downloading statsmodels-0.14.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-0.5.6-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading statsmodels-0.14.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.8/10.8 MB 59.2 MB/s eta 0:00:00
Downloading patsy-0.5.6-py2.py3-none-any.whl (233 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 233.9/233.9 kB 31.2 MB/s eta 0:00:00
Installing collected packages: patsy, statsmodels
Successfully installed patsy-0.5.6 statsmodels-0.14.2


In [2]:
import transformers
import torch

# The prompt below is built with the tokenizer's chat template and generation
# stops on <|eot_id|>, which is the chat format of the instruction-tuned
# checkpoint, so the "-Instruct" repo is used rather than the base model.
# NOTE: this repo is gated. Access has to be requested on the model page and
# the session must be authenticated with Hugging Face, otherwise loading fails
# with "You are trying to access a gated repo" (403).
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Text-generation pipeline with bfloat16 weights on the CUDA device.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)

# Chat turns: a system persona followed by the user's request.
messages = [
    {"role": "system", "content": "あなたはプロの英会話講師です。"},
    {"role": "user", "content": "「ここから市役所まではどれくらいの時間がかかりますか？」を英語で回答してください。"},
]

# Render the turns into a single prompt string (not token ids) that ends with
# the marker for the start of the assistant's reply.
prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
)

# Generation stops at either the tokenizer's EOS token or the end-of-turn token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
# The generated text starts with the prompt; slice it off to show only the reply.
print(outputs[0]["generated_text"][len(prompt):])

  warn(f"Failed to load image Python extension: {e}")
2024-08-20 06:18:31.461700: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 06:18:31.476875: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 06:18:31.481375: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-20 06:18:31.494811: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct.
403 Client Error. (Request ID: Root=1-66c3b6a9-4882e5787834bea86acd78f5;b691ca96-42bb-4946-bc80-b81e7ca7d491)

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct to ask for access.