# Generating JSONL File

In [16]:
import pandas as pd
import json

# Build the batch input file: one chat-completion request per spreadsheet row.

# read the Excel file
excel_file = 'texts_cleaned.xlsx'
sheet_name = '2010 - 2014'
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# prepare the output JSONL file
output_file = '2010_2014(3).jsonl'

# define fixed values shared by every request line
model = "gpt-4o-mini"
system_content = "For the inputted text, analyse the market sentiments on Singapore's industrial market. Rate it between -1 to 1, whether the text is suggesting a boom or bust in the industrial properties market. Only give me the value."
url_template = "/v1/chat/completions"
method = "POST"

# Index labels of rows that had no usable text and were left out of the file.
skipped_rows = []

# open the file for writing; the encoding is explicit so the output does not
# depend on the platform's default text encoding
with open(output_file, 'w', encoding='utf-8') as file:
    for index, row in df.iterrows():
        # extract the 'Text' column for user content
        user_content = row['Text']

        # An empty cell is read as NaN, which json.dumps would emit as the
        # bare token NaN (not valid JSON). Skip such rows instead of writing
        # an unusable request line.
        if pd.isna(user_content) or not str(user_content).strip():
            skipped_rows.append(index)
            continue

        # generate custom_id from the row index, so skipped rows do not shift
        # the ids of the rows that follow
        custom_id = f"request-{index + 1}"

        # create the JSON structure for one request
        json_line = {
            "custom_id": custom_id,
            "method": method,
            "url": url_template,
            "body": {
                "model": model,
                "messages": [
                    {"role": "system", "content": system_content},
                    # str() keeps non-string cells (e.g. numbers) serialisable
                    # as message text
                    {"role": "user", "content": str(user_content)}
                ],
                "max_tokens": 10
            }
        }

        # write the JSON line to the file
        file.write(json.dumps(json_line) + '\n')

if skipped_rows:
    print(f'Skipped {len(skipped_rows)} row(s) with no text: {skipped_rows}')


# Upload File

In [17]:
from openai import OpenAI
client = OpenAI()

# Upload the batch input file. The context manager closes the file handle
# once the upload call returns, instead of leaving it open.
with open("2010_2014(3).jsonl", "rb") as batch_input:
    uploaded_file = client.files.create(
        file=batch_input,
        purpose="batch"
    )

# Keep the returned object as the cell's last expression so the notebook
# still displays it.
uploaded_file

FileObject(id='file-ltad2Vw6Sk0apkK3nLmz7GLp', bytes=443292, created_at=1723011969, filename='2010_2014(3).jsonl', object='file', purpose='batch', status='processed', status_details=None)

# Create Batch

In [18]:
# Submit a batch job for the given input file against the chat-completions
# endpoint, with a 24h completion window.
batch_request = {
    "input_file_id": "file-ltad2Vw6Sk0apkK3nLmz7GLp",
    "endpoint": "/v1/chat/completions",
    "completion_window": "24h",
}
client.batches.create(**batch_request)

Batch(id='batch_RegEVUfEjx3JB7yfQmdlhyo1', completion_window='24h', created_at=1723011991, endpoint='/v1/chat/completions', input_file_id='file-ltad2Vw6Sk0apkK3nLmz7GLp', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723098391, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

# Retrieve Batch

In [15]:
# Fetch the batch job with this ID.
batch_id = "batch_0qnU69dwioIxCL3nNUgc3F3Z"
client.batches.retrieve(batch_id)

Batch(id='batch_0qnU69dwioIxCL3nNUgc3F3Z', completion_window='24h', created_at=1723011380, endpoint='/v1/chat/completions', input_file_id='file-sKALRTzPcowTdKboQFo42suU', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1723011395, error_file_id=None, errors=None, expired_at=None, expires_at=1723097780, failed_at=None, finalizing_at=1723011392, in_progress_at=1723011380, metadata=None, output_file_id='file-bZW75DXg20MxRq5yBHJZ9D3G', request_counts=BatchRequestCounts(completed=78, failed=0, total=78))

# Convert to Excel

In [None]:
# Imported here so this cell runs on its own, without relying on an earlier
# cell having imported json.
import json

import pandas as pd


def _message_content(record):
    """Return the first choice's message content, or None if there is none.

    A record whose 'response' is null, whose body has no 'choices', or whose
    first choice has no message content yields None instead of raising.
    """
    response = record.get('response') or {}
    body = response.get('body') or {}
    choices = body.get('choices') or []
    if not choices:
        return None
    message = choices[0].get('message') or {}
    return message.get('content')


data = []
# custom_ids of records that carried no message content.
missing_ids = []

input_file = '/chatgpt/2010_2014/path_to_your_jsonl_file.jsonl'
with open(input_file, 'r', encoding='utf-8') as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        record = json.loads(line)
        content = _message_content(record)
        if content is None:
            missing_ids.append(record['custom_id'])
        extracted_data = {
            'custom_id': record['custom_id'],
            'message_content': content
        }
        data.append(extracted_data)

# Create a DataFrame from the extracted data; the columns are fixed so the
# sheet keeps its headers even when no records were read
df = pd.DataFrame(data, columns=['custom_id', 'message_content'])

# Write the DataFrame to an Excel file
output_file = 'output.xlsx'
df.to_excel(output_file, index=False)

if missing_ids:
    print(f'Warning: {len(missing_ids)} record(s) had no message content: {missing_ids}')

print(f'Data has been successfully extracted to {output_file}')
