# Generating JSONL File

In [None]:
# Candidate system prompts for the sentiment task. Each entry asks for either
# a numeric score in [-1, 1] or a positive/neutral/negative label. Adjacent
# string literals are concatenated by Python, so every entry is one string.
text = [
    (
        "Analyze the market sentiments on Singapore's industrial market. "
        "Rate it from -1 to 1 on the sentiments for Singapore's industrial market. "
        "Only give me the value."
    ),
    (
        "Analyse the market sentiments on Singapore's industrial market. "
        "Rate it either positive, neutral or negative for the sentiments for "
        "Singapore's industrial market, whether the text is indicating a "
        "positive, neutral or negative outlook. "
        "Only give me the value."
    ),
    (
        "For the inputted text, analyse the market sentiments on Singapore's industrial market. "
        "Rate it between -1 to 1, whether the text is suggesting a boom or bust "
        "in the industrial properties market. "
        "Only give me the value."
    ),
    (
        "For the inputted text, analyse the market sentiments on Singapore's industrial market. "
        "Rate it either positive, neutral or negative for the sentiments for "
        "Singapore's industrial market. "
        "Only give me the value."
    ),
    (
        "Based on the provided text, predict the market for Singapore's industrial properties. "
        "Rate the sentiment on a scale from -1 to 1, where -1 indicates a very negative outlook, "
        "0 is neutral, and 1 signifies a very positive outlook. "
        "Please provide only the numeric sentiment value."
    ),
]

In [22]:
import pandas as pd
import json

# Build the batch input file: one chat-completion request per row of the
# Excel sheet, written as one JSON object per line (JSONL).

# read the Excel file
excel_file = 'texts_cleaned.xlsx'
sheet_name = '2020 - 2024'
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# prepare the output JSONL file
output_file = '2020_2024(5).jsonl'

# define fixed values shared by every request
model = "gpt-4o-mini"
system_content = "Analyze the market sentiments on Singapore's industrial market. Rate it from -1 to 1 on the sentiments for Singapore's industrial market. Only give me the value."
url_template = "/v1/chat/completions"
method = "POST"

# open the file for writing (explicit encoding so the result does not depend
# on the platform default)
with open(output_file, 'w', encoding='utf-8') as file:
    for index, row in df.iterrows():
        # extract the 'Text' column for user content
        user_content = row['Text']

        # An empty cell is read by pandas as NaN, which json.dumps would emit
        # as the bare token NaN (not valid JSON). Skip such rows instead of
        # writing an unusable request line.
        if pd.isna(user_content):
            continue

        # custom_id stays tied to the row position (1-based), so skipped rows
        # leave a gap rather than shifting the numbering of later rows.
        custom_id = f"request-{index + 1}"

        # create the JSON structure for this request
        json_line = {
            "custom_id": custom_id,
            "method": method,
            "url": url_template,
            "body": {
                "model": model,
                "messages": [
                    {"role": "system", "content": system_content},
                    # str() keeps string cells unchanged and turns numeric or
                    # date cells into text, so the content is always a string.
                    {"role": "user", "content": str(user_content)}
                ],
                "max_tokens": 10
            }
        }

        # write the JSON line to the file
        file.write(json.dumps(json_line) + '\n')

# Upload File

In [31]:
from openai import OpenAI
client = OpenAI()

# Upload the JSONL request file with purpose "batch". The handle is opened in
# a `with` block so it is closed as soon as the upload call returns; passing a
# bare open(...) left the file handle open.
with open("2020_2024(5).jsonl", "rb") as batch_file:
    uploaded_file = client.files.create(
        file=batch_file,
        purpose="batch",
    )

# Last expression of the cell, so the notebook still displays the returned
# file object.
uploaded_file

FileObject(id='file-2KfRfBpRuRJmd3MvGyKigi3W', bytes=1727748, created_at=1723024669, filename='2020_2024(5).jsonl', object='file', purpose='batch', status='processed', status_details=None)

# Create Batch

In [32]:
# Submit the uploaded request file as a batch job against the chat
# completions endpoint, with a 24-hour completion window.
batch_input_file_id = "file-2KfRfBpRuRJmd3MvGyKigi3W"
client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)

Batch(id='batch_VyWlTa0NeYE7ebHgUjPQIxpG', completion_window='24h', created_at=1723024677, endpoint='/v1/chat/completions', input_file_id='file-2KfRfBpRuRJmd3MvGyKigi3W', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723111077, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

# Retrieve Batch

In [39]:
# Look up the current state of a batch job by its ID.
batch_id = "batch_f8FszXNq8cTH8ypedzbtfW6j"
client.batches.retrieve(batch_id)

Batch(id='batch_f8FszXNq8cTH8ypedzbtfW6j', completion_window='24h', created_at=1723015390, endpoint='/v1/chat/completions', input_file_id='file-VoXIfMd7kdRgyIEfVFnRYeBC', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1723101790, failed_at=None, finalizing_at=None, in_progress_at=1723015391, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=43, failed=0, total=78))

# Convert to Excel

## Convert JSONL to Excel

In [39]:
import pandas as pd

import json
from pathlib import Path

# Flatten a batch output JSONL file into two columns (custom_id and the reply
# text of the first choice) and store them in one sheet of an Excel workbook.

data = []

input_file = './chatgpt/2020_2024/batch_OYYAcCnCIlRV9U8HKUjPlHJ1_output.jsonl'
with open(input_file, 'r', encoding='utf-8') as file:
    for line in file:
        # A blank line (e.g. a trailing newline at the end of the file) is
        # not a record; json.loads would raise on it.
        if not line.strip():
            continue
        record = json.loads(line)
        extracted_data = {
            'custom_id': record['custom_id'],
            'message_content': record['response']['body']['choices'][0]['message']['content']
        }
        data.append(extracted_data)

df = pd.DataFrame(data)

output_file = 'sentiments.xlsx'
sheet_name = '2020 - 2024'

# Append mode requires the workbook to exist already, and if_sheet_exists is
# only accepted in append mode. Create the workbook on the first run and
# replace just this sheet on later runs.
if Path(output_file).exists():
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    writer_options = {'mode': 'w'}

with pd.ExcelWriter(output_file, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)

## Combining More JSONL Files

In [43]:
def extract_data_from_jsonl(file_path):
    """Extract the custom ID and reply text from each record of a JSONL file.

    Every non-blank line must be a JSON object holding a ``custom_id`` key
    and the reply text under
    ``response -> body -> choices[0] -> message -> content``.

    Args:
        file_path: Path of the JSONL file to read. The file is decoded as
            UTF-8 regardless of the platform default.

    Returns:
        A list of dicts with the keys ``custom_id`` and ``message_content``,
        one per record, in file order. Blank lines are ignored.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError, IndexError, TypeError: If a record does not have the
            expected nested structure.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Skip blank lines (such as a trailing newline) instead of
            # letting json.loads fail on them.
            if not line.strip():
                continue
            record = json.loads(line)
            extracted_data = {
                'custom_id': record['custom_id'],
                'message_content': record['response']['body']['choices'][0]['message']['content']
            }
            data.append(extracted_data)
    return data

# Add the replies from one more batch output file to the existing sheet as a
# new column, matching rows on custom_id.

existing_file = 'sentiments.xlsx'
sheet_name = '2020 - 2024'
existing_df = pd.read_excel(existing_file, sheet_name=sheet_name)

new_jsonl_file = './chatgpt/2020_2024/batch_VyWlTa0NeYE7ebHgUjPQIxpG_output.jsonl'
new_data = extract_data_from_jsonl(new_jsonl_file)
# Fixing the columns keeps 'custom_id' present even when the file yields no
# records, so the merge below does not fail on a missing key.
new_df = pd.DataFrame(new_data, columns=['custom_id', 'message_content'])

# Give the incoming column a name that is not yet used in the sheet. Relying
# on fixed merge suffixes ('_x'/'_y') produces duplicate column names once
# the sheet already holds suffixed columns from earlier merges.
column_number = sum(
    str(column).startswith('message_content') for column in existing_df.columns
) + 1
new_column = f'message_content_{column_number}'
while new_column in existing_df.columns:
    column_number += 1
    new_column = f'message_content_{column_number}'
new_df = new_df.rename(columns={'message_content': new_column})

# Left join: every existing row is kept; rows without a match in the new
# file get an empty value in the new column.
merged_df = existing_df.merge(new_df, on='custom_id', how='left')

with pd.ExcelWriter(existing_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    merged_df.to_excel(writer, sheet_name=sheet_name, index=False)
