In [1]:
from PIL import Image
import pandas as pd
from tqdm import tqdm
import os
import base64
import requests
import sys
import os
from dotenv import load_dotenv
import hashlib
import json
import time
import random

In [17]:
# Path to the .env file that defines the AZURE_* variables read below.
env_path="write your environment path"
load_dotenv(dotenv_path=env_path)

# Name of the Azure OpenAI deployment to call.
deployment_name = os.environ['AZURE_ENGINE_NAME']

# The base URL for your Azure OpenAI resource, e.g. "https://<your resource name>.openai.azure.com"
openai_api_base = os.environ['AZURE_API_URL']

# The API key for your Azure OpenAI resource.
openai_api_key = os.environ['AZURE_API_KEY']

# API version string; available versions are listed at
# https://learn.microsoft.com/azure/ai-services/openai/reference
openai_api_version = os.environ['AZURE_API_VERSION']

# Request URL. A trailing "/" on the configured base URL is stripped so the
# path does not start with "//".
api_url = f"{openai_api_base.rstrip('/')}/openai/deployments/{deployment_name}/chat/completions?api-version={openai_api_version}"

# The api-key is sent as an HTTP header on every request.
headers =  {"api-key": openai_api_key}

    
def generate(user_prompt, sys_prompt, temp, top_p=0.95, max_tokens=4000, timeout=300):
    """POST one system + user prompt pair to the chat-completions endpoint.

    The request goes to the module-level ``api_url`` with the module-level
    ``headers`` and asks for a JSON-object response (``response_format``).

    Args:
        user_prompt: Text placed in the "user" message.
        sys_prompt: Text placed in the "system" message.
        temp: Value sent as ``temperature``.
        top_p: Value sent as ``top_p``.
        max_tokens: Value sent as ``max_tokens``.
        timeout: Seconds handed to ``requests.post`` as its ``timeout``; without
            one, the call can wait indefinitely on an unresponsive server.

    Returns:
        The object returned by ``requests.post``. No status check is done here,
        so callers should inspect the response before parsing it.
    """

    def text_message(role, text):
        # Each message carries its text as a single "text" content part.
        return {"role": role, "content": [{"type": "text", "text": text}]}

    json_data = {
        # NOTE(review): the deployment is already selected by api_url; presumably
        # this field is informational for Azure - confirm before relying on it.
        "model": "gpt-4o",
        "messages": [
            text_message("system", sys_prompt),
            text_message("user", user_prompt),
        ],
        "temperature": temp,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "response_format": {"type": "json_object"},
    }

    return requests.post(api_url, headers=headers, json=json_data, timeout=timeout)

In [32]:
# Role description sent as the system message.
system_prompt = " ".join([
    "You are an expert LLM developer with expertise in writing instructions.",
    "Creating diverse instructions is crucial when developing instruction-following datasets.",
    "Please write instructions tailored to the user's needs.",
])

# Task description sent as the user message.
user_prompt = " ".join([
    "We are creating an instruction-following dataset for multilabel emotion classification.",
    "The dataset contains tweets, and each tweet can have one or more labels.",
    "The labels in our dataset include 'Joy', 'Fear', 'Anger', 'Love', 'Pessimism', 'Trust',",
    "'Optimism', 'Surprise', 'Sadness', 'Disgust', and 'Anticipation'. Note that a tweet may not have any emotion at all.",
    "Please write 100 diverse instructions. Provide response in JSON format in a form of list.",
])

# Sampling temperature forwarded to generate().
temp = 0

response = generate(user_prompt=user_prompt, sys_prompt=system_prompt, temp=temp)

In [35]:
# Surface HTTP-level failures here instead of as a confusing KeyError on
# 'choices' further down.
response.raise_for_status()

json_text = response.json()['choices'][0]['message']['content']
parsed = json.loads(json_text)

# The prompt asks for "a list" without naming a key, so the wrapper key is not
# guaranteed to be 'instructions'. Accept a bare list, the expected key, or a
# single list-valued key; anything else is reported explicitly.
if isinstance(parsed, list):
    instructions = parsed
elif isinstance(parsed, dict) and 'instructions' in parsed:
    instructions = parsed['instructions']
elif isinstance(parsed, dict):
    list_values = [value for value in parsed.values() if isinstance(value, list)]
    if len(list_values) != 1:
        raise ValueError(
            f"Could not locate the instruction list in the model output; top-level keys: {list(parsed)}"
        )
    instructions = list_values[0]
else:
    raise ValueError(
        f"Expected a JSON object or list from the model, got {type(parsed).__name__}"
    )


In [36]:
## write to file
# Output location for the generated instructions, relative to this notebook.
data_dir = "../"
file_path = f"{data_dir}data/instructions_emotion.json"

# Serialise with a 4-space indent and write the text out.
with open(file_path, 'w') as out_file:
    out_file.write(json.dumps(instructions, indent=4))  #ensure_ascii=False,


## check labels

In [28]:
import pandas as pd
import ast

# Load the emotion training CSV into a DataFrame
df = pd.read_csv("../data/train_emotion.csv")

# Function to make items in a list unique
def make_unique(items):
    """Return the distinct elements of ``items`` as a list, in first-seen order.

    ``dict.fromkeys`` keeps insertion order, so the result is the same on every
    run; going through ``set`` gave an order that could change between
    interpreter sessions for string items. Items must be hashable.
    """
    return list(dict.fromkeys(items))

# Parse each stringified label list and drop duplicate labels within a row
df['Emotion_Label'] = df['Emotion_Label'].apply(lambda x: make_unique(ast.literal_eval(x)))

# Collect all unique labels across rows. A plain loop is used because the
# update is a side effect, which Series.apply is not meant for.
unique_labels = set()
for row_labels in df['Emotion_Label']:
    unique_labels.update(row_labels)

# Print the unique labels in sorted order so the output is identical on every
# run (set iteration order is not stable for strings).
print("Unique Emotion Labels:")
for label in sorted(unique_labels, key=str):
    print(label)


Unique Emotion Labels:
Joy
Fear
Anger
Love
Pessimism
Trust
Optimisim
Surprise
No emotions
Sadness
Disgust
Anticipation
