In [1]:
from autolabel import LabelingAgent

from pathlib import Path
import json
import copy

from datasets import load_dataset
import pandas as pd

# Load the English "subtask5" configuration of the SemEval-2018 Task 1 dataset,
# restricted to its train split.
dataset = load_dataset(
    path="sem_eval_2018_task_1",
    name="subtask5.english",
    split="train",
)



In [2]:
# Show the raw rows as a DataFrame: one boolean column per emotion, plus the
# "ID" and "Tweet" columns.
pd.DataFrame(data=dataset)

Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,2017-En-21441,“Worry is a down payment on a problem you may ...,False,True,False,False,False,False,True,False,False,False,True
1,2017-En-31535,Whatever you decide to do make sure it makes y...,False,False,False,False,True,True,True,False,False,False,False
2,2017-En-21068,@Max_Kellerman it also helps that the majorit...,True,False,True,False,True,False,True,False,False,False,False
3,2017-En-31436,Accept the challenges so that you can literall...,False,False,False,False,True,False,True,False,False,False,False
4,2017-En-22195,My roommate: it's okay that we can't spell bec...,True,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6833,2017-En-21383,@nicky57672 Hi! We are working towards your hi...,False,False,False,False,False,False,False,False,False,False,False
6834,2017-En-41441,@andreamitchell said @berniesanders not only d...,False,True,False,False,False,False,False,False,False,True,False
6835,2017-En-10886,@isthataspider @dhodgs i will fight this guy! ...,True,False,True,False,False,False,False,True,False,False,False
6836,2017-En-40662,i wonder how a guy can broke his penis while h...,False,False,False,False,False,False,False,False,False,True,False


In [3]:
def emotions_to_label(row):
    """Collapse one row's boolean emotion flags into a single label string.

    Args:
        row: Mapping of column name to value for one example. The emotion
            columns hold booleans; the "ID" and "Tweet" columns are ignored.

    Returns:
        A one-key dict ``{"label": ...}``. The value is the names of the
        flagged emotions joined with ", " in column order, or "neutral" when
        no emotion is flagged.
    """
    flagged = [
        column
        for column, value in row.items()
        # The identity check keeps only genuine True flags, so a string value
        # (including "label" itself if this cell is re-run) never matches.
        if column not in ("ID", "Tweet") and value is True
    ]
    # An empty join means no emotion was flagged; fall back to "neutral".
    return {"label": ", ".join(flagged) or "neutral"}


# Build the comma separated label column (with the "neutral" fallback) in a
# single pass over the dataset.
dataset = dataset.map(emotions_to_label)

pd.DataFrame(dataset)



Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust,label
0,2017-En-21441,“Worry is a down payment on a problem you may ...,False,True,False,False,False,False,True,False,False,False,True,"anticipation, optimism, trust"
1,2017-En-31535,Whatever you decide to do make sure it makes y...,False,False,False,False,True,True,True,False,False,False,False,"joy, love, optimism"
2,2017-En-21068,@Max_Kellerman it also helps that the majorit...,True,False,True,False,True,False,True,False,False,False,False,"anger, disgust, joy, optimism"
3,2017-En-31436,Accept the challenges so that you can literall...,False,False,False,False,True,False,True,False,False,False,False,"joy, optimism"
4,2017-En-22195,My roommate: it's okay that we can't spell bec...,True,False,True,False,False,False,False,False,False,False,False,"anger, disgust"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6833,2017-En-21383,@nicky57672 Hi! We are working towards your hi...,False,False,False,False,False,False,False,False,False,False,False,neutral
6834,2017-En-41441,@andreamitchell said @berniesanders not only d...,False,True,False,False,False,False,False,False,False,True,False,"anticipation, surprise"
6835,2017-En-10886,@isthataspider @dhodgs i will fight this guy! ...,True,False,True,False,False,False,False,True,False,False,False,"anger, disgust, pessimism"
6836,2017-En-40662,i wonder how a guy can broke his penis while h...,False,False,False,False,False,False,False,False,False,True,False,surprise


In [4]:
# Keep only the tweet text (renamed to "example") and the "label" column, then
# split off 1% of the rows with a fixed seed.
dataset = (
    dataset.remove_columns(
        [
            "ID",
            "anger",
            "anticipation",
            "disgust",
            "fear",
            "joy",
            "love",
            "optimism",
            "pessimism",
            "sadness",
            "surprise",
            "trust",
        ]
    )
    .rename_column("Tweet", "example")
    .train_test_split(test_size=0.01, seed=42)
)

# The naming is intentional: the large "train" split is written to test.csv
# (the rows to be labeled), while the 1% "test" split is written to seed.csv
# (referenced by the config as the few-shot examples file).
test_dataset = dataset["train"]
seed_dataset = dataset["test"]

test_dataset.to_csv("test.csv")
seed_dataset.to_csv("seed.csv")



Creating CSV from Arrow format:   0%|          | 0/7 [00:00<?, ?ba/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

7917

In [5]:
# Configuration dict handed to LabelingAgent for the multi-label emotion task.
# The "{labels}" placeholder in the task guidelines is filled with the entries
# of the "labels" list (see the rendered prompt in the run output).
config = dict(
    task_name="EmotionClassification",
    task_type="multi_label_classification",
    dataset=dict(
        label_column="label",
        delimiter=",",
    ),
    model=dict(
        provider="openai",
        name="gpt-3.5-turbo",
    ),
    prompt=dict(
        task_guidelines="You are an expert at classifying tweets as neutral or one or more of the given emotions that best represent the mental state of the poster.\nYour job is to correctly label the provided input example into one or more of the following categories:\n{labels}",
        output_guidelines="You will return the answer as a comma separated list of labels sorted in alphabetical order. For example: \"label1, label2, label3\"",
        # "neutral" first, followed by the eleven emotion column names.
        labels=[
            "neutral",
            "anger", "anticipation", "disgust", "fear",
            "joy", "love", "optimism", "pessimism",
            "sadness", "surprise", "trust",
        ],
        # seed.csv is the small split written out by the preprocessing cell.
        few_shot_examples="seed.csv",
        few_shot_selection="semantic_similarity",
        few_shot_num=5,
        example_template="Input: {example}\nOutput: {label}",
    ),
)

In [6]:
# Create the labeling agent from the `config` dict; `cache=False` is passed
# through as-is (presumably disables autolabel's caching -- TODO confirm).
agent = LabelingAgent(config=config, cache=False)
# Plan over at most 64 rows of test.csv. Judging by the recorded output, this
# step prints an example prompt before anything is labeled.
agent.plan("test.csv", max_items=64)
# Label the same 64-row slice. This must stay the last expression so the cell
# displays the returned result (the recorded output shows a 64-entry
# "EmotionClassification_llm_label" series). NOTE(review): the recorded run
# reports a non-zero "Actual Cost", so this presumably issues billed API calls.
agent.run("test.csv", max_items=64)

Output()

You are an expert at classifying tweets as neutral or one or more of the given emotions that best represent the mental state of the poster.
Your job is to correctly label the provided input example into one or more of the following categories:
neutral
anger
anticipation
disgust
fear
joy
love
optimism
pessimism
sadness
surprise
trust

You will return the answer as a comma separated list of labels sorted in alphabetical order. For example: "label1, label2, label3"

Some examples with their output answers are provided below:

Input: Okay you've annoyed me, you haven't done a good job there at all. #furious
Output: anger, disgust

Input: At the end of the day you gotta be #happy for you.
Output: joy, optimism

Input: Lol Adam the Bull with his fake outrage...
Output: anger, disgust, joy

Input: In  fact,  sometimes i don't get furious at people who wrong me, but   i get  furious at  myself  for being  a  fool.
Output: anger, disgust

Input: @_stardust_3 unless your concern is people figuri

2023-06-27 14:32:31 autolabel.labeler INFO: Task run already exists.


You are an expert at classifying tweets as neutral or one or more of the given emotions that best represent the mental state of the poster.
Your job is to correctly label the provided input example into one or more of the following categories:
neutral
anger
anticipation
disgust
fear
joy
love
optimism
pessimism
sadness
surprise
trust

You will return the answer as a comma separated list of labels sorted in alphabetical order. For example: "label1, label2, label3"

Some examples with their output answers are provided below:

Input: In my room 101 would go  Russell Howard,Tom Odell,Michael Buble!!! #pants #nogood 
Output: anger, disgust, pessimism, sadness

Input: @FullTimeDEVILS Memphis looking bright. Rojo looking like Rojo.
Output: joy, optimism

Input: Yet again another night I should've stayed in😊
Output: joy

Input: I hope a goal comes soon on either side. Otherwise there is a serious threat of a dull and frustrating game. #COYS
Output: anger, disgust, pessimism, sadness

Input: Wat

anticipation, joy, optimism, trust


Output()

Actual Cost: 0.0318


(0                           neutral
 1                           sadness
 2                    anger, disgust
 3                  joy, love, trust
 4                   optimism, trust
                   ...              
 59    anger, fear, pessimism, trust
 60        anger, disgust, pessimism
 61             fear, sadness, trust
 62                neutral, surprise
 63                   neutral, trust
 Name: EmotionClassification_llm_label, Length: 64, dtype: object,
                                               example   
 0   When you break a record in #madden I wish it d...  \
 1                               Up on melancholy hill   
 2                      @eMilsOnWheels I'm furious 😩😩😩   
 3   @TiganVarcolac &lt; feel this precious pleasur...   
 4   This is not me brown nosing but I've listened ...   
 ..                                                ...   
 59  Pakistan continues to treat #terror as a matte...   
 60  @brian5or6 turn that shit off!   Home Button u...   
 61 