In [None]:
import json
import pandas as pd
from pathlib import Path
from datasets import load_dataset

In [11]:
# Dataset identifier and local cache location, named so they are easy to spot and change.
EMID_DATASET_ID = "orrzohar/EMID-Emotion-Matching"
EMID_CACHE_DIR = "/Utilisateurs/umushtaq/emorec_work/mdlt_er/datasets/emid_dataset"

ds = load_dataset(EMID_DATASET_ID, cache_dir=EMID_CACHE_DIR)

Resolving data files:   0%|          | 0/24 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/22 [00:00<?, ?it/s]

In [12]:
# Display the loaded DatasetDict to check its splits, feature names and row counts.
ds

DatasetDict({
    train: Dataset({
        features: ['audio', 'sampling_rate', 'image', 'same', 'emotion', 'question', 'answer'],
        num_rows: 24000
    })
    test: Dataset({
        features: ['audio', 'sampling_rate', 'image', 'same', 'emotion', 'question', 'answer'],
        num_rows: 6000
    })
})

In [20]:
# Keep only the image and label/text columns; 'audio' and 'sampling_rate' are dropped
# because the rest of the notebook builds an image-only task.
ds = ds.select_columns(['image', 'same', 'emotion', 'question', 'answer'])

In [23]:
# Look at one training example; this one has an empty 'emotion' string.
ds['train'][1]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375>,
 'same': False,
 'emotion': '',
 'question': 'Do both modalities convey the same mood? Say `yes - emotion` if they do; otherwise say `no`.',
 'answer': 'no'}

In [24]:
def _has_emotion_label(example):
    """Return True when the example carries a non-empty emotion label."""
    return example["emotion"] != ""


# Drop every example whose 'emotion' field is the empty string.
ds = ds.filter(_has_emotion_label)

Filter:   0%|          | 0/24000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/6000 [00:00<?, ? examples/s]

In [25]:
# Re-display the DatasetDict to see how many rows remain after filtering.
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'same', 'emotion', 'question', 'answer'],
        num_rows: 11962
    })
    test: Dataset({
        features: ['image', 'same', 'emotion', 'question', 'answer'],
        num_rows: 3038
    })
})

In [26]:
# Number of leading training rows kept for the exported subset.
N_TRAIN_SAMPLES = 3000

# Take the first N_TRAIN_SAMPLES rows of the filtered train split (no shuffling).
ds_selected = ds["train"].select(range(N_TRAIN_SAMPLES))  # type: ignore

In [27]:
# Confirm the subset's columns and row count.
ds_selected

Dataset({
    features: ['image', 'same', 'emotion', 'question', 'answer'],
    num_rows: 3000
})

In [29]:
# Materialise the selected rows as a pandas DataFrame. As the preview shows, the
# 'image' column holds decoded PIL image objects, not file paths.
df = pd.DataFrame(ds_selected) 
df.head()

Unnamed: 0,image,same,emotion,question,answer
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,True,anger,Compare the audio with the image: are they exp...,yes - anger
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,True,anger,Compare the audio with the image: are they exp...,yes - anger
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,True,excitement,Judge whether the song and the image share an ...,yes - excitement
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,True,sadness,Do both modalities convey the same mood? Say `...,yes - sadness
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,True,anger,Listen to the audio and inspect the picture. A...,yes - anger


In [45]:
# List the distinct emotion labels present in the subset; the prompt's label list
# in generation_instruction should match these.
df.emotion.unique()

array(['anger', 'excitement', 'sadness', 'fear', 'contentment',
       'amusement', 'awe'], dtype=object)

In [None]:
def generation_instruction(row):
    """Return the emotion-classification prompt attached to every sample.

    The prompt text is the same for every row; ``row`` is accepted only so the
    function can be used with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: The row being processed. It is not read.

    Returns:
        str: Prompt asking for exactly one of the seven emotion labels listed
        in the text, answered as a JSON object ``{"emotion": "<emotion>"}``.
    """
    # Plain (non-f) string: nothing is interpolated, so the JSON braces in the
    # response-format line need no escaping.
    instruction = """You are an expert art analyst specializing in emotional interpretation of visual images. Your task is to analyze a diverse range of images and identify the single dominant emotion they evoke in a viewer.

Analyze the emotional content of this image and identify the single most dominant emotion it conveys or evokes.

Choose exactly one emotion from the following list:
['anger', 'excitement', 'sadness', 'fear', 'contentment', 'amusement', 'awe']

Consider the following when making your assessment:
- Color palette and tone (warm/cool, saturated/muted)
- Composition and visual tension
- Subject matter and imagery
- Brushwork or line quality (chaotic, gentle, rigid, etc.)
- Overall mood the artwork projects

Respond only with a JSON object in this exact format:
{"emotion": "<emotion>"}

Do not include any explanation, commentary, or additional fields.

   """

    return instruction

In [36]:
def build_output(row):
    """Wrap the row's ``emotion`` attribute in the target answer dict.

    Args:
        row: Any object exposing an ``emotion`` attribute (e.g. a DataFrame row).

    Returns:
        dict: ``{"emotion": <row.emotion>}``.
    """
    label = row.emotion
    return dict(emotion=label)

In [37]:
# Attach the prompt to each row; the function is passed directly, no lambda wrapper needed.
df["instruction"] = df.apply(generation_instruction, axis=1)

In [38]:
# Replace the dataset's original 'answer' text with the {"emotion": ...} target dict.
df["answer"] = df.apply(build_output, axis=1)  # type: ignore

In [39]:
# Print the prompt stored for the first row (print keeps the line breaks readable).
# NOTE(review): the saved output of this cell shows a different prompt (another
# emotion list and "artwork" wording) than the current generation_instruction
# returns — it looks stale; re-run the notebook top to bottom to refresh it.
print(df.iloc[0]['instruction'])

You are an expert art analyst specializing in emotional interpretation of visual artwork. Your task is to analyze paintings and drawings and identify the single dominant emotion they evoke in a viewer.

Analyze the emotional content of this artwork and identify the single most dominant emotion it conveys or evokes.

Choose exactly one emotion from the following list:
['Calm', 'Excited', 'Contentment', 'Frustrated', 'Sad', 'Aroused', 'Alarmed', 'Bored', 'Happy', 'Annoyed', 'Tired', 'Glad']

Consider the following when making your assessment:
- Color palette and tone (warm/cool, saturated/muted)
- Composition and visual tension
- Subject matter and imagery
- Brushwork or line quality (chaotic, gentle, rigid, etc.)
- Overall mood the artwork projects

Respond only with a JSON object in this exact format:
{"emotion": "<emotion>"}

Do not include any explanation, commentary, or additional fields.

   


In [40]:
# Print the target answer built for the first row.
print(df.iloc[0]['answer'])

{'emotion': 'anger'}


In [None]:
def build_jsonl_dataset(
    split,
    image_col="image",
    instruction_col="instruction",
    answer_col="answer",
    data=None,
    output_path=None,
    image_dir=None,
):
    """Export prepared rows as a chat-style JSONL file, one sample per line.

    Each line is a JSON object with a two-turn ``messages`` list (user prompt
    prefixed with ``<image>``, then the assistant answer) and an ``images``
    list holding one image file path.

    In-memory image objects cannot be JSON-encoded, so any image value that is
    not already a path is written to ``image_dir`` through its ``save`` method
    and replaced by the resulting path. Dict or list answers are encoded with
    ``json.dumps`` so the assistant turn is valid JSON rather than a Python
    repr with single quotes.

    Args:
        split: Split name; used to derive the default output file and image
            directory names.
        image_col: Column holding the image (object with ``save`` or a path).
        instruction_col: Column holding the user prompt text.
        answer_col: Column holding the target answer (str, dict or list).
        data: DataFrame to export. Defaults to the notebook-level ``df``.
        output_path: Destination file. Defaults to ``emid_{split}.jsonl``.
        image_dir: Directory for saved images. Defaults to
            ``emid_{split}_images``.

    Returns:
        None. Prints the number of samples written and the output path.
    """
    # Fall back to the notebook-level frame so existing calls keep working.
    frame = df if data is None else data
    output_path = Path(f"emid_{split}.jsonl" if output_path is None else output_path)
    image_dir = Path(f"emid_{split}_images" if image_dir is None else image_dir)

    records = []
    for position, (_, row) in enumerate(frame.iterrows()):
        image = row[image_col]
        if isinstance(image, (str, Path)):
            # Already a file reference: store it unchanged.
            image_ref = str(image)
        else:
            # Presumably a PIL image (anything exposing ``save``). PNG is used
            # because it is lossless; file names follow the row position.
            image_dir.mkdir(parents=True, exist_ok=True)
            image_path = image_dir / f"{position:06d}.png"
            image.save(image_path)
            image_ref = str(image_path)

        answer = row[answer_col]
        if isinstance(answer, (dict, list)):
            answer_text = json.dumps(answer, ensure_ascii=False)
        else:
            answer_text = str(answer)

        records.append(
            {
                "messages": [
                    {"content": "<image>" + row[instruction_col], "role": "user"},
                    {"content": answer_text, "role": "assistant"},
                ],
                "images": [image_ref],
            }
        )

    with output_path.open("w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

    print(f"Saved {len(records)} samples to {output_path}")

In [None]:
# Export the prepared DataFrame; with split="train" the output file is emid_train.jsonl.
build_jsonl_dataset(split="train")