In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
import pandas as pd
from tqdm import tqdm
import ast

from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

# Data Augmentation Using LLMs

This notebook explores data augmentation with large language models (LLMs) and shows how to do it with an instruction-tuned model.

Paper:

* Shorten, C., Khoshgoftaar, T. M., & Furht, B. (2021). Text data augmentation for deep learning. Journal of Big Data, 8, 1-34.

## Loading model

In [2]:
# Confirm that PyTorch can see a CUDA device before the model is loaded;
# the cell displays the boolean result.
torch.cuda.is_available()

True

In [3]:
# Hub id of the instruction-following checkpoint used for augmentation.
model = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model)

# Text-generation pipeline reused by the augmentation cells.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",  # let the library decide where the weights are placed
    trust_remote_code=True,  # permit model code shipped with the checkpoint to run
)





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Augmentation

In [12]:
emotions = ["happy", "sad", "power"]

for emotion in emotions:
    # The wording of the prompt matters most. What helps:
    # - ending the prompt with an opening " so the model continues with a quoted sentence,
    # - putting the input data in '' as well,
    # - giving clear instructions / commands.
    prompt = f"Given the emotion '{emotion}'. Reformulate this sentence: 'This music makes me {emotion}'. Reformulation: \""
    sequences = pipeline(
        prompt,
        max_length=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=10,
        eos_token_id=tokenizer.eos_token_id,
        # Same value generation falls back to on its own; passing it explicitly
        # only avoids the repeated "Setting `pad_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    print(emotion)
    for seq in sequences:
        generated = seq["generated_text"]
        if generated.startswith(prompt):
            # The result starts with the prompt, which ends in the opening quote:
            # drop it, then keep the first line up to the closing quote.
            reformulation = generated[len(prompt):].split("\n")[0].split("\"")[0]
            print("-", reformulation)
        else:
            # Prompt not echoed as expected: show the raw result instead of guessing.
            print(seq)
    print("-")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


happy
- The melody brings a sense joy and contentment to me.
- The melody brings joy to my soul.
- The cheerful melody brings a smile to my face.
- The melodious tunes this music carries evoke feelings of joy and delight within me
- This music brings me joy.
- The melody brings me joy.
- This piece of music causes an emotional response of happiness in me.
- The melody brings a feeling of joy to me.
- The melody of this song fills me with joy and contentment
- The pleasant melodies evoke a feeling of happiness within me.
-


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


sad
- The music I am listening to currently evokes a feeling of sadness in me.
- This music evokes feelings of sadness in me.
- The music I am listening to is causing sorrow.
- The melodies evoke feelings of sadness within me.
- The melody evokes feelings of sadness within me
- The music is causing me to feel unhappy.
- The music I am currently listening to is causing me emotional distress.
- The music I just heard left me feeling melancholic.
- The music evokes feelings of sadness within me.
- This music evokes feelings of sadness within me.
-
power
- This music brings me joy and a sense of power.
- This music elates me and gives me strength.
- The upbeat melodies of this music provide me with a surge of happiness and energy.
- Listening to this music gives me an emotional boost and a sense of empowerment.
- The upbeat tunes uplift the mood, providing me with a surge of power.
- The music brings me joy and happiness.
- The music elates me and provides me with a feeling of power.
- The

In [13]:
emotions = ["happy", "sad", "power"]

for emotion in emotions:
    # Same idea as a reformulation prompt, but asking for paraphrases; the prompt
    # ends with an opening " so the model continues with a quoted sentence.
    prompt = f"Give different sentences of the same meaning: 'This music track makes me {emotion}'. Sentences: \""
    sequences = pipeline(
        prompt,
        max_length=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=10,
        eos_token_id=tokenizer.eos_token_id,
        # Same value generation falls back to on its own; passing it explicitly
        # only avoids the repeated "Setting `pad_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    print(emotion)
    for seq in sequences:
        generated = seq["generated_text"]
        if generated.startswith(prompt):
            # Parse relative to this cell's own prompt (it ends with 'Sentences: "'):
            # drop the echoed prompt, then keep the first line up to the closing quote.
            sentence = generated[len(prompt):].split("\n")[0].split("\"")[0]
            print("-", sentence)
        else:
            # Prompt not echoed as expected: show the raw result instead of guessing.
            print(seq)
    print("-")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


happy
- Listening to this music track fills me with joy and elation.
- The upbeat melody is making my day,
- Listening to this music makes me happy
- This particular track makes me feel joyful.
- Listening to this music makes me happy
- This tune brings a smile to my face.
- This music brings me joy
- This music track brings me joy
- This tune brings me joy,
- This music track brings me joy
-


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


sad
- The melancholic melody evokes feelings of sadness within me.
- This melancholic melody brings me down
- This piece of music brings me down.
- This music track is depressing me
- The melody evokes an feeling of melancholy.
- Listening to this music makes me melancholy,
- This melancholy melody is a downer for me.
- This music track brings tears to my eyes
- I'm not in the mood for this music track right now,
- This piece of music evokes a feeling of sadness in me.
-
power
- The music track is empowering me.
- This music track is energizing
- This music track gives me power.
- This music makes me feel powerful.
- This music track gives me strength,
- This music track empowers me
- This music track invigorates me.
- This song gives me a sense of power.
- This music track invigorates me.
- The music is empowering me.
-


In [6]:
post = "Emotional metal with melodic breaks"

# Ask for the intent behind a post title; the prompt ends with an opening " so
# the model continues with a quoted answer.
prompt = f"Analyze this post title: '{post}' on basis of intention. Intent: \""
sequences = pipeline(
    prompt,
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=10,
    eos_token_id=tokenizer.eos_token_id,
    # Same value generation falls back to on its own; passing it explicitly
    # only avoids the "Setting `pad_token_id`" warning.
    pad_token_id=tokenizer.eos_token_id,
)
print(post)
for seq in sequences:
    generated = seq["generated_text"]
    if generated.startswith(prompt):
        # Parse relative to this cell's own prompt (it ends with 'Intent: "'), so a
        # quote inside `post` cannot shift the result: drop the echoed prompt, then
        # keep the first line up to the closing quote.
        intent = generated[len(prompt):].split("\n")[0].split("\"")[0]
        print("-", intent)
    else:
        # Prompt not echoed as expected: show the raw result instead of guessing.
        print(seq)
print("-")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Emotional metal with melodic breaks
- to create intense, emotional songs.
- I'm not a big fan of this genre. 
- I wanted to compose a melodic piece that had a deep emotional impact.
- I need help choosing an appropriate song for my emotional metal project
- Emotional
- Metal
- to emotionally engage the listener by combining metal with melodic elements
- A metal post
- I want to write about music that evokes strong emotions.
- to evoke strong emotions
-
