In [2]:
import pandas as pd
import json

print("Setting up VSR-style dataset for spatial reasoning...")

vsr_sample_data = [
    {
        'image': 'image_001.jpg',
        'caption': 'The red ball is to the left of the blue box.',
        'label': 1,
        'spatial_relation': 'left'
    },
    {
        'image': 'image_002.jpg',
        'caption': 'The cat is sitting above the mat on the floor.',
        'label': 1,
        'spatial_relation': 'above'
    },
    {
        'image': 'image_003.jpg',
        'caption': 'The car is parked behind the large tree.',
        'label': 1,
        'spatial_relation': 'behind'
    },
    {
        'image': 'image_004.jpg',
        'caption': 'The bird is perched at the top of the building.',
        'label': 1,
        'spatial_relation': 'top'
    },
    {
        'image': 'image_005.jpg',
        'caption': 'The book is placed below the computer monitor.',
        'label': 1,
        'spatial_relation': 'below'
    },
    {
        'image': 'image_006.jpg',
        'caption': 'The dog is standing in front of the house door.',
        'label': 1,
        'spatial_relation': 'in front of'
    },
    {
        'image': 'image_007.jpg',
        'caption': 'The flower pot is positioned to the right of the window.',
        'label': 1,
        'spatial_relation': 'right'
    },
    {
        'image': 'image_008.jpg',
        'caption': 'The lamp is at the bottom of the staircase.',
        'label': 1,
        'spatial_relation': 'bottom'
    },
    {
        'image': 'image_009.jpg',
        'caption': 'The picture frame is above the fireplace and to the left of the clock.',
        'label': 1,
        'spatial_relation': 'above_left'
    },
    {
        'image': 'image_010.jpg',
        'caption': 'The coffee cup is on the table, in front of the laptop.',
        'label': 1,
        'spatial_relation': 'in front of'
    }
]

vsr_dataset = {
    'train': vsr_sample_data,
    'test': vsr_sample_data[:5]  
}

print(f"Dataset created successfully!")
print(f"Train examples: {len(vsr_dataset['train'])}")
print(f"Test examples: {len(vsr_dataset['test'])}")

print(f"\nDataset structure:")
print(f"Keys: {list(vsr_dataset.keys())}")

print(f"\nSample examples:")
for i in range(3):
    example = vsr_dataset['train'][i]
    print(f"\nExample {i+1}:")
    for key, value in example.items():
        print(f"  {key}: {value}")

print(f"\n" + "="*50)
print("VSR-style dataset is ready for spatial perturbation testing!")
print("You can now use: vsr_dataset['train'][0]['caption']")
print("="*50)

Setting up VSR-style dataset for spatial reasoning...
Dataset created successfully!
Train examples: 10
Test examples: 5

Dataset structure:
Keys: ['train', 'test']

Sample examples:

Example 1:
  image: image_001.jpg
  caption: The red ball is to the left of the blue box.
  label: 1
  spatial_relation: left

Example 2:
  image: image_002.jpg
  caption: The cat is sitting above the mat on the floor.
  label: 1
  spatial_relation: above

Example 3:
  image: image_003.jpg
  caption: The car is parked behind the large tree.
  label: 1
  spatial_relation: behind

VSR-style dataset is ready for spatial perturbation testing!
You can now use: vsr_dataset['train'][0]['caption']


In [4]:
import re

def perturb_spatial_words(sentence):
    """
    Perturbs spatial words in a sentence by swapping them with their opposites.

    Swaps:
    - left ↔ right
    - above ↔ below
    - top ↔ bottom
    - in front of ↔ behind

    Args:
        sentence (str): Input sentence containing spatial descriptions

    Returns:
        str: Sentence with spatial words swapped
    """

    spatial_swaps = {
        'left': 'right',
        'right': 'left',
        'above': 'below',
        'below': 'above',
        'top': 'bottom',
        'bottom': 'top',
        'in front of': 'behind',
        'behind': 'in front of'
    }

    perturbed_sentence = sentence.lower()

    for original, replacement in spatial_swaps.items():
        if len(original.split()) > 1: 
           
            pattern = r'\b' + re.escape(original) + r'\b'
            perturbed_sentence = re.sub(pattern, f"__TEMP_{replacement.replace(' ', '_')}__",
                                     perturbed_sentence, flags=re.IGNORECASE)


    for original, replacement in spatial_swaps.items():
        if len(original.split()) == 1: 
            pattern = r'\b' + re.escape(original) + r'\b'
            perturbed_sentence = re.sub(pattern, f"__TEMP_{replacement}__",
                                     perturbed_sentence, flags=re.IGNORECASE)

    perturbed_sentence = re.sub(r'__TEMP_([^_]+(?:_[^_]+)*)__',
                               lambda m: m.group(1).replace('_', ' '),
                               perturbed_sentence)

    words_original = sentence.split()
    words_perturbed = perturbed_sentence.split()

    final_words = []
    for i, (orig, pert) in enumerate(zip(words_original, words_perturbed)):
        if orig.isupper():
            final_words.append(pert.upper())
        elif orig.istitle():
            final_words.append(pert.capitalize())
        else:
            final_words.append(pert)

    return ' '.join(final_words)

def test_perturbation():
    """Test the perturbation function with various examples"""

    test_sentences = [
        "The cat is to the left of the dog.",
        "The book is above the table and below the shelf.",
        "The car is in front of the house.",
        "The bird is at the top of the tree.",
        "Turn right at the corner, then go left.",
        "The ball is behind the chair and above the floor.",
        "In front of the building, there is a tree on the left side."
    ]

    print("=== Spatial Word Perturbation Test ===\n")

    for i, sentence in enumerate(test_sentences, 1):
        perturbed = perturb_spatial_words(sentence)
        print(f"Test {i}:")
        print(f"Original:   {sentence}")
        print(f"Perturbed:  {perturbed}")
        print()

test_perturbation()

print("=== Example with VSR Dataset ===")
print("To use with VSR dataset caption:")
print("perturbed_caption = perturb_spatial_words(vsr_dataset['train'][0]['caption'])")
print("\nThis function is ready to be applied to any caption from the VSR dataset!")

=== Spatial Word Perturbation Test ===

Test 1:
Original:   The cat is to the left of the dog.
Perturbed:  The cat is to the right of the dog.

Test 2:
Original:   The book is above the table and below the shelf.
Perturbed:  The book is below the table and above the shelf.

Test 3:
Original:   The car is in front of the house.
Perturbed:  The car is behind the house.

Test 4:
Original:   The bird is at the top of the tree.
Perturbed:  The bird is at the bottom of the tree.

Test 5:
Original:   Turn right at the corner, then go left.
Perturbed:  Turn left at the corner, then go right.

Test 6:
Original:   The ball is behind the chair and above the floor.
Perturbed:  The ball is in front of the chair and below

Test 7:
Original:   In front of the building, there is a tree on the left side.
Perturbed:  Behind the building, there is a tree on the right side.

=== Example with VSR Dataset ===
To use with VSR dataset caption:
perturbed_caption = perturb_spatial_words(vsr_dataset['train'][0][