# Setup

In [1]:
# Mount Google Drive under /content/drive; the following cells read and write files there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os

# Switch to the project folder ("Logical operators") in Google Drive.
# Later cells use paths relative to it (requirements.txt, config.json, per-operator sub-folders).
folder_path = '/content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators'
os.chdir(folder_path)
print("Current working directory: ", os.getcwd())

Current working directory:  /content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators


In [None]:
!pip install -r requirements.txt

In [None]:
import torch

In [None]:
import json

# Load the Hugging Face token from a JSON file
def load_hf_token(json_path):
    """Return the value stored under the ``'hf_token'`` key of a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object found at ``data['hf_token']``.

    Raises:
        KeyError: If the file has no ``'hf_token'`` key.
    """
    with open(json_path, 'r') as handle:
        config = json.load(handle)
    return config['hf_token']

hf_token = load_hf_token('config.json')

In [None]:
from diffusers import StableDiffusionPipeline
# Load the Stable Diffusion 2.1 base checkpoint in half precision and move it to the GPU.
# `pipe` is used as a global by the image-generation cells below.
# NOTE(review): recent Hugging Face releases deprecate `use_auth_token` in favour of `token` —
# confirm against the versions pinned in requirements.txt.
pipe = StableDiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-2-1-base',
    revision="main",
    torch_dtype=torch.float16,
    use_auth_token=hf_token
).to('cuda')

# And

## Load the dataframe

In [8]:
# Enter the 'and' sub-folder through an absolute path so this cell can be re-run:
# a relative chdir('and') only works once, while the cwd is still the project root.
os.chdir(os.path.join(folder_path, 'and'))

# Verify the current working directory
print("Current working directory: ", os.getcwd())

Current working directory:  /content/drive/MyDrive/Colab Notebooks/Stable diffusion/Logical operators/and


In [None]:
import pandas as pd
df = pd.read_csv('and_generated_sentences.csv')

In [None]:
# Create a 1-based id column that uniquely identifies each row
df['id'] = range(1, len(df) + 1)

# Put 'id' first by name. Moving "the last column" to the front is not idempotent:
# on a re-run 'id' is already first, so a different column would be rotated forward.
df = df[['id'] + [col for col in df.columns if col != 'id']]

print(df.head())

   id                                           sentence conjunction  \
0   1  A little girl and her dog are playing in the p...         and   
1   2  An astronaut and a satellite are floating in s...         and   
2   3  A ship and a lighthouse stand against the back...         and   
3   4  A butterfly and a flower are in a vibrant meadow.         and   
4   5  A knight and a dragon are locked in a fierce b...         and   

    first_noun second_noun  
0  Little girl     Her dog  
1    Astronaut   Satellite  
2         Ship  Lighthouse  
3    Butterfly      Flower  
4       Knight      Dragon  


## Generate images

In [None]:
from tqdm import tqdm
from pathlib import Path
from PIL import Image

def generate_images(df, start_idx=0, batch_size=2):
    """Render one image per row of ``df`` with the global ``pipe`` and save it as ``images/<id>.png``.

    Rows are processed in order from positional index ``start_idx`` in groups
    of ``batch_size``; prompts are still sent to the pipeline one at a time.
    After each group a resume position is written to ``progress.txt`` in the
    current working directory.

    Args:
        df: DataFrame with an ``id`` column (file name and RNG seed) and a
            ``sentence`` column (the prompt).
        start_idx: Positional row index to start from.
        batch_size: Number of rows between two checkpoint writes.

    Returns:
        List of ids whose generation raised an exception (empty if none).
    """
    print("Starting the image generation process.")
    remaining = max(len(df) - start_idx, 0)
    total_batches = -(-remaining // batch_size)  # ceiling division
    output_folder = Path('images')
    output_folder.mkdir(exist_ok=True)

    failed_ids = []
    # Positional index of the first row that failed during this run. The
    # checkpoint is never advanced beyond it, so a resumed run retries that
    # row instead of silently leaving its image missing.
    first_failed_pos = None

    print(f"Total number of batches to process: {total_batches}")
    for batch_num in range(total_batches):
        start = start_idx + batch_num * batch_size
        end = min(start + batch_size, len(df))
        batch_df = df.iloc[start:end]
        print(f"Processing batch {batch_num + 1}/{total_batches} - from index {start} to {end}")

        rows = tqdm(batch_df.iterrows(), total=len(batch_df), desc=f'Batch {batch_num+1}/{total_batches}')
        for offset, (_, row) in enumerate(rows):
            # `image_id` rather than `id`, to avoid shadowing the builtin
            image_id, sentence = row['id'], row['sentence']
            print(f"Generating image for ID {image_id} with prompt: {sentence}")
            try:
                # Seed with the row id so each image is reproducible
                gen = torch.Generator().manual_seed(image_id)
                with torch.no_grad():
                    out = pipe(sentence, num_inference_steps=50, generator=gen)
                img_path = output_folder / f'{image_id}.png'
                out.images[0].save(img_path)
                print(f"Image saved to {img_path}")
                torch.cuda.empty_cache()
            except Exception as e:
                print(f"Error processing ID {image_id}: {e}")
                failed_ids.append(image_id)
                if first_failed_pos is None:
                    first_failed_pos = start + offset

        # Save progress after each batch
        checkpoint = end if first_failed_pos is None else first_failed_pos
        with open('progress.txt', 'w') as f:
            f.write(str(checkpoint))
        print(f"Progress saved, batch {batch_num + 1} complete.")

    if failed_ids:
        print(f"Generation failed for IDs: {failed_ids}")
    return failed_ids

# Check for existing progress and resume
try:
    with open('progress.txt', 'r') as f:
        start_index = int(f.read().strip())
    print(f"Resuming from index {start_index}")
except FileNotFoundError:
    start_index = 0
    print("Starting from the beginning")
except ValueError:
    # An empty or garbled checkpoint (e.g. an interrupted write) should not abort the run.
    start_index = 0
    print("Could not parse progress.txt; starting from the beginning")

generate_images(df, start_idx=start_index)

## Answer questions about images

In [None]:
!pip install pillow



In [None]:
from PIL import Image
from transformers import pipeline
from pathlib import Path

# Load the visual question-answering model
# (checkpoint "dandelin/vilt-b32-finetuned-vqa", authenticated with the token read from config.json).
# `vqa_pipeline` is used as a global by detect_object_in_image below.
vqa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa", use_auth_token=hf_token)

def detect_object_in_image(image_path, object_name):
    """Ask the VQA model whether ``object_name`` is visible in the image at ``image_path``.

    Args:
        image_path: Path of the image file to inspect.
        object_name: Text inserted verbatim into the question
            "Is there a <object_name> in this picture?".

    Returns:
        True when the model's top answer is affirmative, otherwise False.
        False is also returned when the pipeline call raises, so callers
        cannot tell "absent" from "failed" without reading the printed log.

    Raises:
        Whatever ``Image.open`` raises if the file cannot be opened; only the
        pipeline call is guarded.
    """
    import re  # local import: only needed to tokenise the answer

    # NOTE(review): the fixed article "a" yields ungrammatical questions for
    # plural or possessive names (e.g. "Leaves", "Her dog") — confirm acceptable.
    question = f"Is there a {object_name} in this picture?"

    # The context manager releases the file handle once the model is done with it.
    with Image.open(image_path) as image:
        try:
            response = vqa_pipeline(image, question, top_k=1)
            print("Pipeline response:", response)

            top = response[0] if isinstance(response, list) else response
            answer = top['answer'].lower()

            # Match "yes" as a whole word; a substring test would also accept
            # answers such as "eyes".
            return 'yes' in re.findall(r"[a-z]+", answer) or 'there is' in answer
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            return False

In [None]:
def update_df_with_image_analysis(df):
    """Add per-row noun-presence flags to ``df`` in place.

    Creates the columns ``first_noun_presence`` and ``second_noun_presence``
    (initialised to False) and, for every row whose ``images/<id>.png``
    exists, fills them with the result of ``detect_object_in_image`` for
    ``first_noun`` and ``second_noun``. Rows without an image keep False and
    are reported on stdout.
    """
    for column in ('first_noun_presence', 'second_noun_presence'):
        df[column] = False

    images_dir = Path('images')
    for index, row in df.iterrows():
        image_path = images_dir / f"{row['id']}.png"
        if not image_path.exists():
            print(f"No image found for ID {row['id']}")
            continue
        first_found = detect_object_in_image(image_path, row['first_noun'])
        df.at[index, 'first_noun_presence'] = first_found
        second_found = detect_object_in_image(image_path, row['second_noun'])
        df.at[index, 'second_noun_presence'] = second_found

update_df_with_image_analysis(df)

In [None]:
print(df.head())

   id                                           sentence conjunction  \
0   1  A little girl and her dog are playing in the p...         and   
1   2  An astronaut and a satellite are floating in s...         and   
2   3  A ship and a lighthouse stand against the back...         and   
3   4  A butterfly and a flower are in a vibrant meadow.         and   
4   5  A knight and a dragon are locked in a fierce b...         and   

    first_noun second_noun  first_noun_presence  second_noun_presence  
0  Little girl     Her dog                 True                  True  
1    Astronaut   Satellite                 True                  True  
2         Ship  Lighthouse                 True                  True  
3    Butterfly      Flower                 True                  True  
4       Knight      Dragon                 True                  True  


In [None]:
# An "and" prompt counts as satisfied only when both nouns were detected.
df['answer_check'] = df['first_noun_presence'] & df['second_noun_presence']

In [None]:
print(df.head())

   id                                           sentence conjunction  \
0   1  A little girl and her dog are playing in the p...         and   
1   2  An astronaut and a satellite are floating in s...         and   
2   3  A ship and a lighthouse stand against the back...         and   
3   4  A butterfly and a flower are in a vibrant meadow.         and   
4   5  A knight and a dragon are locked in a fierce b...         and   

    first_noun second_noun  first_noun_presence  second_noun_presence  \
0  Little girl     Her dog                 True                  True   
1    Astronaut   Satellite                 True                  True   
2         Ship  Lighthouse                 True                  True   
3    Butterfly      Flower                 True                  True   
4       Knight      Dragon                 True                  True   

   answer_check  
0          True  
1          True  
2          True  
3          True  
4          True  


In [None]:
df.to_csv("and_updated_dataframe.csv", index=False)

In [None]:
# Share of rows whose 'answer_check' is True, expressed as a percentage
percentage_true = df['answer_check'].mean() * 100

# Report it with two decimals
print(f"The percentage of True values is: {percentage_true:.2f}%")

The percentage of True values is: 68.00%


# Or

## Load the dataframe

In [10]:
folder_path = '/content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators'
os.chdir(folder_path)

In [11]:
# Enter the 'or' sub-folder through an absolute path so this cell can be re-run on its own:
# a relative chdir('or') fails once the cwd is no longer the project root.
os.chdir(os.path.join(folder_path, 'or'))

# Verify the current working directory
print("Current working directory: ", os.getcwd())

Current working directory:  /content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators/or


In [None]:
import pandas as pd
df = pd.read_csv('or_generated_sentences.csv')

In [None]:
# Create a 1-based id column that uniquely identifies each row
df['id'] = range(1, len(df) + 1)

# Put 'id' first by name. Moving "the last column" to the front is not idempotent:
# on a re-run 'id' is already first, so a different column would be rotated forward.
df = df[['id'] + [col for col in df.columns if col != 'id']]

print(df.head())

   id                                           sentence conjunction  \
0   1    A girl is holding a red balloon or a blue kite.          or   
1   2  An old man is feeding pigeons or reading a new...          or   
2   3  A child is building a sandcastle or collecting...          or   
3   4  A woman is watering her plants or hanging laun...          or   
4   5  A cat is playing with a ball of yarn or sleepi...          or   

                    first_noun                second_noun  
0                  Red balloon                  Blue kite  
1              Feeding pigeons        Reading a newspaper  
2        Building a sandcastle       Collecting seashells  
3              Watering plants            Hanging laundry  
4  Playing with a ball of yarn  Sleeping on a comfy chair  


## Generate images

In [None]:
from tqdm import tqdm
from pathlib import Path
from PIL import Image

def generate_images(df, start_idx=0, batch_size=2):
    """Render one image per row of ``df`` with the global ``pipe`` and save it as ``images/<id>.png``.

    Rows are processed in order from positional index ``start_idx`` in groups
    of ``batch_size``; prompts are still sent to the pipeline one at a time.
    After each group a resume position is written to ``progress.txt`` in the
    current working directory.

    Args:
        df: DataFrame with an ``id`` column (file name and RNG seed) and a
            ``sentence`` column (the prompt).
        start_idx: Positional row index to start from.
        batch_size: Number of rows between two checkpoint writes.

    Returns:
        List of ids whose generation raised an exception (empty if none).
    """
    print("Starting the image generation process.")
    remaining = max(len(df) - start_idx, 0)
    total_batches = -(-remaining // batch_size)  # ceiling division
    output_folder = Path('images')
    output_folder.mkdir(exist_ok=True)

    failed_ids = []
    # Positional index of the first row that failed during this run. The
    # checkpoint is never advanced beyond it, so a resumed run retries that
    # row instead of silently leaving its image missing.
    first_failed_pos = None

    print(f"Total number of batches to process: {total_batches}")
    for batch_num in range(total_batches):
        start = start_idx + batch_num * batch_size
        end = min(start + batch_size, len(df))
        batch_df = df.iloc[start:end]
        print(f"Processing batch {batch_num + 1}/{total_batches} - from index {start} to {end}")

        rows = tqdm(batch_df.iterrows(), total=len(batch_df), desc=f'Batch {batch_num+1}/{total_batches}')
        for offset, (_, row) in enumerate(rows):
            # `image_id` rather than `id`, to avoid shadowing the builtin
            image_id, sentence = row['id'], row['sentence']
            print(f"Generating image for ID {image_id} with prompt: {sentence}")
            try:
                # Seed with the row id so each image is reproducible
                gen = torch.Generator().manual_seed(image_id)
                with torch.no_grad():
                    out = pipe(sentence, num_inference_steps=50, generator=gen)
                img_path = output_folder / f'{image_id}.png'
                out.images[0].save(img_path)
                print(f"Image saved to {img_path}")
                torch.cuda.empty_cache()
            except Exception as e:
                print(f"Error processing ID {image_id}: {e}")
                failed_ids.append(image_id)
                if first_failed_pos is None:
                    first_failed_pos = start + offset

        # Save progress after each batch
        checkpoint = end if first_failed_pos is None else first_failed_pos
        with open('progress.txt', 'w') as f:
            f.write(str(checkpoint))
        print(f"Progress saved, batch {batch_num + 1} complete.")

    if failed_ids:
        print(f"Generation failed for IDs: {failed_ids}")
    return failed_ids

# Check for existing progress and resume
try:
    with open('progress.txt', 'r') as f:
        start_index = int(f.read().strip())
    print(f"Resuming from index {start_index}")
except FileNotFoundError:
    start_index = 0
    print("Starting from the beginning")
except ValueError:
    # An empty or garbled checkpoint (e.g. an interrupted write) should not abort the run.
    start_index = 0
    print("Could not parse progress.txt; starting from the beginning")

generate_images(df, start_idx=start_index)

## Answer questions about images

In [None]:
!pip install pillow



In [None]:
from PIL import Image
from transformers import pipeline
from pathlib import Path

# Load the visual question-answering model
# (checkpoint "dandelin/vilt-b32-finetuned-vqa", authenticated with the token read from config.json).
# `vqa_pipeline` is used as a global by detect_object_in_image below.
vqa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa", use_auth_token=hf_token)

def detect_object_in_image(image_path, object_name):
    """Ask the VQA model whether ``object_name`` is visible in the image at ``image_path``.

    Args:
        image_path: Path of the image file to inspect.
        object_name: Text inserted verbatim into the question
            "Is there a <object_name> in this picture?".

    Returns:
        True when the model's top answer is affirmative, otherwise False.
        False is also returned when the pipeline call raises, so callers
        cannot tell "absent" from "failed" without reading the printed log.

    Raises:
        Whatever ``Image.open`` raises if the file cannot be opened; only the
        pipeline call is guarded.
    """
    import re  # local import: only needed to tokenise the answer

    # NOTE(review): in this section the names are phrases such as
    # "Feeding pigeons", so the fixed "Is there a ..." template reads
    # ungrammatically — confirm this is acceptable for the model.
    question = f"Is there a {object_name} in this picture?"

    # The context manager releases the file handle once the model is done with it.
    with Image.open(image_path) as image:
        try:
            response = vqa_pipeline(image, question, top_k=1)
            print("Pipeline response:", response)

            top = response[0] if isinstance(response, list) else response
            answer = top['answer'].lower()

            # Match "yes" as a whole word; a substring test would also accept
            # answers such as "eyes".
            return 'yes' in re.findall(r"[a-z]+", answer) or 'there is' in answer
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            return False

In [None]:
def update_df_with_image_analysis(df):
    """Add per-row noun-presence flags to ``df`` in place.

    Creates the columns ``first_noun_presence`` and ``second_noun_presence``
    (initialised to False) and, for every row whose ``images/<id>.png``
    exists, fills them with the result of ``detect_object_in_image`` for
    ``first_noun`` and ``second_noun``. Rows without an image keep False and
    are reported on stdout.
    """
    for column in ('first_noun_presence', 'second_noun_presence'):
        df[column] = False

    images_dir = Path('images')
    for index, row in df.iterrows():
        image_path = images_dir / f"{row['id']}.png"
        if not image_path.exists():
            print(f"No image found for ID {row['id']}")
            continue
        first_found = detect_object_in_image(image_path, row['first_noun'])
        df.at[index, 'first_noun_presence'] = first_found
        second_found = detect_object_in_image(image_path, row['second_noun'])
        df.at[index, 'second_noun_presence'] = second_found

update_df_with_image_analysis(df)

In [None]:
print(df.head())

   id                                           sentence conjunction  \
0   1    A girl is holding a red balloon or a blue kite.          or   
1   2  An old man is feeding pigeons or reading a new...          or   
2   3  A child is building a sandcastle or collecting...          or   
3   4  A woman is watering her plants or hanging laun...          or   
4   5  A cat is playing with a ball of yarn or sleepi...          or   

                    first_noun                second_noun  \
0                  Red balloon                  Blue kite   
1              Feeding pigeons        Reading a newspaper   
2        Building a sandcastle       Collecting seashells   
3              Watering plants            Hanging laundry   
4  Playing with a ball of yarn  Sleeping on a comfy chair   

   first_noun_presence  second_noun_presence  answer_check  
0                 True                  True         False  
1                 True                  True         False  
2                

In [None]:
# Exclusive or: an "or" prompt is scored as satisfied only when exactly one of the two was detected.
df['answer_check'] = df['first_noun_presence'] ^ df['second_noun_presence']

In [None]:
print(df.head())

   id                                           sentence conjunction  \
0   1    A girl is holding a red balloon or a blue kite.          or   
1   2  An old man is feeding pigeons or reading a new...          or   
2   3  A child is building a sandcastle or collecting...          or   
3   4  A woman is watering her plants or hanging laun...          or   
4   5  A cat is playing with a ball of yarn or sleepi...          or   

                    first_noun                second_noun  \
0                  Red balloon                  Blue kite   
1              Feeding pigeons        Reading a newspaper   
2        Building a sandcastle       Collecting seashells   
3              Watering plants            Hanging laundry   
4  Playing with a ball of yarn  Sleeping on a comfy chair   

   first_noun_presence  second_noun_presence  answer_check  
0                 True                  True         False  
1                 True                  True         False  
2                

In [None]:
df.to_csv("or_updated_dataframe.csv", index=False)

In [None]:
# Share of rows whose 'answer_check' is True, expressed as a percentage
percentage_true = df['answer_check'].mean() * 100

# Report it with two decimals
print(f"The percentage of True values is: {percentage_true:.2f}%")

The percentage of True values is: 36.00%


# Negation

## Load the dataframe

In [12]:
folder_path = '/content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators'
os.chdir(folder_path)

In [13]:
# Enter the 'negation' sub-folder through an absolute path so this cell can be re-run on its own:
# a relative chdir('negation') fails once the cwd is no longer the project root.
os.chdir(os.path.join(folder_path, 'negation'))

# Verify the current working directory
print("Current working directory: ", os.getcwd())

Current working directory:  /content/drive/My Drive/Colab Notebooks/Stable diffusion/Logical operators/negation


In [None]:
import pandas as pd
df = pd.read_csv('negation_generated_sentences.csv')

In [None]:
# Create a 1-based id column that uniquely identifies each row
df['id'] = range(1, len(df) + 1)

# Put 'id' first by name. Moving "the last column" to the front is not idempotent:
# on a re-run 'id' is already first, so a different column would be rotated forward.
df = df[['id'] + [col for col in df.columns if col != 'id']]

print(df.head())

   id                                           sentence negation negated_noun
0   1                 The tree does not have any leaves.      Not       Leaves
1   2                 There is no moon in the night sky.       No         Moon
2   3                The room is not filled with people.      Not       People
3   4              There isn't a single bird in the sky.    Isn't         Bird
4   5  The playground is empty, no children are playing.       No     Children


## Generate images

In [None]:
from tqdm import tqdm
from pathlib import Path
from PIL import Image

def generate_images(df, start_idx=0, batch_size=2):
    """Render one image per row of ``df`` with the global ``pipe`` and save it as ``images/<id>.png``.

    Rows are processed in order from positional index ``start_idx`` in groups
    of ``batch_size``; prompts are still sent to the pipeline one at a time.
    After each group a resume position is written to ``progress.txt`` in the
    current working directory.

    Args:
        df: DataFrame with an ``id`` column (file name and RNG seed) and a
            ``sentence`` column (the prompt).
        start_idx: Positional row index to start from.
        batch_size: Number of rows between two checkpoint writes.

    Returns:
        List of ids whose generation raised an exception (empty if none).
    """
    print("Starting the image generation process.")
    remaining = max(len(df) - start_idx, 0)
    total_batches = -(-remaining // batch_size)  # ceiling division
    output_folder = Path('images')
    output_folder.mkdir(exist_ok=True)

    failed_ids = []
    # Positional index of the first row that failed during this run. The
    # checkpoint is never advanced beyond it, so a resumed run retries that
    # row instead of silently leaving its image missing.
    first_failed_pos = None

    print(f"Total number of batches to process: {total_batches}")
    for batch_num in range(total_batches):
        start = start_idx + batch_num * batch_size
        end = min(start + batch_size, len(df))
        batch_df = df.iloc[start:end]
        print(f"Processing batch {batch_num + 1}/{total_batches} - from index {start} to {end}")

        rows = tqdm(batch_df.iterrows(), total=len(batch_df), desc=f'Batch {batch_num+1}/{total_batches}')
        for offset, (_, row) in enumerate(rows):
            # `image_id` rather than `id`, to avoid shadowing the builtin
            image_id, sentence = row['id'], row['sentence']
            print(f"Generating image for ID {image_id} with prompt: {sentence}")
            try:
                # Seed with the row id so each image is reproducible
                gen = torch.Generator().manual_seed(image_id)
                with torch.no_grad():
                    out = pipe(sentence, num_inference_steps=50, generator=gen)
                img_path = output_folder / f'{image_id}.png'
                out.images[0].save(img_path)
                print(f"Image saved to {img_path}")
                torch.cuda.empty_cache()
            except Exception as e:
                print(f"Error processing ID {image_id}: {e}")
                failed_ids.append(image_id)
                if first_failed_pos is None:
                    first_failed_pos = start + offset

        # Save progress after each batch
        checkpoint = end if first_failed_pos is None else first_failed_pos
        with open('progress.txt', 'w') as f:
            f.write(str(checkpoint))
        print(f"Progress saved, batch {batch_num + 1} complete.")

    if failed_ids:
        print(f"Generation failed for IDs: {failed_ids}")
    return failed_ids

# Check for existing progress and resume
try:
    with open('progress.txt', 'r') as f:
        start_index = int(f.read().strip())
    print(f"Resuming from index {start_index}")
except FileNotFoundError:
    start_index = 0
    print("Starting from the beginning")
except ValueError:
    # An empty or garbled checkpoint (e.g. an interrupted write) should not abort the run.
    start_index = 0
    print("Could not parse progress.txt; starting from the beginning")

generate_images(df, start_idx=start_index)

## Answer questions about images

In [None]:
!pip install pillow



In [None]:
from PIL import Image
from transformers import pipeline
from pathlib import Path

# Load the visual question-answering model
# (checkpoint "dandelin/vilt-b32-finetuned-vqa", authenticated with the token read from config.json).
# `vqa_pipeline` is used as a global by detect_object_in_image below.
vqa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa", use_auth_token=hf_token)

def detect_object_in_image(image_path, object_name):
    """Ask the VQA model whether ``object_name`` is visible in the image at ``image_path``.

    Args:
        image_path: Path of the image file to inspect.
        object_name: Text inserted verbatim into the question
            "Is there a <object_name> in this picture?".

    Returns:
        True when the model's top answer is affirmative, otherwise False.
        False is also returned when the pipeline call raises, so callers
        cannot tell "absent" from "failed" without reading the printed log.

    Raises:
        Whatever ``Image.open`` raises if the file cannot be opened; only the
        pipeline call is guarded.
    """
    import re  # local import: only needed to tokenise the answer

    # NOTE(review): the fixed article "a" yields ungrammatical questions for
    # plural names (e.g. "Leaves", "People") — confirm acceptable.
    question = f"Is there a {object_name} in this picture?"

    # The context manager releases the file handle once the model is done with it.
    with Image.open(image_path) as image:
        try:
            response = vqa_pipeline(image, question, top_k=1)
            print("Pipeline response:", response)

            top = response[0] if isinstance(response, list) else response
            answer = top['answer'].lower()

            # Match "yes" as a whole word; a substring test would also accept
            # answers such as "eyes".
            return 'yes' in re.findall(r"[a-z]+", answer) or 'there is' in answer
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            return False

In [None]:
def update_df_with_image_analysis(df):
    """Add a ``negated_noun_presence`` flag to ``df`` in place.

    The column is initialised to False and, for every row whose
    ``images/<id>.png`` exists, set to the result of
    ``detect_object_in_image`` for that row's ``negated_noun``. Rows without
    an image keep False and are reported on stdout.
    """
    df['negated_noun_presence'] = False

    images_dir = Path('images')
    for index, row in df.iterrows():
        image_path = images_dir / f"{row['id']}.png"
        if not image_path.exists():
            # NOTE(review): the flag stays False here, which the later
            # answer_check (= not presence) scores as a success — confirm intended.
            print(f"No image found for ID {row['id']}")
            continue
        found = detect_object_in_image(image_path, row['negated_noun'])
        df.at[index, 'negated_noun_presence'] = found

update_df_with_image_analysis(df)

In [None]:
print(df.head())

   id                                           sentence negation  \
0   1                 The tree does not have any leaves.      Not   
1   2                 There is no moon in the night sky.       No   
2   3                The room is not filled with people.      Not   
3   4              There isn't a single bird in the sky.    Isn't   
4   5  The playground is empty, no children are playing.       No   

  negated_noun  negated_noun_presence  
0       Leaves                  False  
1         Moon                   True  
2       People                  False  
3         Bird                   True  
4     Children                  False  


In [None]:
# A negation prompt counts as satisfied when the negated noun was NOT detected.
df['answer_check'] = ~df['negated_noun_presence']

In [None]:
print(df.head())

   id                                           sentence negation  \
0   1                 The tree does not have any leaves.      Not   
1   2                 There is no moon in the night sky.       No   
2   3                The room is not filled with people.      Not   
3   4              There isn't a single bird in the sky.    Isn't   
4   5  The playground is empty, no children are playing.       No   

  negated_noun  negated_noun_presence  answer_check  
0       Leaves                  False          True  
1         Moon                   True         False  
2       People                  False          True  
3         Bird                   True         False  
4     Children                  False          True  


In [None]:
df.to_csv("negation_updated_dataframe.csv", index=False)

In [None]:
# Share of rows whose 'answer_check' is True, expressed as a percentage
percentage_true = df['answer_check'].mean() * 100

# Report it with two decimals
print(f"The percentage of True values is: {percentage_true:.2f}%")

The percentage of True values is: 40.00%
