# Set up

### Installations

In [1]:
!pip install transformers

!pip install openpyxl==3.1.2

!pip install requests


### Load model and processor

In [2]:
import transformers
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch

# Load the BLIP-2 (OPT-2.7b) processor, which prepares images/text for the model and decodes generated ids.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
# `from_pretrained` is asked explicitly for float32 weights; per the original author's note,
# other precisions caused errors in the text generation further below.
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float32)

In [3]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Move the model to the selected device; the generation functions below move their inputs to the same `device`.
model.to(device)

### Functions for image paths

In [1]:
##Functions to get dataset image paths and add them to a list
import os

## AI DESIGNS

# One-Pieces
def get_onepieces(folder_onepieces, extensions=['.png', '.jpg', '.jpeg']):
    """Collect the paths of image files located directly in ``folder_onepieces``.

    An entry is kept when its lower-cased file extension is contained in
    ``extensions``. Paths are returned in the order ``os.listdir`` yields them.
    """
    image_paths = []
    for entry_name in os.listdir(folder_onepieces):
        _, suffix = os.path.splitext(entry_name)
        if suffix.lower() not in extensions:
            continue
        image_paths.append(os.path.join(folder_onepieces, entry_name))
    return image_paths

# Folder with the AI-generated one-piece designs, and the image paths found in it.
folder_onepieces = '/work/AI_Designs/AI_Designs_OnePiece'
list_onepiece = get_onepieces(folder_onepieces)

# Skirts
def get_skirts(folder_skirts, extensions=['.png', '.jpg', '.jpeg']):
    """Return the paths of all entries in ``folder_skirts`` that have an image extension.

    The extension check is case-insensitive and uses the ``extensions`` list;
    the result follows the ordering of ``os.listdir``.
    """
    def _is_image(entry_name):
        return os.path.splitext(entry_name)[1].lower() in extensions

    matching_names = filter(_is_image, os.listdir(folder_skirts))
    return [os.path.join(folder_skirts, entry_name) for entry_name in matching_names]

# Folder with the AI-generated skirt designs, and the image paths found in it.
folder_skirts = '/work/AI_Designs/AI_Designs_BarbieSkirt'
list_skirts = get_skirts(folder_skirts)

#Snowsuit
def get_snowsuits(folder_snowsuits, extensions=['.png', '.jpg', '.jpeg']):
    """List image files in ``folder_snowsuits`` as full paths.

    Only entries whose lower-cased extension appears in ``extensions`` are
    returned, in ``os.listdir`` order.
    """
    image_paths = []
    for entry_name in os.listdir(folder_snowsuits):
        suffix = os.path.splitext(entry_name)[1].lower()
        if suffix in extensions:
            image_paths.append(os.path.join(folder_snowsuits, entry_name))
    return image_paths

# Folder with the AI-generated snowsuit designs, and the image paths found in it.
folder_snowsuits = '/work/AI_Designs/AI_Designs_Snowsuit'
list_snowsuits = get_snowsuits(folder_snowsuits)

#Tops
def get_tops(folder_tops, extensions=['.png', '.jpg', '.jpeg']):
    """Collect full paths of the image files found directly in ``folder_tops``.

    A file counts as an image when its extension, lower-cased, is listed in
    ``extensions``. Ordering is whatever ``os.listdir`` returns.
    """
    image_paths = []
    for entry_name in os.listdir(folder_tops):
        _, suffix = os.path.splitext(entry_name)
        if suffix.lower() not in extensions:
            continue
        image_paths.append(os.path.join(folder_tops, entry_name))
    return image_paths

# Folder with the AI-generated top designs, and the image paths found in it.
folder_tops = '/work/AI_Designs/AI_Designs_Top'
list_tops = get_tops(folder_tops)

## BASELINE IMAGES - 1000

# One-Pieces
def get_b1000_onepieces(folder_b1000_onepieces, extensions=['.png', '.jpg', '.jpeg']):
    """Return full paths of the baseline one-piece images in ``folder_b1000_onepieces``.

    Entries are filtered by a case-insensitive extension check against
    ``extensions`` and returned in ``os.listdir`` order.
    """
    def _is_image(entry_name):
        return os.path.splitext(entry_name)[1].lower() in extensions

    matching_names = filter(_is_image, os.listdir(folder_b1000_onepieces))
    return [os.path.join(folder_b1000_onepieces, entry_name) for entry_name in matching_names]

# Folder with the baseline one-piece images, and the image paths found in it.
folder_b1000_onepieces = '/work/Baseline_OnePiece_ONE'
list_b1000_onepiece = get_b1000_onepieces(folder_b1000_onepieces)

# Skirts
def get_b1000_skirts(folder_b1000_skirts, extensions=['.png', '.jpg', '.jpeg']):
    """List the baseline skirt images in ``folder_b1000_skirts`` as full paths.

    Keeps entries whose lower-cased extension is in ``extensions``; the order
    is that of ``os.listdir``.
    """
    image_paths = []
    for entry_name in os.listdir(folder_b1000_skirts):
        suffix = os.path.splitext(entry_name)[1].lower()
        if suffix in extensions:
            image_paths.append(os.path.join(folder_b1000_skirts, entry_name))
    return image_paths

# Folder with the baseline skirt images, and the image paths found in it.
folder_b1000_skirts = '/work/Baseline_Skirt_ONE'
list_b1000_skirts = get_b1000_skirts(folder_b1000_skirts)

#Snowsuit
def get_b1000_snowsuits(folder_b1000_snowsuits, extensions=['.png', '.jpg', '.jpeg']):
    """Collect full paths of the baseline snowsuit images in ``folder_b1000_snowsuits``.

    The extension test is case-insensitive against ``extensions``; results
    keep the ``os.listdir`` ordering.
    """
    image_paths = []
    for entry_name in os.listdir(folder_b1000_snowsuits):
        _, suffix = os.path.splitext(entry_name)
        if suffix.lower() not in extensions:
            continue
        image_paths.append(os.path.join(folder_b1000_snowsuits, entry_name))
    return image_paths

# Folder with the baseline snowsuit images, and the image paths found in it.
folder_b1000_snowsuits = '/work/Baseline_Snowsuit_ONE'
list_b1000_snowsuits = get_b1000_snowsuits(folder_b1000_snowsuits)

#Tops
def get_b1000_tops(folder_b1000_tops, extensions=['.png', '.jpg', '.jpeg']):
    """Return full paths of the baseline top images in ``folder_b1000_tops``.

    Entries pass when their lower-cased extension is in ``extensions``;
    ordering follows ``os.listdir``.
    """
    def _is_image(entry_name):
        return os.path.splitext(entry_name)[1].lower() in extensions

    matching_names = filter(_is_image, os.listdir(folder_b1000_tops))
    return [os.path.join(folder_b1000_tops, entry_name) for entry_name in matching_names]

# Folder with the baseline top images ("Oberteil" is German for "top"), and the image paths found in it.
folder_b1000_tops = '/work/Baseline_Oberteil_ONE'
list_b1000_tops = get_b1000_tops(folder_b1000_tops)


# Content Novelty

## Load model and processor

In [4]:
import transformers
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch
import pandas as pd
import numpy as np
from PIL import Image
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

# Reload the BLIP-2 processor and model (float32 weights) for the content-novelty section.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float32)

# A freshly loaded model lives on the CPU. The generation functions move their
# inputs to `device`, so the reloaded model must be moved there as well;
# otherwise generation fails with a device mismatch whenever CUDA is used.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

## Define function for generating text description

In [None]:
from PIL import Image
import torch
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
import numpy as np
from transformers import BertTokenizer, BertModel

# Reuse function to generate text from images
def generate_text_from_images(image_list, processor, model, device, max_new_tokens=90, min_new_tokens=60):
    """Generate one text description per image path.

    Args:
        image_list: Iterable of image file paths.
        processor: Callable that turns an image into model inputs and offers
            ``batch_decode`` for the generated ids.
        model: Object exposing ``generate``.
        device: Device the processor outputs are moved to before generation.
        max_new_tokens: Upper bound on newly generated tokens.
        min_new_tokens: Lower bound on newly generated tokens.

    Returns:
        List of stripped description strings, one per path, in input order.
    """
    results = []
    for image_path in image_list:
        # Load and preprocess the current image. (The previous version opened an
        # undefined name here instead of the loop variable.)
        image = Image.open(image_path).convert('RGB')
        inputs = processor(image, return_tensors="pt").to(device)

        # Inference only: no gradients are needed. This replaces the deprecated
        # `torch.cuda.amp.autocast(enabled=False)` wrapper, which only disabled autocasting.
        with torch.no_grad():
            generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens, min_new_tokens=min_new_tokens)

        # Decode the first (only) sequence of the batch.
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        results.append(generated_text)

    return results

# Initialize the BERT tokenizer and encoder ('bert-base-uncased').
# `get_embeddings` reads both as module-level names.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

#Get Text Embeddings
def get_embeddings(text):
    """Embed ``text`` as the mean of BERT's last hidden states.

    The text is tokenized with the module-level ``tokenizer`` (truncation and
    padding enabled), encoded by ``bert_model`` without gradient tracking,
    averaged over the token dimension and squeezed.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        token_states = bert_model(**encoded).last_hidden_state
    # Mean over the sequence dimension gives one vector per input text.
    sentence_vector = token_states.mean(dim=1)
    return sentence_vector.squeeze()

# Reuse processing function
def process_and_save(image_list, image_category, processor, model, device, output_file_prefix):
    """Describe every image, save the descriptions to Excel and embed them.

    Args:
        image_list: List of image file paths.
        image_category: Label used as the suffix of the output file name.
        processor: Processor passed through to ``generate_text_from_images``.
        model: Model passed through to ``generate_text_from_images``.
        device: Device passed through to ``generate_text_from_images``.
        output_file_prefix: Prefix of the Excel file name; the file is written
            to ``f"{output_file_prefix}_{image_category}.xlsx"``.

    Returns:
        Tuple ``(df, embeddings)``: the DataFrame with columns "Image" and
        "Generated Text", and a list with one embedding per description.
    """
    # Generate descriptions
    generated_texts = generate_text_from_images(image_list, processor, model, device)
    df = pd.DataFrame({"Image": image_list, "Generated Text": generated_texts})

    # Save descriptions to Excel. (The previous version referenced an undefined
    # name here and ignored `output_file_prefix`.)
    output_file = f"{output_file_prefix}_{image_category}.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Generated texts saved to {output_file}")

    # Compute embeddings
    descriptions = df["Generated Text"].tolist()
    embeddings = [get_embeddings(desc) for desc in descriptions]

    return df, embeddings

In [None]:
## Call function to generate text from images
# Each category gets its own result names so that no call overwrites another.
# Previously the one-piece sections processed the tops lists, the snowsuit
# embeddings were stored in `tops_embeddings`, and the baseline results replaced
# the AI-design results.

## AI DESIGNS

# One-Piece
onepiece_df, onepiece_embeddings = process_and_save(
    list_onepiece, "onepiece", processor, model, device, "generated_texts"
)

# Skirt
skirts_df, skirts_embeddings = process_and_save(
    list_skirts, "skirts", processor, model, device, "generated_texts"
)

# Snowsuit
snowsuits_df, snowsuits_embeddings = process_and_save(
    list_snowsuits, "snowsuits", processor, model, device, "generated_texts"
)

# Tops
tops_df, tops_embeddings = process_and_save(
    list_tops, "tops", processor, model, device, "generated_texts"
)

## BASELINE IMAGES

# One-Piece
onepiece_b_df, onepiece_b_embeddings = process_and_save(
    list_b1000_onepiece, "b_onepiece", processor, model, device, "generated_texts"
)

# Skirt
skirts_b_df, skirts_b_embeddings = process_and_save(
    list_b1000_skirts, "b_skirts", processor, model, device, "generated_texts"
)

# Snowsuit
snowsuits_b_df, snowsuits_b_embeddings = process_and_save(
    list_b1000_snowsuits, "b_snowsuits", processor, model, device, "generated_texts"
)

# Tops
tops_b_df, tops_b_embeddings = process_and_save(
    list_b1000_tops, "b_tops", processor, model, device, "generated_texts"
)

## Calculate Content Novelty

### One-Pieces

In [None]:
from PIL import Image
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel

# Define function to process and generate text from images with improved parameters
def generate_text_from_images(list_onepiece, processor, model, device, max_new_tokens=60, min_new_tokens=40, temperature=0.7, top_p=0.7, num_beams=4, do_sample=False):
    """Generate a prompted text description for every image path in ``list_onepiece``.

    Each image is opened, converted to RGB and handed to ``processor`` together
    with the prompt "This image features"; the ids returned by
    ``model.generate`` are decoded with ``processor.batch_decode`` and stripped.

    Args:
        list_onepiece: Iterable of image file paths.
        processor: Callable building model inputs; also offers ``batch_decode``.
        model: Object exposing ``generate``.
        device: Device the processor outputs are moved to.
        max_new_tokens: Upper bound on newly generated tokens.
        min_new_tokens: Lower bound on newly generated tokens.
        temperature: Sampling temperature; forwarded only when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; forwarded only when ``do_sample`` is true.
        num_beams: Number of beams passed to ``generate``.
        do_sample: Enable sampling. The default ``False`` keeps plain beam
            search: the earlier code passed ``temperature``/``top_p`` without
            enabling sampling, and those settings only apply to sampling modes.

    Returns:
        List of generated strings, one per input path, in input order.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "min_new_tokens": min_new_tokens,
        "num_beams": num_beams,
    }
    if do_sample:
        # Sampling knobs are only meaningful (and only forwarded) when sampling is on.
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)

    results = []
    for onepiece in list_onepiece:
        # Load and preprocess image
        image = Image.open(onepiece).convert('RGB')
        prompt = "This image features"
        inputs = processor(image, text=prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, **generation_kwargs)

        # Decode generated text
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        results.append(generated_text)

    return results

# Initialize the BERT tokenizer and encoder ('bert-base-uncased') used for embeddings.
# `get_embeddings` below reads both as module-level names.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def get_embeddings(text):
    """Return the mean-pooled last hidden state of ``bert_model`` for ``text``.

    Tokenization uses the module-level ``tokenizer`` with truncation and
    padding; the pooled result is squeezed before being returned.
    """
    model_inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = bert_model(**model_inputs).last_hidden_state
    pooled = hidden_states.mean(dim=1)  # average over the token dimension
    return pooled.squeeze()

# Generate a description for every AI-designed one-piece image and pair it with its path.
texts = generate_text_from_images(list_onepiece, processor, model, device)
df = pd.DataFrame({"Image": list_onepiece, "Generated Text": texts})

# Save descriptions to Excel (written to the current working directory, without the index column).
df.to_excel("generated_texts_onepiece.xlsx", index=False)


### Skirts

In [8]:
from PIL import Image
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel

# Define function to process and generate text from images with improved parameters
def generate_text_from_images(list_skirts, processor, model, device, max_new_tokens=60, min_new_tokens=40, temperature=0.7, top_p=0.7, num_beams=4, do_sample=False):
    """Generate a prompted text description for every image path in ``list_skirts``.

    Each image is opened, converted to RGB and handed to ``processor`` together
    with the prompt "This image features"; the ids returned by
    ``model.generate`` are decoded with ``processor.batch_decode`` and stripped.

    Args:
        list_skirts: Iterable of image file paths.
        processor: Callable building model inputs; also offers ``batch_decode``.
        model: Object exposing ``generate``.
        device: Device the processor outputs are moved to.
        max_new_tokens: Upper bound on newly generated tokens.
        min_new_tokens: Lower bound on newly generated tokens.
        temperature: Sampling temperature; forwarded only when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; forwarded only when ``do_sample`` is true.
        num_beams: Number of beams passed to ``generate``.
        do_sample: Enable sampling. The default ``False`` keeps plain beam
            search: the earlier code passed ``temperature``/``top_p`` without
            enabling sampling, and those settings only apply to sampling modes.

    Returns:
        List of generated strings, one per input path, in input order.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "min_new_tokens": min_new_tokens,
        "num_beams": num_beams,
    }
    if do_sample:
        # Sampling knobs are only meaningful (and only forwarded) when sampling is on.
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)

    results = []
    for skirt in list_skirts:
        # Load and preprocess image
        image = Image.open(skirt).convert('RGB')
        prompt = "This image features"
        inputs = processor(image, text=prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, **generation_kwargs)

        # Decode generated text
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        results.append(generated_text)

    return results

# Initialize the BERT tokenizer and encoder ('bert-base-uncased') used for embeddings.
# `get_embeddings` below reads both as module-level names.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def get_embeddings(text):
    """Compute a sentence embedding for ``text`` by averaging BERT token states.

    Uses the module-level ``tokenizer`` (truncation and padding on) and
    ``bert_model``; gradients are disabled and the result is squeezed.
    """
    tokenized = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        encoder_output = bert_model(**tokenized)
    # Average the last hidden state across tokens.
    return encoder_output.last_hidden_state.mean(dim=1).squeeze()

# Generate a description for every AI-designed skirt image and pair it with its path.
texts = generate_text_from_images(list_skirts, processor, model, device)
df = pd.DataFrame({"Image": list_skirts, "Generated Text": texts})

# Save descriptions to Excel (written to the current working directory, without the index column).
df.to_excel("generated_texts_skirts.xlsx", index=False)


### Snowsuits

In [None]:
from PIL import Image
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel

# Define function to process and generate text from images with improved parameters
def generate_text_from_images(list_snowsuits, processor, model, device, max_new_tokens=60, min_new_tokens=40, temperature=0.7, top_p=0.7, num_beams=4, do_sample=False):
    """Generate a prompted text description for every image path in ``list_snowsuits``.

    Each image is opened, converted to RGB and handed to ``processor`` together
    with the prompt "This image features"; the ids returned by
    ``model.generate`` are decoded with ``processor.batch_decode`` and stripped.

    Args:
        list_snowsuits: Iterable of image file paths.
        processor: Callable building model inputs; also offers ``batch_decode``.
        model: Object exposing ``generate``.
        device: Device the processor outputs are moved to.
        max_new_tokens: Upper bound on newly generated tokens.
        min_new_tokens: Lower bound on newly generated tokens.
        temperature: Sampling temperature; forwarded only when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; forwarded only when ``do_sample`` is true.
        num_beams: Number of beams passed to ``generate``.
        do_sample: Enable sampling. The default ``False`` keeps plain beam
            search: the earlier code passed ``temperature``/``top_p`` without
            enabling sampling, and those settings only apply to sampling modes.

    Returns:
        List of generated strings, one per input path, in input order.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "min_new_tokens": min_new_tokens,
        "num_beams": num_beams,
    }
    if do_sample:
        # Sampling knobs are only meaningful (and only forwarded) when sampling is on.
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)

    results = []
    for snowsuit in list_snowsuits:
        # Load and preprocess image
        image = Image.open(snowsuit).convert('RGB')
        prompt = "This image features"
        inputs = processor(image, text=prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, **generation_kwargs)

        # Decode generated text
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        results.append(generated_text)

    return results

# Initialize the BERT tokenizer and encoder ('bert-base-uncased') used for embeddings.
# `get_embeddings` below reads both as module-level names.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def get_embeddings(text):
    """Embed ``text`` by mean-pooling the last hidden state of ``bert_model``.

    The module-level ``tokenizer`` prepares the input (truncation and padding
    enabled); no gradients are tracked and the pooled tensor is squeezed.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        token_states = bert_model(**encoded).last_hidden_state
    sentence_vector = token_states.mean(dim=1)  # pool over tokens
    return sentence_vector.squeeze()

# Generate a description for every AI-designed snowsuit image and pair it with its path.
texts = generate_text_from_images(list_snowsuits, processor, model, device)
df = pd.DataFrame({"Image": list_snowsuits, "Generated Text": texts})

# Save descriptions to Excel (written to the current working directory, without the index column).
df.to_excel("generated_texts_snowsuits.xlsx", index=False)


### Tops

In [None]:
from PIL import Image
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel

# Define function to process and generate text from images with improved parameters
def generate_text_from_images(list_tops, processor, model, device, max_new_tokens=60, min_new_tokens=40, temperature=0.7, top_p=0.7, num_beams=4, do_sample=False):
    """Generate a prompted text description for every image path in ``list_tops``.

    Each image is opened, converted to RGB and handed to ``processor`` together
    with the prompt "This image features"; the ids returned by
    ``model.generate`` are decoded with ``processor.batch_decode`` and stripped.

    Args:
        list_tops: Iterable of image file paths.
        processor: Callable building model inputs; also offers ``batch_decode``.
        model: Object exposing ``generate``.
        device: Device the processor outputs are moved to.
        max_new_tokens: Upper bound on newly generated tokens.
        min_new_tokens: Lower bound on newly generated tokens.
        temperature: Sampling temperature; forwarded only when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; forwarded only when ``do_sample`` is true.
        num_beams: Number of beams passed to ``generate``.
        do_sample: Enable sampling. The default ``False`` keeps plain beam
            search: the earlier code passed ``temperature``/``top_p`` without
            enabling sampling, and those settings only apply to sampling modes.

    Returns:
        List of generated strings, one per input path, in input order.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "min_new_tokens": min_new_tokens,
        "num_beams": num_beams,
    }
    if do_sample:
        # Sampling knobs are only meaningful (and only forwarded) when sampling is on.
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)

    results = []
    for top in list_tops:
        # Load and preprocess image
        image = Image.open(top).convert('RGB')
        prompt = "This image features"
        inputs = processor(image, text=prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, **generation_kwargs)

        # Decode generated text
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        results.append(generated_text)

    return results

# Initialize the BERT tokenizer and encoder ('bert-base-uncased') used for embeddings.
# `get_embeddings` below reads both as module-level names.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def get_embeddings(text):
    """Return a squeezed, mean-pooled BERT embedding for ``text``.

    ``text`` is tokenized by the module-level ``tokenizer`` with truncation and
    padding, then encoded by ``bert_model`` with gradients disabled.
    """
    model_inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = bert_model(**model_inputs).last_hidden_state
    pooled = hidden_states.mean(dim=1)  # average over the token dimension
    return pooled.squeeze()

# Generate a description for every AI-designed top image and pair it with its path.
texts = generate_text_from_images(list_tops, processor, model, device)
df = pd.DataFrame({"Image": list_tops, "Generated Text": texts})

# Save descriptions to Excel (written to the current working directory, without the index column).
df.to_excel("generated_texts_tops.xlsx", index=False)


# Visual Novelty 

## Load Model and processor

In [None]:
import os
import random
from PIL import Image
import torch
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from transformers import AutoFeatureExtractor, Dinov2Model, AutoImageProcessor
# DINOv2 (base) image processor and backbone used for the visual-novelty features.
# NOTE: this rebinds `processor` and `model`, which referred to BLIP-2 in earlier cells.
model_name = 'facebook/dinov2-base'
processor = AutoImageProcessor.from_pretrained(model_name)
# Feature extraction moves its inputs to `device`, so the model has to live on the
# same device; a freshly loaded model is on the CPU and would fail under CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Dinov2Model.from_pretrained(model_name).eval().to(device)
#feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

## Compute novelty score using cosine similarity

In [None]:
# Length every returned feature vector is padded or truncated to.
FEATURE_DIM = 768

def extract_visual_features(image_paths, processor, model, device):
    """Extract one fixed-length feature vector per image.

    Each image is opened, converted to RGB, resized to 224x224 and run through
    ``processor`` and ``model``. The model's ``last_hidden_state`` is averaged
    over its first remaining dimension after the batch dimension is squeezed,
    then zero-padded or truncated to ``FEATURE_DIM`` entries.

    Args:
        image_paths: Iterable of image file paths.
        processor: Callable accepting ``images=`` and ``return_tensors=``.
        model: Callable returning an object with ``last_hidden_state``.
        device: Device the processor outputs are moved to.

    Returns:
        List of 1-D CPU tensors of length ``FEATURE_DIM``. Images that raise
        during processing are reported and skipped, so the list can be shorter
        than ``image_paths``.
    """
    visual_features = []
    for image_path in image_paths:
        try:
            # Load and preprocess image
            image = Image.open(image_path).convert('RGB')
            # Resize image to a consistent size
            image = image.resize((224, 224))
            inputs = processor(images=image, return_tensors="pt").to(device)

            # Extract visual features
            with torch.no_grad():
                features = model(**inputs).last_hidden_state.squeeze(0).cpu()

            # Average over the patch dimension to get a single feature vector for each image
            flattened_features = features.mean(dim=0)

            # Pad or truncate so every vector has exactly FEATURE_DIM entries
            if flattened_features.shape[0] < FEATURE_DIM:
                padded_features = torch.nn.functional.pad(flattened_features, (0, FEATURE_DIM - flattened_features.shape[0]))
            else:
                padded_features = flattened_features[:FEATURE_DIM]
            # Store the normalised-length vector. (Previously the unpadded vector
            # was appended, leaving the padding step without effect.)
            visual_features.append(padded_features)
        except Exception as e:
            print(f"Error prepocessing {image_path}: {e}")
            continue

    return visual_features


# Load baseline images
# NOTE(review): only lower-case '.jpg' files are picked up here, and the folder is a
# relative path, unlike the path helpers earlier in the notebook. Confirm this is intended.
baseline_image_folder = 'Baseline_OnePiece/'
baseline_image_paths = [os.path.join(baseline_image_folder, f) for f in os.listdir(baseline_image_folder) if f.endswith('.jpg')]

# Subsample baseline images reproducibly: a dedicated, seeded generator drawing from
# the sorted path list selects the same subset on every run, so novelty scores
# can be reproduced.
num_baseline_samples = 10000
baseline_sampler = random.Random(42)
baseline_image_paths_sampled = baseline_sampler.sample(sorted(baseline_image_paths), min(num_baseline_samples, len(baseline_image_paths)))

# Extract visual features from baseline images
baseline_visual_features = extract_visual_features(baseline_image_paths_sampled, processor, model, device)

# Extract visual features from query images
visual_features_query = extract_visual_features(list_onepiece, processor, model, device)

# Stop early when either side produced no features at all
if not baseline_visual_features or not visual_features_query:
    raise ValueError("Feature extraction failed for all images.")

# Compute distances or similarities with baseline
def compute_distances_with_baseline(query_features, baseline_features):
    """Return the cosine-similarity matrix between query and baseline features.

    Both arguments are lists of equally sized tensors. Each list is stacked
    into a matrix and converted to NumPy before ``cosine_similarity`` is
    applied with the queries as first argument.
    """
    stacked_query = torch.stack(query_features)
    stacked_baseline = torch.stack(baseline_features)
    return cosine_similarity(stacked_query.numpy(), stacked_baseline.numpy())

# Cosine similarities between the query features (first argument) and the sampled baseline features.
cosine_sims_with_baseline = compute_distances_with_baseline(visual_features_query, baseline_visual_features)

# Compute novelty scores against baseline
def compute_novelty(cosine_sims):
    """Turn a similarity matrix into one novelty score per row.

    The score is one minus the mean of the row, i.e. one minus the average
    cosine similarity along axis 1.
    """
    mean_similarity_per_row = np.mean(cosine_sims, axis=1)
    return 1 - mean_similarity_per_row

# One novelty score per query image: 1 - mean cosine similarity to the baseline sample.
visual_novelty_onepiece_cosine_with_baseline = compute_novelty( cosine_sims_with_baseline)

# Prepare data to export into an Excel
# NOTE(review): `extract_visual_features` skips images that fail to load, so the score
# array can be shorter than `list_onepiece`; the two columns then no longer line up.
data = {
    'Image Path': list_onepiece,
    'Novelty Score (Cosine)': visual_novelty_onepiece_cosine_with_baseline,
}

# Create a DataFrame
df = pd.DataFrame(data)

# Export to Excel (current working directory, without the index column)
output_file = 'visual_novelty_onepieces.xlsx'
df.to_excel(output_file, index=False)

# Measure Complexity

## HSV Entropy

In [2]:
import numpy as np
import pandas as pd
from PIL import Image
from collections import Counter
import os

def calculate_shannon_entropy(image_array):
    """Compute the Shannon entropy (in bits) of the values in ``image_array``.

    The entropy is ``-sum(p * log2(p))`` over the relative frequency ``p`` of
    every distinct value. Counting is done with ``np.unique`` instead of a
    Python-level ``Counter`` over every pixel, which is much faster on
    image-sized arrays.

    Args:
        image_array: Array-like of values; any shape is accepted.

    Returns:
        The entropy in bits; ``0.0`` for empty input.
    """
    values = np.asarray(image_array)
    if values.size == 0:
        # No values means no uncertainty; also avoids dividing by zero.
        return 0.0
    _, counts = np.unique(values, return_counts=True)
    probabilities = counts / values.size
    # Every count is at least 1, so log2 never sees a zero probability.
    return -np.sum(probabilities * np.log2(probabilities))

def calculate_hsv_entropy(image_path):
    """Compute per-channel and combined Shannon entropy of an image in HSV space.

    The image at ``image_path`` is opened, converted to HSV and turned into an
    array; channels 0, 1 and 2 (hue, saturation, value) are each passed to
    ``calculate_shannon_entropy``.

    Returns:
        Tuple ``(h_entropy, s_entropy, v_entropy, combined_entropy)`` where the
        last item is the mean of the three channel entropies.
    """
    hsv_pixels = np.array(Image.open(image_path).convert('HSV'))

    # Channel order along the last axis: hue, saturation, value.
    channel_entropies = [
        calculate_shannon_entropy(hsv_pixels[:, :, channel_index])
        for channel_index in range(3)
    ]
    h_entropy, s_entropy, v_entropy = channel_entropies
    combined_entropy = np.mean(channel_entropies)

    return h_entropy, s_entropy, v_entropy, combined_entropy

def calculate_baseline_statistics(baseline_folder):
    """Compute mean and standard deviation of HSV entropies over a folder.

    Every file in ``baseline_folder`` whose lower-cased name ends with one of
    '.png', '.jpg', '.jpeg', '.bmp' or '.tiff' is measured with
    ``calculate_hsv_entropy``.

    Returns:
        Tuple ``(baseline_mean, baseline_std)`` of dicts keyed by "H", "S",
        "V" and "Combined".
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    channel_keys = ("H", "S", "V", "Combined")
    entropies = {key: [] for key in channel_keys}

    for filename in os.listdir(baseline_folder):
        if not filename.lower().endswith(image_suffixes):
            continue
        h_entropy, s_entropy, v_entropy, combined_entropy = calculate_hsv_entropy(
            os.path.join(baseline_folder, filename)
        )
        measured = (h_entropy, s_entropy, v_entropy, combined_entropy)
        for key, value in zip(channel_keys, measured):
            entropies[key].append(value)

    baseline_mean = {}
    baseline_std = {}
    for key, values in entropies.items():
        baseline_mean[key] = np.mean(values)
        baseline_std[key] = np.std(values)

    return baseline_mean, baseline_std

def calculate_z_scores(entropies, baseline_mean, baseline_std):
    """Standardise entropies against baseline statistics.

    For each of the keys "H", "S", "V" and "Combined" the z-score is
    ``(entropies[key] - baseline_mean[key]) / baseline_std[key]``.

    Returns:
        Dict with the same four keys mapping to the z-scores.
    """
    z_scores = {}
    for channel in ("H", "S", "V", "Combined"):
        deviation = entropies[channel] - baseline_mean[channel]
        z_scores[channel] = deviation / baseline_std[channel]
    return z_scores

# Paths to folders - update with the corresponding paths for skirts, snowsuits, tops
target_folder = '/work/AI_Designs/AI_Designs_OnePiece'  
baseline_folder = '/work/Baseline_OnePiece_ONE' 

# Calculate baseline statistics (mean and standard deviation of each entropy over the baseline folder)
baseline_mean, baseline_std = calculate_baseline_statistics(baseline_folder)

# List to store results (one dict per target image)
results = []

# Iterate over images in the target folder; files without a listed image extension are ignored
for filename in os.listdir(target_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        image_path = os.path.join(target_folder, filename)
        h_entropy, s_entropy, v_entropy, combined_entropy = calculate_hsv_entropy(image_path)
        
        # Same keys as the baseline statistics so the z-scores can be computed per key
        entropies = {
            "H": h_entropy,
            "S": s_entropy,
            "V": v_entropy,
            "Combined": combined_entropy
        }
        
        # Standardise this image's entropies against the baseline mean/std
        z_scores = calculate_z_scores(entropies, baseline_mean, baseline_std)
        
        # All values are rounded to four decimals for the report
        results.append({
            "Image": filename,
            "Hue Entropy (bits)": round(h_entropy, 4),
            "Saturation Entropy (bits)": round(s_entropy, 4),
            "Value Entropy (bits)": round(v_entropy, 4),
            "Combined HSV Entropy (bits)": round(combined_entropy, 4),
            "Hue Z-Score": round(z_scores["H"], 4),
            "Saturation Z-Score": round(z_scores["S"], 4),
            "Value Z-Score": round(z_scores["V"], 4),
            "Combined Z-Score": round(z_scores["Combined"], 4)
        })

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Save the DataFrame to an Excel file (current working directory, without the index column)
output_file = 'complexity_onepieces.xlsx'
df.to_excel(output_file, index=False)


Results saved to 1411_hsv_entropy_z_scores_results_bOberteil.xlsx


# Scrape Images

In [None]:
pip install apify-client

In [None]:

import os
import json
from apify_client import ApifyClient

# Initialize the Apify client with an API token taken from the environment.
# SECURITY: a literal token used to be committed in this cell. Treat that token
# as compromised and revoke it; never store credentials in the notebook.
api_token = os.environ.get("APIFY_API_TOKEN")
if not api_token:
    raise RuntimeError("Set the APIFY_API_TOKEN environment variable before running this cell.")
client = ApifyClient(api_token)

# Define the input configuration for the scraper
input_data = {
    "searchUrl": "https://www.amazon.de/s?k=snowsuit+kids&crid=QVL4VLTFLGLM&sprefix=snowsuit+%2Caps%2C181&ref=nb_sb_ss_pltr-xclick_2_9",
    "maxItems": 20,  # Adjust the number of items as needed
    "proxy": {
        "useApifyProxy": True,
        "apifyProxyGroups": ["RESIDENTIAL"]
    },
}

# Run the scraper actor and wait for it to finish
run = client.actor("curious_coder/amazon-scraper").call(run_input=input_data)

# Fetch the results from the run's default dataset
dataset_id = run["defaultDatasetId"]
items = client.dataset(dataset_id).list_items().items

# JSON file path
json_file_path = 'amazon_snowsuits.json'

# Write the results to the JSON file (UTF-8, non-ASCII characters kept as-is)
with open(json_file_path, mode='w', encoding='utf-8') as file:
    json.dump(items, file, ensure_ascii=False, indent=4)

print(f"Data saved to {json_file_path}")


Number of items fetched: 0
Data saved to asos_skirts.json


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9f956f77-27e9-4ead-b449-10d39e88f021' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>