First, get the embeddings for the 50 experiment images.

In [2]:
import pandas as pd

# Read the full embedding table and the ranking results table
embeddings_all = pd.read_csv('/home/wallacelab/complexity-experiment/Data/Embeddings/hebart49_scaled_embedding_data.csv')
ranking_results = pd.read_csv('/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart_ranking_results.csv')

# The two tables are matched on 'Image Name', so both must carry that column
key_present_in_both = all(
    'Image Name' in frame.columns for frame in (embeddings_all, ranking_results)
)

if not key_present_in_both:
    print("Error: 'Image Name' column missing in one or both datasets")
else:
    # A set gives fast membership tests for the names found in the ranking results
    ranked_image_names = set(ranking_results['Image Name'])

    # Keep only the embedding rows whose image also appears in the ranking results
    is_ranked_image = embeddings_all['Image Name'].isin(ranked_image_names)
    embeddings_filtered = embeddings_all[is_ranked_image]

    # Persist the filtered embeddings for the later cells
    embeddings_filtered.to_csv('/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_embeddings.csv', index=False)
    print("Filtered data has been saved successfully.")

Filtered data has been saved successfully.


Now, use participant data to give each image a complexity score.

In [4]:
import pandas as pd

# Read the ranking results table
rankings = pd.read_csv('/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart_ranking_results.csv')

# Collect every column whose name ends with 'Third Ordering'
ordering_columns = []
for column_name in rankings.columns:
    if column_name.endswith('Third Ordering'):
        ordering_columns.append(column_name)

# Row-wise mean over those columns
mean_rank = rankings[ordering_columns].mean(axis=1)
rankings['Average Third Ordering'] = mean_rank

# Map the mean onto a score: a mean of 1 becomes 1.0 and a mean of 50 becomes 0.0
highest_rank = 50
rankings['Normalized Average'] = 1 - ((mean_rank - 1) / (highest_rank - 1))

# Keep the image name together with the two derived columns
complexity_scores = rankings[['Image Name', 'Average Third Ordering', 'Normalized Average']]

# Write the scores out and show the first few rows as a sanity check
complexity_scores.to_csv('/home/wallacelab/complexity-experiment/Final Paper Data/ranking_complexity_scores.csv', index=False)
print(complexity_scores.head())

            Image Name  Average Third Ordering  Normalized Average
0     catapult_01b.jpg               16.666667            0.680272
1         sled_01b.jpg               24.833333            0.513605
2       alpaca_01b.jpg               27.833333            0.452381
3       anchor_01b.jpg               25.833333            0.493197
4  springboard_01b.jpg               22.833333            0.554422


Now multiply the normalized scores by the embeddings.

In [32]:
import csv

def sort_csv_by_image_name(input_csv_path, output_csv_path):
    """Sort the data rows of a CSV file by their first column.

    The first row is treated as the header and is written back unchanged.
    The remaining rows are ordered by the string value of column 0, which is
    the 'Image Name' column in the file this notebook passes in.

    The input is read completely before the output is opened, so the same
    path may be given for both arguments to sort a file in place.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_csv_path: Path of the CSV file to write.

    Blank lines in the input are dropped, because they have no first column
    to sort on. An empty input file produces an empty output file.
    """
    with open(input_csv_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file has no rows at all
        header = next(reader, None)
        # csv.reader yields [] for blank lines; indexing row[0] on those would fail
        rows = [row for row in reader if row]

    # Plain string comparison on the first column
    rows.sort(key=lambda row: row[0])

    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)

# The scores file is sorted in place: the same path serves as input and output
ranking_scores_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/ranking_complexity_scores.csv'
sort_csv_by_image_name(ranking_scores_csv, ranking_scores_csv)

In [35]:
import csv

def multiply_values(average_csv_path, values_csv_path, output_csv_path, num_dimensions=49):
    """Scale each image's dimension values by that image's 'Normalized Average'.

    Args:
        average_csv_path: CSV with 'Image Name' and 'Normalized Average' columns.
        values_csv_path: CSV with an 'Image Name' column and dimension columns
            named "0", "1", ... as strings.
        output_csv_path: Where the scaled table is written. It has the same
            columns, in the same order, as the values CSV.
        num_dimensions: Number of dimension columns to scale, i.e. the columns
            named "0" through str(num_dimensions - 1). Defaults to 49.
            Dimension columns that are absent from the file are skipped, and
            all other columns are copied through unchanged.

    Images in the values CSV that have no entry in the averages CSV are
    multiplied by 1 (left unscaled). A warning lists them, since such rows
    usually mean the image names in the two files do not match.
    """
    import warnings

    # Map image name -> weight
    with open(average_csv_path, mode='r', newline='', encoding='utf-8') as file:
        averages = {
            row['Image Name']: float(row['Normalized Average'])
            for row in csv.DictReader(file)
        }

    # Load the table to scale, remembering the column order for the output
    with open(values_csv_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        fieldnames = reader.fieldnames
        rows = list(reader)

    dimension_columns = [str(i) for i in range(num_dimensions)]
    unmatched_images = []

    for row in rows:
        image_name = row['Image Name']
        if image_name in averages:
            avg = averages[image_name]
        else:
            # Fall back to a neutral weight, but remember the miss so it can be reported
            avg = 1
            unmatched_images.append(image_name)

        for column in dimension_columns:
            if column in row:
                row[column] = float(row[column]) * avg

    if unmatched_images:
        warnings.warn(
            f"{len(unmatched_images)} image(s) had no 'Normalized Average' and were left "
            f"unscaled: {unmatched_images}",
            stacklevel=2,
        )

    # Write the scaled rows with the original header
    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

# Weight the filtered embeddings by the per-image normalized scores
complexity_scores_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/ranking_complexity_scores.csv'
filtered_embeddings_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_embeddings.csv'
weighted_embeddings_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_weighted_embeddings.csv'
multiply_values(complexity_scores_csv, filtered_embeddings_csv, weighted_embeddings_csv)

Now sum the weighted embeddings over all images for each dimension, normalize the sums into a probability distribution, and rank the dimensions to see which ones hold the most complexity.

In [37]:
import pandas as pd

# Load the weighted embeddings
input_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_weighted_embeddings.csv'
data = pd.read_csv(input_csv)

# Dimension columns are named "0" through "48"
columns_to_sum = [str(i) for i in range(49)]

# Sum each dimension column over all rows
sums = data[columns_to_sum].sum()

# Divide by the grand total so the per-dimension values sum to 1
total_sum = sums.sum()
# Name the Series so the CSV gets a 'Probability' header. An unnamed Series is
# written with the header "0", and the later cells that read
# data['Probability'] would then fail with a KeyError on a fresh run.
probability_distribution = (sums / total_sum).rename('Probability')

# Save the distribution; the index (dimension name) is written as the first column
output_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/probability_distribution.csv'
probability_distribution.to_csv(output_csv, header=True)

print('Probability distribution has been saved to', output_csv)

Probability distribution has been saved to /home/wallacelab/complexity-experiment/Final Paper Data/probability_distribution.csv


In [38]:
import pandas as pd

# Read back the per-dimension probabilities
probabilities_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/probability_distribution.csv'
probabilities = pd.read_csv(probabilities_csv)

# Order rows from highest to lowest probability, then renumber them from 0
descending = probabilities.sort_values(by='Probability', ascending=False)
ranked = descending.reset_index(drop=True)

# Write the ranked table without the row index
ranked_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/ranked_dimensions.csv'
ranked.to_csv(ranked_csv, index=False)

print('Ranked probabilities have been saved to', ranked_csv)

Ranked probabilities have been saved to /home/wallacelab/complexity-experiment/Final Paper Data/ranked_dimensions.csv


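Now determine how to re-weight the entropy: give each dimension a weight equal to its probability's signed distance from the mean probability, scaled by the largest such distance so that the weights lie between -1 and 1.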

In [41]:
import pandas as pd

# Load the per-dimension probabilities
input_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/probability_distribution.csv'
data = pd.read_csv(input_csv)

# Signed and absolute distance of each probability from the mean probability
average_probability = data['Probability'].mean()
signed_distance = data['Probability'] - average_probability
data['Distance_from_average'] = signed_distance.abs()

# Largest absolute distance; used as the scale so weights fall in [-1, 1]
max_distance = data['Distance_from_average'].max()

# Weight = signed distance / largest distance. The row(s) farthest from the mean
# get exactly +1 or -1 from this expression, so no special-casing with float
# equality is needed.
if max_distance == 0:
    # Every probability equals the mean: there is no spread to scale by, and
    # dividing would give 0/0. Use a zero weight for every dimension.
    data['Weight'] = 0.0
else:
    data['Weight'] = signed_distance / max_distance

# Save the table with the two new columns
output_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/weighted_dimensions.csv'
data.to_csv(output_csv, index=False)

print('Weights assigned and saved to', output_csv)


Weights assigned and saved to /home/wallacelab/complexity-experiment/Final Paper Data/weighted_dimensions.csv


Combine entropy and embeddings.

In [46]:
import pandas as pd

# Read the filtered embeddings; this same file is overwritten at the end of the cell
embeddings_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_embeddings.csv'
embeddings = pd.read_csv(embeddings_csv)

# Read the entropy scores
entropy_scores_csv = '/home/wallacelab/complexity-experiment/Data/EntropyScores/things_entropy_scores.csv'
entropy_scores = pd.read_csv(entropy_scores_csv)

# Left join on 'Image Name': every embedding row is kept
embeddings_with_entropy = embeddings.merge(entropy_scores, how='left', on='Image Name')

# NOTE(review): the result replaces the input file, so running this cell a second
# time merges the entropy columns in again. Confirm it is run once per pipeline run.
embeddings_with_entropy.to_csv(embeddings_csv, index=False)

print('Column "Entropy" appended to', embeddings_csv)


Column "Entropy" appended to /home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_embeddings.csv


Now get the weights for the dimensions. The file just_weights.csv contains weights which can be used to reweight dimensional entropy scores for any image.

In [71]:
import pandas as pd

# Read the table of per-dimension weights
weighted_dimensions_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/weighted_dimensions.csv'
weighted_dimensions = pd.read_csv(weighted_dimensions_csv)

# Keep everything from the fourth column onward (selection is by position, not by name)
leading_columns_to_skip = 3
weights_only = weighted_dimensions.iloc[:, leading_columns_to_skip:]

# Write the remaining column(s) without the row index
just_weights_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/just_weights.csv'
weights_only.to_csv(just_weights_csv, index=False)

print("Columns removed and saved to:", just_weights_csv)

Columns removed and saved to: /home/wallacelab/complexity-experiment/Final Paper Data/just_weights.csv


In [72]:
import pandas as pd

# Path to the input CSV file
input_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/filtered_hebart49_embeddings.csv'

# Read the CSV file into a DataFrame
data = pd.read_csv(input_csv)

# Keep only the 49 embedding dimension columns, named "0" through "48".
# Selecting by name rather than by position (previously iloc[:, 1:-2]) gives the
# same result however many extra columns the entropy merge has added to this
# file, and raises a KeyError if a dimension column is missing.
dimension_columns = [str(i) for i in range(49)]
data = data[dimension_columns]

# Path to the output CSV file
output_csv = '/home/wallacelab/complexity-experiment/Final Paper Data/just_hebart49_embeddings.csv'

# Save the dimension columns to a new CSV file
data.to_csv(output_csv, index=False)

print("Columns removed and saved to:", output_csv)

Columns removed and saved to: /home/wallacelab/complexity-experiment/Final Paper Data/just_hebart49_embeddings.csv


Save the new entropy values.

In [82]:
import numpy as np
import pandas as pd

# Read the per-dimension weights and the embedding matrix (one row per image)
weights_table = pd.read_csv('/home/wallacelab/complexity-experiment/Final Paper Data/just_weights.csv')
embedding_table = pd.read_csv('/home/wallacelab/complexity-experiment/Final Paper Data/just_hebart49_embeddings.csv')

# Pull both out as plain numpy arrays
weight_vector = weights_table['Weight'].values
embedding_matrix = embedding_table.values

# One dot product per row of the embedding matrix
row_dot_products = embedding_matrix.dot(weight_vector)

# Wrap the results in a single-column table and write it out
dot_product_table = pd.DataFrame({'Dot Product': row_dot_products})
dot_product_table.to_csv('/home/wallacelab/complexity-experiment/Final Paper Data/entropy_changes.csv', index=False)