## Imports


In [None]:
import os
from supabase import create_client, Client
from pathlib import Path
import google.generativeai as genai
from dotenv import load_dotenv
import pandas as pd

## Get json from Supabase


In [2]:
# Get keys and url for supabase
url: str = os.environ.get("VITE_SUPABASE_URL")
key: str = os.environ.get("VITE_SUPABASE_KEY")
supabase: Client = create_client(url, key)

In [None]:
response = supabase.table("users").select("*").execute()

### Sort responses based on last push to database


In [None]:
responses = sorted(response.data, key=lambda x: x['timestamp'])
responses

## Set up keys for Gemini


In [8]:
# Loads key from .env, make sure key is named <GEMINI_KEY> in .env
load_dotenv()
load_dotenv(verbose=True)
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)
key = os.environ.get('GEMINI_KEY')
genai.configure(api_key=key)
model = genai.GenerativeModel("gemini-2.0-flash-exp")

## Test Gemini with simple prompt


In [None]:
response = model.generate_content(
    "Explain how AI works, limit response to 1 sentence and a max of 100 characters")
print(response.text)

## Set up dataframe


In [None]:
seed = responses[0]['user']['seed']
cols = list(responses[0]['user']['steps'].keys())
cols.insert(0, "seed")
df = pd.DataFrame(columns=cols)
for i in range(responses[0]['user']['iters']):
    new_row = pd.DataFrame(
        [{'seed': seed, 'problem or task representation': seed}])
    df = pd.concat([df, new_row], ignore_index=True)
df.head(1)

In [None]:
df.shape

## Prompt Gemini for steps using seed


# TODO

- cache input tokens, to increase context window
- creativity/variation slider on webpage
- drop down for metric

* https://docs.google.com/document/d/1oMswDB1Cbzjkxh-FUdxGapPHUi0IV1YRqXeDSGt5ZWk/edit?tab=t.0#heading=h.4nr3f9pj4xli for metrics DROP DOWN/check all that apply
* look into metrics at the same time as generation
* clear cache between iterations
* store token/cost estimate
* compare cossign similarity score between each step between iteration then average, and std
* after eval is done return to stats to user


In [13]:
counter = 0

for row in range(df.shape[0]):
    for col in range(2, df.shape[1]):
        label = responses[0]['user']['steps'][df.columns[col]]
        prompt = (
            f"Given information about the following {str.upper(df.iloc[row, col-1])}"
            f"Step {str.upper(df.columns[col])}: {label} Please respond with ONLY the {df.columns[col]} step and absolutely no additional text or explanation."
        )
        genai.configure(api_key=key)
        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content(prompt,
                                          generation_config=genai.types.GenerationConfig(
                                              temperature=1.0))
        df.iloc[row, col] = response.text

## Save csv


In [None]:
df

In [20]:
pd.set_option('display.max_colwidth', None)
df.to_csv('sample.csv', index=False)
# df

In [None]:
from transformers import BertTokenizer, BertModel
import torch
import re
import random
import pandas as pd

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# load embeddings
model = BertModel.from_pretrained('bert-base-uncased')

# Set the model in evaluation mode to deactivate the DropOut modules to have reproducible results during evaluation
model.eval()

In [2]:
def preprocess_text(text, random_mask_option=False, indexed_tokens=True):
    # Basic text normalization (optional)
    text = text.lower()
    # Remove punctuation and special characters
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = re.sub(' +', ' ', text)  # Remove extra spaces
    text = text.strip()

    # Tokenize input
    tokenized_text = tokenizer.tokenize(text)

    '''Masking Option: To mask a token that we will try to predict back
    Masking a token can help improve the model's robustness and ability to generalize. 
    If the goal is to train or fine-tune a BERT model on a specific dataset using the MLM objective, we need to mask tokens during preprocessing. 
    This trains the model to better understand the context and improve its ability to predict or understand missing words.
    This helps the model to learn bidirectional context representations. 
    By predicting the masked tokens, BERT learns to understand the context of a word from both its left and right surroundings. 
    In here this is left optional because For tasks where we just want to extract embeddings for text 
    (e.g., for text similarity, clustering), masking is not necessary.'''
    if random_mask_option:
        mask_index = random.randint(
            1, len(tokenized_text) - 2) if len(tokenized_text) > 2 else 0
        if len(tokenized_text) > 0:
            tokenized_text[mask_index] = '[MASK]'

    '''Convert tokens to vocabulary indices
    BERT models and other transformer-based models require numerical input. 
    Specifically, they need token IDs that map to the model's vocabulary.'''
    if indexed_tokens:
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        return indexed_tokens
    else:
        return tokenized_text

In [3]:
def get_bert_embeddings(indexed_tokens):
    # Convert indexed tokens to tensor and create attention mask
    input_ids = torch.tensor([indexed_tokens])
    attention_mask = torch.tensor([[1] * len(indexed_tokens)])

    # Get the embeddings from BERT
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        last_hidden_states = outputs.last_hidden_state

    # The embeddings of the `[CLS]` token (representing the whole sentence) can be used
    sentence_embedding = last_hidden_states[:, 0, :].squeeze()
    return sentence_embedding

In [5]:
# compute the cosine similarity between two vectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


def calculate_cosine_similarity(embedding1, embedding2):
    embedding1 = embedding1.unsqueeze(0)  # Add batch dimension
    embedding2 = embedding2.unsqueeze(0)  # Add batch dimension
    similarity = cosine_similarity(embedding1, embedding2)
    return similarity[0][0]

In [7]:
filepath = 'sample.csv'
df = pd.read_csv(filepath)
df.head()

Unnamed: 0,seed,problem or task representation,preparation,generation,validation,outcome
0,"Over the past several years, advanced footwear...","Over the past several years, advanced footwear...",**Problem:** Heat buildup inside running shoes...,"Develop a breathable, moisture-wicking shoe up...",Validate breathability and moisture-wicking pe...,Assess if the test has been passed perfectly—i...
1,"Over the past several years, advanced footwear...","Over the past several years, advanced footwear...",* **Heat transfer in materials:** Understandin...,Develop a running shoe incorporating PCMs in t...,Validation would involve comparing CFD simulat...,Assess if the test has been passed perfectly—i...
2,"Over the past several years, advanced footwear...","Over the past several years, advanced footwear...",* **Problem:** Heat buildup inside running sho...,Improved ventilation through laser-perforated ...,Validation requires empirical testing of each ...,Step OUTCOME: assess if the test has been pass...
3,"Over the past several years, advanced footwear...","Over the past several years, advanced footwear...",**Problem:** Heat buildup inside running shoes...,Develop prototypes using various mesh densitie...,Compare CFD simulation results with experiment...,Step OUTCOME:\n
4,"Over the past several years, advanced footwear...","Over the past several years, advanced footwear...",* **Heat dissipation in materials science:** R...,Develop a novel running shoe incorporating por...,Validate the design and manufacturing process ...,Step OUTCOME:\n


In [8]:
# for every row in the df, preprocess the value of each column after the first one
for i in range(len(df)):
    print(f'Processing row {i+1}...')
    steps = []
    for j in range(1, len(df.columns)):
        steps.append(get_bert_embeddings(preprocess_text(df.iloc[i, j])))

    # for every pair of steps, compute the cosine similarity

    # initializing a dataframe that is as long as the number of steps
    df_similarities = pd.DataFrame(np.ones((len(steps), len(steps))), columns=range(
        1, len(steps)+1), index=range(1, len(steps)+1))

    for j in range(len(steps)):
        for k in range(j+1, len(steps)):
            df_similarities.iloc[j, k] = calculate_cosine_similarity(
                steps[j], steps[k])

    # make the bottom triangle of the matrix reflect the top triangle
    for j in range(len(steps)):
        for k in range(j+1, len(steps)):
            df_similarities.iloc[k, j] = df_similarities.iloc[j, k]

    df_similarities['Trial'] = i+1

    # write df_similarities to a csv file
    if i == 0:
        df_similarities.to_csv(f'similarity_matrix.csv')
    else:
        df_similarities.to_csv(f'similarity_matrix.csv',
                               mode='a', header=False)

Processing row 1...
Processing row 2...
Processing row 3...
Processing row 4...
Processing row 5...
Processing row 6...
Processing row 7...
Processing row 8...
Processing row 9...
Processing row 10...


In [23]:
df1 = pd.read_csv('similarity_matrix.csv')
df1.mean(axis=0), df1.std(axis=0)

(Unnamed: 0    3.000000
 1             0.691543
 2             0.702099
 3             0.683321
 4             0.616077
 5             0.607136
 Trial         5.500000
 dtype: float64,
 Unnamed: 0    1.428571
 1             0.230359
 2             0.219735
 3             0.222898
 4             0.226366
 5             0.282082
 Trial         2.901442
 dtype: float64)

In [22]:
import pandas as pd

# Load the similarity matrix data
data = pd.read_csv("similarity_matrix.csv")

# Drop the Unnamed column if it exists (leftover from index saving)
if "Unnamed: 0" in data.columns:
    data = data.drop(columns=["Unnamed: 0"])

# Group rows by 'Trial' and compute the average similarity matrix
average_matrix = data.groupby("Trial").mean().mean(axis=0)
average_matrix

# # Convert to a clean DataFrame for display
# average_df = pd.DataFrame(average_matrix)
# average_df.columns = range(1, len(average_df) + 1)
# average_df.index = range(1, len(average_df) + 1)

# # Print the averaged similarity matrix
# print(average_df)

# # Optionally, save the average similarity matrix to a CSV
# average_df.to_csv("average_similarity_matrix.csv", index=False)

1    0.691543
2    0.702099
3    0.683321
4    0.616077
5    0.607136
dtype: float64

## WORK IN PROGRESS: Evaluate each step of the process


In [None]:
responses[0]

crit_label = "useful"
crit_def = 'degree to which something is valuable, helpful, or solves a problem.'


for k, v in responses[0]['user']['steps'].items():

    EVAL_STRING = prompt = (
        f"Based on the definition of '{crit_label}' — {crit_def} — "
        f"rate the following {k} ({simulation[i][k]}) "
        f"Use highly critical judgement and the entire range of this scale: "
        f"1 (very low {crit_label}), "
        f"2 (low {crit_label}), "
        f"3 (slightly low {crit_label}), "
        f"4 (moderate {crit_label}), "
        f"5 (slightly high {crit_label}), "
        f"6 (high {crit_label}), "
        f"7 (very high {crit_label}). "
        f"Respond with ONLY a single number for an overall rating and absolutely no additional text or explanation."
    )

In [None]:
df = pd.read_csv('sample.csv')
df