In [2]:
import pandas as pd
import numpy as np
import pickle
from src.constants import *
from src.neuron import *
from src.utils import *
from src.network import *
from src.validation import *
from src.viz import *
from src.genetic_algorithm import *
from multiprocessing import Pool
from functools import partial
import ast
import time


In [15]:

def get_dna_score(curr_dna):
    """Evaluate one DNA sequence and print a short score summary.

    The DNA is converted with ``load_dna``; neurons, experiment inputs and
    scoring criteria are rebuilt on every call and handed to ``evaluate_dna``.

    Args:
        curr_dna: DNA sequence to evaluate; it is also echoed in the summary.

    Returns:
        The score dictionary returned by ``evaluate_dna``. The summary reads
        its ``"control"`` and ``"experimental"`` entries.
    """
    weight_matrix = load_dna(curr_dna)

    # === Preparing Network ===
    network_neurons = create_neurons()
    # The first element returned by create_experiment() is not needed here.
    _, stimulus_waves, alphas = create_experiment()
    scoring_criteria = define_criteria()
    # Denominator shown next to each per-condition score in the summary.
    max_score = TMAX // BIN_SIZE * len(CRITERIA_NAMES)

    # evaluate_dna also returns per-neuron data, which this function discards.
    dna_score, _ = evaluate_dna(
        dna_matrix=weight_matrix,
        neurons=network_neurons,
        alpha_array=alphas,
        input_waves=stimulus_waves,
        criteria=scoring_criteria,
        curr_dna=curr_dna,
    )

    total_score = sum(dna_score.values())
    overall_fraction = total_score / (2 * max_score)

    print(f'    === DNA: {curr_dna}')
    print(f'    === Control: {dna_score["control"]}/{max_score}')
    print(f'    === Experimental: {dna_score["experimental"]}/{max_score}')
    print(f'    === Overall: {total_score}({overall_fraction:.2%})')
    print('\n')

    return dna_score

def evaluate_dna_change(original_dna, index, original_score):
    """Measure the score lost when a single DNA entry is knocked out.

    Args:
        original_dna: DNA sequence; it is copied, never modified.
        index: Position of the entry to set to zero in the copy.
        original_score: Total score of the unmodified DNA.

    Returns:
        ``original_score`` minus the summed scores of the knocked-out DNA.
        Positive values mean the knock-out made the score worse.
    """
    knockout_dna = original_dna.copy()
    knockout_dna[index] = 0

    # get_dna_score returns one score per condition; collapse them by summing.
    knockout_total = sum(get_dna_score(knockout_dna).values())

    return original_score - knockout_total

def process_dna_row(row):
    """Build the per-position score-change vector for one DNA row.

    Args:
        row: Mapping-like object with a ``'dna'`` sequence and its total
            ``'dna_score'``.

    Returns:
        A list as long as the DNA. Positions holding a zero weight stay 0;
        every other position holds the value returned by
        ``evaluate_dna_change`` for knocking out that position.
    """
    genome = row['dna']
    baseline_score = row['dna_score']

    deltas = [0] * len(genome)
    for position, weight in enumerate(genome):
        # Only entries that are currently non-zero can be knocked out.
        if weight != 0:
            deltas[position] = evaluate_dna_change(genome, position, baseline_score)

    return deltas

def filter_data(original_df, completely_reduced_dna, dna_threshold=730,
                max_synapses=20, threshold=5):
    """Run one pruning pass and return the DNAs to test in the next pass.

    Steps:
      1. Keep rows whose ``dna_score`` is strictly above ``dna_threshold``.
      2. Keep one row per connectivity pattern (which entries are non-zero),
         restricted to patterns with at most ``max_synapses`` non-zero
         entries. Rows are sorted by score first, so the highest-scoring row
         of each pattern is the one kept.
      3. Knock out every non-zero entry of each kept row via
         ``process_dna_row`` to get its score change.
      4. An entry is *removable* when it is non-zero and its score change is
         ``<= threshold``. A row with no removable entry is fully reduced and
         is appended to ``completely_reduced_dna``. Every other row produces
         one child per removable entry, with that entry set to zero.

    Args:
        original_df: DataFrame with at least ``'dna'`` and ``'dna_score'``
            columns.
        completely_reduced_dna: DataFrame used as an accumulator. Fully
            reduced rows are appended to it IN PLACE so the caller's object
            sees them. ``None`` disables recording.
        dna_threshold: Minimum (exclusive) score for a row to be considered.
        max_synapses: Maximum number of non-zero entries for a row to be
            considered.
        threshold: Largest score change for which an entry is still
            considered removable.

    Returns:
        DataFrame with columns ``'dna'`` and ``'dna_score'`` holding the
        children, where the score is the parent's score minus the measured
        change. Returns an empty DataFrame when there are no children.
    """
    # Debug output kept from the original notebook.
    print("Columns in original_df:", original_df.columns)
    print("First few rows of original_df:\n", original_df.head())

    # === Filter for high scoring runs ===
    filtered_df = (
        original_df[original_df['dna_score'] > dna_threshold]
        .sort_values(by='dna_score', ascending=False)
        .reset_index(drop=True)
    )

    # === Filter for unique configurations ===
    # Only the first (highest-scoring) row of each connectivity pattern is
    # kept. This can drop more "resilient" weight sets with the same pattern,
    # which is accepted in exchange for far fewer simulations.
    seen_configs = set()
    unique_indices = []
    for index, dna in filtered_df['dna'].items():
        config = tuple(1 if weight != 0 else 0 for weight in dna)
        if sum(config) <= max_synapses and config not in seen_configs:
            seen_configs.add(config)
            unique_indices.append(index)

    unique_df = filtered_df.loc[unique_indices].copy().reset_index(drop=True)
    print(f"Found {len(unique_df)} unique configurations")
    print(unique_df)

    # === Knock-out scan: one score-change vector per row, e.g. [101, 0, 0, 8, ...] ===
    results = []
    for index, row in unique_df.iterrows():
        print(index)
        results.append((row, process_dna_row(row)))

    for row, score_change_list in results:
        print(f'Index: {row.name}, Score Changes: {score_change_list}')

    # === Split rows into fully reduced parents and once-pruned children ===
    minimized_dna_list = []
    for row, score_change_list in results:
        dna = row['dna']
        changes = [int(change) for change in score_change_list]

        # Single predicate for both decisions. A live synapse whose removal
        # changes the score by exactly 0 is removable; it must not be
        # mistaken for an entry that was already zero.
        removable = [
            j for j, weight in enumerate(dna)
            if weight != 0 and changes[j] <= threshold
        ]

        if not removable:
            if completely_reduced_dna is not None:
                _append_row_in_place(completely_reduced_dna, row)
            continue

        for j in removable:
            minimized_dna = dna.copy()
            minimized_dna[j] = 0
            minimized_dna_list.append({
                'dna': minimized_dna,
                'dna_score': row['dna_score'] - changes[j],
            })

    # The children are re-filtered and re-scanned by the next call.
    return pd.DataFrame(minimized_dna_list)


def _append_row_in_place(frame, row):
    """Append ``row`` (a Series keyed by column name) to ``frame`` without rebinding it."""
    # .loc enlargement needs the columns to exist, and the accumulator may
    # start out as a bare pd.DataFrame().
    for column in row.index:
        if column not in frame.columns:
            frame[column] = pd.Series(dtype=object)

    # Pick a label that is not in use so an existing row is never overwritten.
    label = len(frame)
    while label in frame.index:
        label += 1
    frame.loc[label] = row

def load_and_flatten_data(file_path):
    """Load pickled per-generation results and flatten them into one table.

    The pickle is expected to hold a mapping ``generation -> iterable of
    entries``, where every entry provides ``'dna'`` and ``'dna_score'``.

    Args:
        file_path: Path to the pickle file.

    Returns:
        DataFrame with columns ``generation``, ``dna`` and ``dna_score``,
        sorted by ``dna_score`` in descending order. The index is not reset.
        A pickle without any entries yields an empty DataFrame with the same
        columns.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file_path, 'rb') as file:
        combined_data_dict = pickle.load(file)

    columns = ['generation', 'dna', 'dna_score']
    records = [
        {
            'generation': generation,
            'dna': entry['dna'],
            'dna_score': entry['dna_score'],
        }
        for generation, population_results in combined_data_dict.items()
        for entry in population_results
    ]

    # Pinning the columns keeps sort_values working when there are no records.
    flat_df = pd.DataFrame(records, columns=columns)
    return flat_df.sort_values(by='dna_score', ascending=False)


In [5]:

# Wall-clock start; the pruning-loop cell further down reads this to report elapsed time.
start_time = time.time()

# === Load the combined data ===
# NOTE(review): absolute, machine-specific path. Another cell loads './data/unique_df7.pkl'
# relative to the working directory, so a relative path may work here too — confirm before changing.
file_path = '/Users/stevenwendel/Documents/GitHub/bg/data/J_high_gen_2025-03-13_04-00-56.pkl'
# One row per entry (generation, dna, dna_score), sorted by dna_score in descending order.
untested_dna = load_and_flatten_data(file_path)
display(untested_dna)


Unnamed: 0,generation,dna,dna_score
499000,998,"[1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...",738
498000,996,"[1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...",738
487000,974,"[1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...",738
486000,972,"[1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...",738
496500,993,"[1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...",738
...,...,...,...
194999,389,"[125, 0, 0, 437, -71, -303, -2, -8, -922, -5, ...",341
1499,2,"[267, 81, 649, 383, -174, -186, -235, -894, -5...",333
179499,358,"[111, 1, 1, 625, -35, -57, -1, -12, -15, -7, -...",315
106999,213,"[373, 44, 7, 489, -43, -677, -7, -10, -85, -19...",309


In [13]:
# How many rows of untested_dna carry exactly the same DNA as the first row?
first_row_dna = untested_dna['dna'].iloc[0]
# A DNA may be stored as the string form of a list; parse it back in that case.
if isinstance(first_row_dna, str):
    first_row_dna_list = ast.literal_eval(first_row_dna)
else:
    first_row_dna_list = first_row_dna

# Parse each row on the fly and count those identical to the reference DNA.
same_count = sum(
    1
    for dna in (
        ast.literal_eval(raw) if isinstance(raw, str) else raw
        for raw in untested_dna['dna']
    )
    if len(dna) == len(first_row_dna_list) and dna == first_row_dna_list
)

print(f"Number of rows in untested_dna that are the same as the first row: {same_count}")


Number of rows in untested_dna that are the same as the first row: 46


In [16]:


# Accumulator for fully reduced DNAs; the same DataFrame object is passed to filter_data on every pass.
completely_reduced_dna = pd.DataFrame()

iteration = 0  # Pass counter; also used to name the per-pass CSV files

# Each pass replaces untested_dna with the candidates returned by filter_data,
# so the loop ends as soon as a pass returns no rows.
# NOTE: untested_dna is overwritten here; re-run the loading cell before re-running this one.
while len(untested_dna) > 0:
    print(f'Iteration: {iteration} ==== Current untested size: {len(untested_dna)}')
    untested_dna = filter_data(untested_dna, completely_reduced_dna)

    # Save both tables after every pass
    untested_dna.to_csv(f'untested_data_pass_{iteration}.csv', index=False)
    completely_reduced_dna.to_csv(f'completely_reduced_data_pass_{iteration}.csv', index=False)
    
    iteration += 1  # Increment iteration counter

# start_time is set in the data-loading cell, so the figure covers everything run since then.
print(f'Time taken: {time.time() - start_time:.2f} seconds')

# TODO: consider keeping the original row index instead of dropping it. It could act as a barcode for
# tracing which rows are kept or dropped, but it may interfere with the positional (iloc) lookups.

Iteration: 0 ==== Current untested size: 500000
Columns in original_df: Index(['generation', 'dna', 'dna_score'], dtype='object')
First few rows of original_df:
        generation                                                dna  \
499000        998  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...   
498000        996  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...   
487000        974  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...   
486000        972  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...   
496500        993  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...   

        dna_score  
499000        738  
498000        738  
487000        738  
486000        738  
496500        738  
Found 6 unique configurations
  generation                                                dna  dna_score
0        998  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...        738
1        865  [999, 0, 0, 771, -87, 0, 0, 0, -27, 0, 0, 0, -...        737
2        814  [313, 0, 0, 733,

In [5]:
# Reload a previously pickled table into `unique_df` (path is relative to the working directory).
# NOTE: pickle.load can execute arbitrary code; only open files you produced yourself.
with open('./data/unique_df7.pkl', 'rb') as f:
    unique_df = pickle.load(f)

print(unique_df)



  generation                                                dna  dna_score
0        998  [1000, 0, 0, 885, -8, 0, 0, 0, -43, 0, 0, 0, -...        738
1        865  [999, 0, 0, 771, -87, 0, 0, 0, -27, 0, 0, 0, -...        737
2        814  [313, 0, 0, 733, -22, -23, 0, 0, -26, 0, 0, 0,...        736
3        855  [999, 0, 0, 612, -14, -14, 0, 0, -10, 0, 0, 0,...        736
4        711  [163, 0, 0, 1000, -14, 0, 0, 0, -21, 0, 0, 0, ...        732
5        739  [473, 0, 0, 646, -23, -11, 0, 0, -167, 0, 0, 0...        732


In [3]:
# NOTE(review): absolute, machine-specific path.
pkl_file = '/Users/stevenwendel/Documents/GitHub/bg/data/K_high_gen_2025-03-15_01-15-23.pkl'

# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(pkl_file, 'rb') as f: 
    ga_results = pickle.load(f)

# flatten_pkl is not defined in this notebook; presumably it comes from one of the
# `from src.* import *` lines — TODO confirm which module provides it.
flattened_results = flatten_pkl(ga_results)


In [1]:
import pandas as pd
import numpy as np
import pickle
from src.constants import *
from src.neuron import *
from src.utils import *
from src.network import *
from src.validation import *
from src.viz import *
from src.genetic_algorithm import *
from multiprocessing import Pool
from functools import partial
import ast

# NOTE(review): absolute, machine-specific path; this is the same file the
# load_and_flatten_data cell reads through `file_path`.
pkl_file = '/Users/stevenwendel/Documents/GitHub/bg/data/J_high_gen_2025-03-13_04-00-56.pkl'

# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(pkl_file, 'rb') as f: 
    ga_results = pickle.load(f)


In [2]:

# NOTE: `intial_df` is a misspelling of "initial"; the clean_df call further down uses the
# same spelling, so rename both together if this is ever corrected.
intial_df = flatten_pkl(ga_results)


In [None]:

def clean_df(initial_df, threshold=700):
    """Keep high-scoring rows, drop repeated DNAs and sort by score.

    Args:
        initial_df: DataFrame with ``'dna'`` and ``'dna_score'`` columns.
        threshold: Minimum (inclusive) ``dna_score`` for a row to be kept.

    Returns:
        A new DataFrame with the rows scoring at least ``threshold``. Only
        the first occurrence of each distinct DNA is kept, and rows are
        sorted by ``dna_score`` in descending order. The index is not reset.
    """
    def _dna_key(dna):
        # drop_duplicates cannot hash list or array cells, so compare those
        # through a tuple stand-in and leave hashable values as they are.
        try:
            hash(dna)
        except TypeError:
            return tuple(dna)
        return dna

    cleaned_df = initial_df[initial_df['dna_score'] >= threshold]
    is_repeat = cleaned_df['dna'].map(_dna_key).duplicated()
    cleaned_df = cleaned_df[~is_repeat]
    cleaned_df = cleaned_df.sort_values(by='dna_score', ascending=False)
    return cleaned_df

# threshold=650 overrides clean_df's default of 700.
cleaned_df = clean_df(intial_df, threshold=650)


In [None]:

# NOTE: this rebinds `cleaned_df`, so re-running the cell feeds it its own output.
# get_unique_representatives is not defined in this notebook; presumably it comes from
# one of the `from src.* import *` lines — TODO confirm.
cleaned_df = get_unique_representatives(cleaned_df, max_synapses=20)
display(cleaned_df.head())
