In [1]:
import copy
import dataclasses
import json
import os
import random
from dataclasses import dataclass
from random import randint

import numpy as np
import pandas as pd

In [2]:
def reset_data(source_path='Data/initial_data_1cat.json',
               pivoted_data_path='Data/pivoted_data.json'):
    """Rebuild the per-word table from the nested source JSON and save it as JSON lines.

    The source file is read with ``pd.read_json`` and the mapping found under
    ``['Category']['words']`` is reshaped into one row per word key with one
    column per property.

    Parameters
    ----------
    source_path : str
        Path of the nested input JSON. Defaults to the path this notebook has
        always used.
    pivoted_data_path : str
        Path the reshaped table is written to (``orient='records'``,
        ``lines=True``). Defaults to the path this notebook has always used.

    Returns
    -------
    str
        ``pivoted_data_path``, i.e. the file that was just written.
    """
    raw = pd.read_json(source_path)

    # json_normalize flattens the nested mapping into a single row whose column
    # names are dotted "<word_key>.<property>" strings; transposing turns every
    # such column into its own row, and reset_index exposes the dotted name as
    # an ordinary column called 'index'.
    flat = pd.json_normalize(raw['Category']['words']).transpose().reset_index()

    # Separate the word key from the property name.
    # NOTE(review): this assumes neither part contains a '.'; an extra dot would
    # yield more than two pieces and the two-column assignment should then fail.
    flat[['word_key', 'property']] = flat['index'].str.split('.', expand=True)

    # One row per word, one column per property (the values sit in column 0).
    pivoted = flat.pivot(index='word_key', columns='property', values=0).reset_index()

    pivoted.to_json(pivoted_data_path, orient='records', lines=True)

    # Return the path that was actually written rather than a second literal.
    return pivoted_data_path

In [3]:
@dataclass
class ExemplarData:
    """One stored exemplar of a word together with its current strength.

    The static helpers operate on a plain list of ``ExemplarData`` records
    (the "exemplar store") that the caller owns.
    """
    # Key of the word this exemplar belongs to.
    word_key: str
    # The exemplar value itself, stored exactly as the caller passed it.
    # NOTE(review): annotated ``str`` but the outputs in this notebook show
    # numeric values - the annotation may be stale; not enforced at runtime.
    exemplar: str
    # Position of the exemplar inside the word's exemplar list.
    exemplar_index: int
    # Frequency of the word at the time the exemplar was recorded.
    frequency: int
    # Current strength; new records start at 1.0.
    exemplar_strength: float

    @staticmethod
    def add_exemplar(exemplar_data_list, chosen_word, new_exemplar, exemplar_index, frequency):
        """Append a fresh, full-strength record to ``exemplar_data_list`` (in place).

        ``frequency`` is coerced with ``int()`` so numpy integers or numeric
        strings end up as plain Python ints. Returns ``None``.
        """
        exemplar_data_list.append(
            ExemplarData(
                word_key=chosen_word,
                exemplar=new_exemplar,
                exemplar_index=exemplar_index,
                frequency=int(frequency),
                # Float literal so the stored value matches the declared field type.
                exemplar_strength=1.0
            )
        )

    @staticmethod
    def remove_weak_exemplars(exemplar_data_list, threshold=1e-10):
        """Return a new list holding only records with strength >= ``threshold``.

        The input list is not modified. ``threshold`` defaults to the cut-off
        this notebook has always used (1e-10).
        """
        return [data for data in exemplar_data_list if data.exemplar_strength >= threshold]

    @staticmethod
    def get_weights(exemplar_data_list, chosen_word, exemplars_list):
        """Compute one sampling weight per entry of ``exemplars_list``.

        The weight at position ``i`` is the summed strength of every stored
        record that belongs to ``chosen_word``, has ``exemplar_index == i`` and
        whose ``exemplar`` equals ``exemplars_list[i]``. Positions with no
        positive accumulated strength get the default weight 1.

        Returns a list with the same length as ``exemplars_list``.
        """
        n_exemplars = len(exemplars_list)
        weights = [0] * n_exemplars  # Initialize all weights to 0

        # Single pass over the store: every record can only ever contribute to
        # the slot named by its own exemplar_index, so there is no need to
        # rescan the whole store once per candidate exemplar.
        for data in exemplar_data_list:
            if data.word_key != chosen_word:
                continue
            idx = data.exemplar_index
            # Explicit bounds check: a negative index must not wrap around to
            # the end of the list, and a stale index past the end is ignored.
            if 0 <= idx < n_exemplars and data.exemplar == exemplars_list[idx]:
                weights[idx] += data.exemplar_strength  # Incrementing the weight

        # Replace any zero weights with 1 (default weight)
        return [weight if weight > 0 else 1 for weight in weights]

def strengths(runs, iterations, k = "x"):
    """Simulate exemplar accumulation with decay and save each run's final store.

    Every iteration draws a word with probability proportional to its
    frequency, draws one of that word's exemplars using the weights from
    ``ExemplarData.get_weights``, multiplies the strength of every stored
    exemplar by ``k``, appends the drawn exemplar at full strength and prunes
    records whose strength fell below the cut-off of
    ``ExemplarData.remove_weak_exemplars``.

    Parameters
    ----------
    runs : int
        Number of independent simulations. Run ``a`` (1-based) is written to
        ``Outputs/Old/strengths_r{a}_i{iterations}_k{tag}.json``.
    iterations : int
        Number of draws per run.
    k : float or "x"
        Decay multiplier applied to every stored strength per iteration. The
        sentinel ``"x"`` (default) means ``1 - 1 / sum(frequencies)``.

    Returns
    -------
    float
        The decay multiplier that was actually used.

    Notes
    -----
    The random module is not seeded here, so results differ between calls
    unless the caller seeds it.
    """
    pivoted_data_path = reset_data()

    # The word table is not modified anywhere in the simulation, so it is read
    # once here instead of being re-parsed from disk on every iteration.
    word_table = pd.read_json(pivoted_data_path, orient='records', lines=True)

    if k == "x":
        k = 1 - 1/int(word_table['frequency'].sum())

    words_data = word_table['word_key'].tolist()
    frequencies = word_table['frequency'].astype(int).tolist()

    # word_key -> (exemplar list, frequency). setdefault keeps the first row for
    # a key, matching a ".iloc[0]" lookup on the filtered frame.
    word_lookup = {}
    for word, exemplars, word_frequency in zip(
        words_data,
        word_table['exemplars'].tolist(),
        word_table['frequency'].tolist(),
    ):
        word_lookup.setdefault(word, (exemplars, word_frequency))

    # Tag used in the output filename: k rounded to five decimals with every '0'
    # and the decimal point stripped. Distinct k values can collide (0.909 and
    # 0.99 both give "99"); the scheme is kept because the cell that calls this
    # function rebuilds the same tag to locate the file.
    k_str = str(round(k, 5)).replace('0', '').replace('.', '')

    # Create the output directory before simulating so a missing folder cannot
    # discard a long run at the final write.
    os.makedirs("Outputs/Old", exist_ok=True)

    for a in range(runs):
        exemplar_data_list = []

        for i in range(iterations):
            chosen_word = random.choices(words_data, weights=frequencies, k=1)[0]
            exemplars_list, frequency = word_lookup[chosen_word]

            # Get weights for the current exemplars
            weights = ExemplarData.get_weights(exemplar_data_list, chosen_word, exemplars_list)

            # Select an exemplar based on updated weights
            exemplar_index = random.choices(range(len(exemplars_list)), weights=weights, k=1)[0]
            new_exemplar = exemplars_list[exemplar_index]

            # Decay strengths of existing exemplars (the new one is added after
            # the decay, so it enters the store undecayed)
            for data in exemplar_data_list:
                data.exemplar_strength *= k

            # Update exemplar data
            ExemplarData.add_exemplar(exemplar_data_list, chosen_word, new_exemplar, exemplar_index, frequency)
            exemplar_data_list = ExemplarData.remove_weak_exemplars(exemplar_data_list)

        # Save the exemplar data to a file
        strengths_path = f"Outputs/Old/strengths_r{a+1}_i{iterations}_k{k_str}.json"

        with open(strengths_path, 'w') as f:
            json.dump([dataclasses.asdict(data) for data in exemplar_data_list], f)

    return k


In [4]:
# One run of 20,000 draws with an explicitly supplied decay multiplier.
k = strengths(runs=1, iterations=20000, k=1 - 5/492)

# Filename tag for this k: its 5-decimal rounding with every '0' and '.' removed.
k_str = "".join(ch for ch in str(round(k, 5)) if ch not in ('0', '.'))

In [5]:
# Load the exemplar records written for run 1 of the 20,000-iteration simulation.
# read_json already returns a DataFrame, so no extra wrapping is needed.
strengths_data = pd.read_json(f'Outputs/Old/strengths_r1_i20000_k{k_str}.json')
strengths_data

Unnamed: 0,word_key,exemplar,exemplar_index,frequency,exemplar_strength
0,F8T2,0.6,1,8,1.002160e-10
1,F7T2,-0.8,2,7,1.012449e-10
2,F6T3,1.0,4,6,1.022844e-10
3,F3T10,0.5,0,3,1.033345e-10
4,F5T3,-0.9,3,5,1.043954e-10
...,...,...,...,...,...
2250,F4T9,-0.1,3,4,9.599651e-01
2251,F5T11,-0.7,0,5,9.698210e-01
2252,F6T8,1.4,3,6,9.797781e-01
2253,F11T1,-0.5,2,11,9.898374e-01


In [6]:
# Regenerate the pivoted word table and load it. Neither name is read again in
# this cell; both are kept as notebook globals in case other cells use them.
pivoted_data_path = reset_data()
kdf = pd.read_json(pivoted_data_path, orient='records', lines=True)

# Work on an independent copy so the frame loaded from the simulation output
# stays unchanged (DataFrame.copy() is a deep copy by default).
final_strengths_data = strengths_data.copy()
final_strengths_data['exemplar'] = final_strengths_data['exemplar'].round(1)

# Load the master table. Column names are normalised *before* 'exemplar' is
# touched, so a file that still uses the 'exemplars' spelling works as well.
master_df = pd.read_csv("Data/master_df.csv").rename(
    columns={"Unnamed: 0": "index", "exemplars": "exemplar"}
)
master_df['exemplar'] = master_df['exemplar'].round(1)

# Word keys present in the master table but absent from the simulation output
missing_word_keys = set(master_df['word_key']) - set(final_strengths_data['word_key'])

# Collect one record per missing word and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every call.
missing_entries = []
for word_key in sorted(missing_word_keys):  # sorted: stable row order between runs
    # Randomly select a row for the missing word_key
    selected_row = master_df[master_df['word_key'] == word_key].sample(n=1).iloc[0]

    missing_entries.append({
        'word_key': word_key,
        'exemplar': selected_row['exemplar'],
        'exemplar_index': selected_row['exemplar_index'],
        'frequency': selected_row['frequency'],
        # NOTE(review): 11317 is approximately ln(1e-10) / ln(1 - 1/492), i.e.
        # the age at which an exemplar would reach the 1e-10 pruning cut-off
        # for k = 1 - 1/492. It does not scale with the k actually in use -
        # confirm this exponent is still intended when k differs.
        'exemplar_strength': k**11317
    })

if missing_entries:
    final_strengths_data = pd.concat(
        [final_strengths_data, pd.DataFrame(missing_entries)],
        ignore_index=True,
    )

final_strengths_data

Unnamed: 0,word_key,exemplar,exemplar_index,frequency,exemplar_strength
0,F8T2,0.6,1,8,1.002160e-10
1,F7T2,-0.8,2,7,1.012449e-10
2,F6T3,1.0,4,6,1.022844e-10
3,F3T10,0.5,0,3,1.033345e-10
4,F5T3,-0.9,3,5,1.043954e-10
...,...,...,...,...,...
2250,F4T9,-0.1,3,4,9.599651e-01
2251,F5T11,-0.7,0,5,9.698210e-01
2252,F6T8,1.4,3,6,9.797781e-01
2253,F11T1,-0.5,2,11,9.898374e-01


In [7]:
# Collapse the per-exemplar rows into one record per word_key: exemplars and
# their strengths become parallel lists, and the frequency is taken from the
# group's first row (it is assumed to be identical within a word).
grouped_data = (
    final_strengths_data
    .groupby(['word_key'])
    .agg({
        'exemplar': lambda values: values.tolist(),
        'frequency': 'first',
        'exemplar_strength': lambda values: values.tolist(),
    })
    .reset_index()
)

# Write the grouped table as JSON lines: one compact object per word, each
# terminated by a newline.
with open(f'Outputs/Old/strengths_k{k_str}.json', 'w') as f:
    f.writelines(
        json.dumps(
            {
                "word_key": word_key,
                "exemplar": exemplar_values,
                "frequency": word_frequency,
                "exemplar_strength": strength_values,
            },
            separators=(',', ':'),
        ) + "\n"
        for word_key, exemplar_values, word_frequency, strength_values in zip(
            grouped_data["word_key"].tolist(),
            grouped_data["exemplar"].tolist(),
            grouped_data["frequency"].tolist(),
            grouped_data["exemplar_strength"].tolist(),
        )
    )

In [8]:
# Display the grouped table: one row per word_key, with the exemplars and their
# strengths held as lists in the 'exemplar' and 'exemplar_strength' columns.
grouped_data

Unnamed: 0,word_key,exemplar,frequency,exemplar_strength
0,F10T1,"[1.2, 1.9, 1.2, 1.9, 1.2, 1.2, 1.2, 1.9, 1.2, ...",10,"[1.966628291933145e-10, 2.8118181956477095e-10..."
1,F10T2,"[0.2, 0.2, 1.4, 1.4, 1.4, 1.4, 0.9, 1.4, 1.4, ...",10,"[3.020231465590408e-10, 5.351359836347905e-10,..."
2,F10T3,"[-0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0....",10,"[1.1328453824569731e-10, 1.812312768366149e-10..."
3,F11T1,"[0.7, 0.6, 0.6, -0.2, 0.6, 0.6, 0.6, 0.6, 0.6,...",11,"[1.554859054069464e-10, 2.8698521295416655e-10..."
4,F12T1,"[-0.2, 0.5, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2...",12,"[1.4328538485174992e-10, 1.9072773792651381e-1..."
...,...,...,...,...
87,F9T1,"[-0.6, -0.6, -0.6, -0.6, 2.2, 2.2, -0.6, -0.6,...",9,"[1.7397569656552371e-10, 4.0615175378302386e-1..."
88,F9T2,"[-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",9,"[1.0986572200897631e-10, 2.112395693864762e-10..."
89,F9T3,"[0.9, 0.7, 0.6, 0.9, 0.9, 0.7, 0.9, 0.9, 0.9, ...",9,"[2.6992472697990254e-10, 2.783242807480558e-10..."
90,F9T4,"[-0.4, -0.4, -0.4, -0.4, -0.4, -0.6, -1.6, -0....",9,"[1.507934933071273e-10, 2.538784402345185e-10,..."
