In [168]:
import pandas as pd
import numpy as np
import random
from random import randint
import json
import dataclasses
from dataclasses import dataclass
import copy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
def reset_data(input_path='Data/initial_data_1cat.json', output_path='Data/pivoted_data.json'):
    """Rebuild the per-word table from the raw category JSON and save it as JSON Lines.

    The raw file is read with ``pd.read_json`` and its ``Category -> words`` entry
    (a mapping from word key to that word's properties) is reshaped into a table
    with one row per word key and one column per property.

    Parameters
    ----------
    input_path : str
        Path of the raw JSON file. Defaults to the file the notebook always used.
    output_path : str
        Path the pivoted table is written to (``orient='records', lines=True``).

    Returns
    -------
    str
        ``output_path``, so callers can read the table straight back.
    """
    data = pd.read_json(input_path)

    # Flatten the nested 'words' mapping: one row whose columns are '<word_key>.<property>'
    flattened = pd.json_normalize(data['Category']['words'])

    # Turn that single wide row into one row per '<word_key>.<property>' label;
    # the label lands in column 'index' and the value in column 0
    long_form = flattened.transpose().reset_index()

    # Split on the first dot only, so a property name that itself contains dots
    # (json_normalize produces these for nested properties) stays in one piece
    long_form[['word_key', 'property']] = long_form['index'].str.split('.', n=1, expand=True)

    # One row per word key, one column per property
    pivoted_data = long_form.pivot(index='word_key', columns='property', values=0).reset_index()

    pivoted_data.to_json(output_path, orient='records', lines=True)

    return output_path

In [163]:
@dataclass
class ExemplarData:
    """One stored exemplar of a word together with its current strength.

    The static helpers operate on a plain list of ``ExemplarData`` records
    (the "exemplar store") rather than on a single instance.
    """
    word_key: str             # key of the word the exemplar was drawn for
    exemplar: str             # the exemplar value that was drawn
    exemplar_index: int       # position of that exemplar in the word's exemplar list
    frequency: int            # frequency value supplied by the caller, stored as int
    exemplar_strength: float  # starts at 1.0; callers scale it down over time

    @staticmethod
    def add_exemplar(exemplar_data_list, chosen_word, new_exemplar, exemplar_index, frequency):
        """Append a new full-strength record to ``exemplar_data_list`` (mutates the list).

        ``frequency`` is coerced with ``int()`` so numpy integers or numeric strings
        end up as plain Python ints.
        """
        exemplar_data_list.append(
            ExemplarData(
                word_key=chosen_word,
                exemplar=new_exemplar,
                exemplar_index=exemplar_index,
                frequency=int(frequency),
                # float literal to match the field's declared type
                exemplar_strength=1.0
            )
        )

    @staticmethod
    def remove_weak_exemplars(exemplar_data_list):
        """Return a new list holding only the records whose strength is at least 1e-10."""
        return [data for data in exemplar_data_list if data.exemplar_strength >= 1e-10]

    @staticmethod
    def get_weights(exemplar_data_list, chosen_word, exemplars_list):
        """Return one sampling weight per entry of ``exemplars_list``.

        For position ``idx`` the weight is the strength of the first record in
        ``exemplar_data_list`` that matches ``chosen_word``, the exemplar value and
        ``idx``; positions with no matching record get the default weight ``1``.
        """
        # Narrow the store to the chosen word once, instead of rescanning every
        # record of every word for each exemplar position.
        candidates = [data for data in exemplar_data_list if data.word_key == chosen_word]

        weights = []
        for idx, exemplar in enumerate(exemplars_list):
            first_match = next(
                (
                    data.exemplar_strength
                    for data in candidates
                    if data.exemplar == exemplar and data.exemplar_index == idx
                ),
                1,  # default weight when the exemplar has no stored record
            )
            weights.append(first_match)
        return weights

def strengths(runs, iterations):
    """Simulate exemplar storage with decay and save the surviving exemplars of each run.

    On every iteration a word is drawn with probability proportional to its
    frequency, one of its exemplars is drawn using the weights from
    ``ExemplarData.get_weights``, all stored strengths are multiplied by the decay
    factor ``k``, the drawn exemplar is stored at full strength, and records whose
    strength fell below the ``remove_weak_exemplars`` threshold are dropped.

    Parameters
    ----------
    runs : int
        Number of independent simulations; each starts from an empty store.
    iterations : int
        Number of draws per simulation.

    Side effects
    ------------
    Calls ``reset_data()`` (which rewrites the pivoted data file) and writes one
    file per run to ``Outputs/Old/strengths_r<run>_i<iterations>.json``.
    """
    pivoted_data_path = reset_data()

    # The pivoted table does not change during the simulation, so it is loaded
    # once here rather than re-read from disk on every iteration.
    df = pd.read_json(pivoted_data_path, orient='records', lines=True)

    # Decay factor applied to every stored strength on each iteration
    k = 1 - 1/int(df['frequency'].sum())

    words_data = df['word_key'].tolist()
    frequencies = df['frequency'].astype(int).tolist()

    # Per-word lookup of (exemplar list, frequency). setdefault keeps the first
    # row for a word key, matching a `.iloc[0]` lookup on the filtered frame.
    word_info = {}
    for word, word_exemplars, word_frequency in zip(words_data, df['exemplars'], df['frequency']):
        word_info.setdefault(word, (word_exemplars, word_frequency))

    for a in range(runs):
        exemplar_data_list = []

        for _ in range(iterations):
            chosen_word = random.choices(words_data, weights=frequencies, k=1)[0]
            exemplars_list, frequency = word_info[chosen_word]

            # Get weights for the current exemplars
            weights = ExemplarData.get_weights(exemplar_data_list, chosen_word, exemplars_list)

            # Select an exemplar based on updated weights
            exemplar_index = random.choices(range(len(exemplars_list)), weights=weights, k=1)[0]
            new_exemplar = exemplars_list[exemplar_index]

            # Decay strengths of existing exemplars
            for data in exemplar_data_list:
                data.exemplar_strength *= k

            # Store the new exemplar, then drop records that have decayed away
            ExemplarData.add_exemplar(exemplar_data_list, chosen_word, new_exemplar, exemplar_index, frequency)
            exemplar_data_list = ExemplarData.remove_weak_exemplars(exemplar_data_list)

        # Save the exemplar data to a file
        strengths_path = f"Outputs/Old/strengths_r{a+1}_i{iterations}.json"
        with open(strengths_path, 'w') as f:
            json.dump([dataclasses.asdict(data) for data in exemplar_data_list], f)


In [164]:
# Run one simulation of 20,000 iterations; the result is written to
# Outputs/Old/strengths_r1_i20000.json by strengths().
strengths(1, 20000)

In [165]:
# Load the saved exemplar records of run 1; read_json already returns a
# DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
strengths_data = pd.read_json('Outputs/Old/strengths_r1_i20000.json')
strengths_data

Unnamed: 0,word_key,exemplar,exemplar_index,frequency,exemplar_strength
0,F5T11,-0.7,0,5,1.000411e-10
1,F4T10,-0.9,1,4,1.002448e-10
2,F4T8,-0.3,3,4,1.004490e-10
3,F9T1,-1.2,0,9,1.006536e-10
4,F4T10,-0.9,1,4,1.008586e-10
...,...,...,...,...,...
11313,F2T2,-0.2,0,2,9.918947e-01
11314,F4T9,0.5,0,4,9.939148e-01
11315,F7T7,1.2,6,7,9.959391e-01
11316,F8T6,1.2,6,8,9.979675e-01


In [201]:
pivoted_data_path = reset_data()
kdf = pd.read_json(pivoted_data_path, orient='records', lines=True)
# Same decay factor as used inside strengths()
k = 1 - 1/int(kdf['frequency'].sum())

# Number of decay steps assigned to back-filled entries (strength = k ** steps).
# NOTE(review): 11317 looks like it was read off the displayed 20,000-iteration
# result (index of its last listed row is 11316) — confirm, and derive it from
# strengths_data instead of hard-coding it if that is the intent.
BACKFILL_DECAY_STEPS = 11317

# Loading data
final_strengths_data = strengths_data.copy()
final_strengths_data['exemplar'] = final_strengths_data['exemplar'].round(1)

# Rename before rounding so the 'exemplar' column exists whether the CSV calls
# it 'exemplars' or 'exemplar'.
master_df = pd.read_csv("Data/master_df.csv").rename(
    columns={"Unnamed: 0": "index", "exemplars": "exemplar"}
)
master_df['exemplar'] = master_df['exemplar'].round(1)

# Word keys present in the master table but absent from the simulation result
missing_word_keys = set(master_df['word_key']) - set(final_strengths_data['word_key'])

# Collect the back-fill rows first and concatenate once: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic.
new_entries = []
for word_key in missing_word_keys:
    # Randomly select a row for the missing word_key
    selected_row = master_df[master_df['word_key'] == word_key].sample(n=1).iloc[0]

    new_entries.append({
        'word_key': word_key,
        'exemplar': selected_row['exemplar'],
        'exemplar_index': selected_row['exemplar_index'],
        'frequency': selected_row['frequency'],
        'exemplar_strength': k**BACKFILL_DECAY_STEPS
    })

if new_entries:
    final_strengths_data = pd.concat(
        [final_strengths_data, pd.DataFrame(new_entries)],
        ignore_index=True,
    )

final_strengths_data

Unnamed: 0,word_key,exemplar,exemplar_index,frequency,exemplar_strength
0,F5T11,-0.7,0,5,1.000411e-10
1,F4T10,-0.9,1,4,1.002448e-10
2,F4T8,-0.3,3,4,1.004490e-10
3,F9T1,-1.2,0,9,1.006536e-10
4,F4T10,-0.9,1,4,1.008586e-10
...,...,...,...,...,...
11313,F2T2,-0.2,0,2,9.918947e-01
11314,F4T9,0.5,0,4,9.939148e-01
11315,F7T7,1.2,6,7,9.959391e-01
11316,F8T6,1.2,6,8,9.979675e-01


In [197]:
def _series_to_list(values):
    """Collapse one group's column into a plain Python list."""
    return values.tolist()


# One row per word_key: exemplars and strengths are gathered into lists, and the
# group's first frequency is kept (assumed identical within a word_key).
grouped_data = (
    final_strengths_data
    .groupby(['word_key'])
    .agg({
        'exemplar': _series_to_list,
        'frequency': 'first',
        'exemplar_strength': _series_to_list,
    })
    .reset_index()
)

# Write the grouped table as JSON Lines: one compact JSON object per word_key
output_fields = ("word_key", "exemplar", "frequency", "exemplar_strength")
with open('Outputs/Old/strengths.json', 'w') as f:
    for record_values in zip(*(grouped_data[field] for field in output_fields)):
        record = dict(zip(output_fields, record_values))
        f.write(json.dumps(record, separators=(',', ':')) + "\n")

In [202]:
# Display the grouped per-word table (one row per word_key)
grouped_data

Unnamed: 0,word_key,exemplar,frequency,exemplar_strength
0,F10T1,"[1.6, 0.5, 1.2, -1.0, -2.1, -2.1, 1.2, 0.2, 0....",10,"[1.025136425161233e-10, 1.096328783493783e-10,..."
1,F10T2,"[0.9, -0.5, -0.3, -1.3, -0.2, -0.3, -0.5, -0.5...",10,"[1.067711395842395e-10, 1.114319425750472e-10,..."
2,F10T3,"[0.8, 0.5, 0.5, -0.2, -0.2, 1.1, 0.8, -0.2, -0...",10,"[1.1372234541325822e-10, 1.206342189494861e-10..."
3,F11T1,"[-0.2, -1.1, 0.6, -0.5, -0.5, 0.2, 0.6, -0.5, ...",11,"[1.3355220073468682e-10, 1.469537757940777e-10..."
4,F12T1,"[-0.2, -1.7, 0.7, -1.6, -1.7, -1.7, -1.6, 0.7,...",12,"[1.125713190097277e-10, 1.1418604460789091e-10..."
...,...,...,...,...
87,F9T1,"[-1.2, -0.6, -1.2, -1.2, 2.2, -1.2, -1.2, 1.0,...",9,"[1.0065356680470981e-10, 1.201443343023376e-10..."
88,F9T2,"[-0.6, 0.8, -0.8, -0.6, 0.8, -0.6, 0.4, 0.8, -...",9,"[1.1326052925909552e-10, 1.2796618578934052e-1..."
89,F9T3,"[0.7, 0.1, -0.5, 0.1, -0.7, -0.7, -0.6, 0.6, -...",9,"[1.0126981142623051e-10, 1.130303249313331e-10..."
90,F9T4,"[2.3, -0.9, -0.9, 2.3, -1.0, -0.9, -0.5, 2.3, ...",9,"[1.0742483862930251e-10, 1.236157603276733e-10..."
