In [1]:
import pandas as pd
import numpy as np
import random
import json

In [2]:
def reset_data(source_path='Outputs/Old/strengths.json',
               data_path="Outputs/Old/old_model_data.json"):
    """Re-create the model's working data file from the source strengths file.

    The source file is read as line-delimited JSON and written back out,
    unmodified, as line-delimited JSON records. Floats are written with 15
    decimal places so the copy is not rounded to the ``to_json`` default
    precision.

    Parameters
    ----------
    source_path : str, optional
        Line-delimited JSON file to read. Defaults to the strengths file
        used by this notebook.
    data_path : str, optional
        Destination of the working copy. Any existing file is overwritten.

    Returns
    -------
    str
        ``data_path``, so callers can read the fresh working copy.
    """
    # Read the source data (one JSON record per line).
    data = pd.read_json(source_path, lines=True)

    # Write the working copy; double_precision=15 avoids rounding the values.
    data.to_json(data_path, orient='records', lines=True, double_precision=15)

    return data_path

In [3]:
def contains_small_exemplars(exemplars_list):
    """Tell whether at least one value in ``exemplars_list`` is below 1e-10.

    Returns ``True`` as soon as a value strictly smaller than the cutoff is
    encountered, and ``False`` otherwise (including for an empty input).
    """
    cutoff = 0.0000000001
    for value in exemplars_list:
        if value < cutoff:
            return True
    return False

In [4]:
def remove_small_exemplars(row, threshold=0.0000000001):
    """Drop every exemplar whose strength is below ``threshold`` from ``row``.

    ``row`` is modified in place: its ``'exemplar'`` and
    ``'exemplar_strength'`` entries are replaced with new lists that omit
    the positions whose strength is strictly below ``threshold``.

    Parameters
    ----------
    row : mapping-like
        Object supporting ``row['exemplar']`` and ``row['exemplar_strength']``
        for reading and assignment (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).
    threshold : float, optional
        Strength cutoff; values equal to the threshold are kept.

    Returns
    -------
    The same ``row`` object, after modification.
    """
    strengths = row['exemplar_strength']

    # Positions to drop, held in a set so the membership tests below are O(1)
    # instead of scanning a list once per element.
    indices_to_remove = {index for index, strength in enumerate(strengths) if strength < threshold}

    # Filter both lists by position so they stay aligned with each other.
    row['exemplar'] = [exemplar for index, exemplar in enumerate(row['exemplar']) if index not in indices_to_remove]
    row['exemplar_strength'] = [strength for index, strength in enumerate(strengths) if index not in indices_to_remove]

    return row

In [5]:
def old_model(runs, iterations=10000):
    """Run the exemplar simulation ``runs`` times and save the per-step means.

    Every run starts from a fresh working copy of the data (``reset_data``)
    and performs ``iterations`` steps. In each step:

    1. a word is drawn with probability proportional to its ``frequency``;
    2. one of that word's exemplars is drawn with probability proportional
       to its ``exemplar_strength``;
    3. a new exemplar equal to the drawn value plus 0.1 is appended to the
       word's exemplar list with strength 1, after every existing strength
       in the table has been multiplied by ``k = 1 - 1 / sum(frequency)``;
    4. the mean of the word's exemplars is recorded under the step number,
       in one dictionary if the word's frequency is below 7 and in another
       otherwise.

    At the end of each run the two dictionaries are written to
    ``Outputs/Old/old_means_1_6_r<run>.json`` and
    ``Outputs/Old/old_means_7_12_r<run>.json`` (runs are numbered from 1),
    and the final table is written back to the working data file.

    The table is kept in memory for the whole run. Re-reading and re-writing
    the JSON file on every step is slow, and writing with the default
    ``double_precision`` rounds the values on each round trip; here the file
    is written once per run with ``double_precision=15``.

    Parameters
    ----------
    runs : int
        Number of runs to perform.
    iterations : int, optional
        Number of steps per run.
    """
    for run in range(runs):
        # Per-run accumulators: step number -> mean exemplar of the chosen word.
        means_1_6 = {}
        means_7_12 = {}

        # Start the run from a fresh copy of the source data.
        pivoted_data_path = reset_data()
        df = pd.read_json(pivoted_data_path, orient='records', lines=True)

        # Decay factor applied to every exemplar strength on each step.
        k = 1 - 1 / int(df['frequency'].sum())

        # 'word_key' and 'frequency' are never modified by the loop below, so
        # the sampling population and weights are computed once per run.
        words_data = df['word_key'].tolist()
        frequencies = df['frequency'].astype(int).tolist()

        for i in range(iterations):
            # Pick a word, weighted by frequency.
            chosen_word = random.choices(words_data, weights=frequencies, k=1)[0]

            # Locate the chosen word once; the first matching row is used.
            matching_rows = df.index[df['word_key'] == chosen_word]
            row_label = matching_rows[0]

            exemplars_list = df.at[row_label, 'exemplar']
            exemplar_strengths = df.at[row_label, 'exemplar_strength']

            # Pick one of the word's exemplars, weighted by strength.
            chosen_exemplar = random.choices(exemplars_list, weights=exemplar_strengths, k=1)[0]

            # Store the new exemplar at the end of the word's exemplar list.
            exemplars_list.append(chosen_exemplar + 0.1)

            # Decay every strength in the table; the chosen word's decayed
            # strengths are rebuilt separately so the new entry can be added.
            df['exemplar_strength'] = df['exemplar_strength'].apply(
                lambda strengths: [item * k for item in strengths]
            )
            exemplar_strengths = [item * k for item in exemplar_strengths]

            # The new exemplar enters with strength 1.
            exemplar_strengths.append(1)

            # Write the updated lists back to the chosen word's row.
            df.at[row_label, 'exemplar'] = exemplars_list
            df.at[row_label, 'exemplar_strength'] = exemplar_strengths

            # NOTE(review): this guard counts the rows whose word_key equals
            # the chosen word, not the exemplars in its list. If word_key
            # values are unique it is never true and the pruning below never
            # runs - confirm which behaviour is intended before changing it.
            if len(matching_rows) > 1:
                if df['exemplar_strength'].apply(contains_small_exemplars).any():
                    df = df.apply(remove_small_exemplars, axis=1)

            # Record the mean of the chosen word's exemplars. The frequency is
            # read as a scalar: int() on a one-element Series is deprecated.
            if int(df.at[row_label, 'frequency']) < 7:
                means_1_6[i] = np.mean(exemplars_list)
            else:
                means_7_12[i] = np.mean(exemplars_list)

        # Leave the final state of the run in the working data file.
        df.to_json(pivoted_data_path, orient='records', lines=True, double_precision=15)

        # Save the mean dictionaries as JSON files.
        means_1_6_path = "Outputs/Old/old_means_1_6_r" + str(run + 1) + ".json"
        means_7_12_path = "Outputs/Old/old_means_7_12_r" + str(run + 1) + ".json"

        with open(means_1_6_path, 'w') as f:
            json.dump(means_1_6, f)

        with open(means_7_12_path, 'w') as f:
            json.dump(means_7_12, f)

In [6]:
# Run the simulation ten times; each run writes its own pair of output files.
old_model(runs=10)