In [1]:
import pandas as pd
import numpy as np
import random
import json

In [2]:
def reset_data(source_path='Outputs/Old/strengths.json',
               data_path="Outputs/Old/old_model_data.json"):
    """Re-create the working data file from the source strengths file.

    Reads the JSON-lines file at ``source_path`` and writes its records,
    again as JSON lines, to ``data_path``, overwriting whatever was there.

    Parameters
    ----------
    source_path : str
        JSON-lines file to read. Defaults to the path the notebook has
        always used.
    data_path : str
        JSON-lines file to (over)write. Defaults to the path the notebook
        has always used.

    Returns
    -------
    str
        ``data_path`` -- the file that was actually written, so the returned
        value can never drift from the write target.
    """
    data = pd.read_json(source_path, lines=True)

    # Write a fresh working copy (one record per line) for the simulation.
    data.to_json(data_path, orient='records', lines=True)

    return data_path

In [3]:
def contains_small_exemplars(exemplars_list, threshold=1e-10):
    """Return True if any value in ``exemplars_list`` is below ``threshold``.

    Parameters
    ----------
    exemplars_list : iterable of numbers
        Values to inspect. In this notebook it is called on the
        ``exemplar_strength`` lists.
    threshold : float, optional
        Strict lower cutoff; values equal to the threshold do not count as
        small. Defaults to 1e-10, the cutoff originally hard-coded here.

    Returns
    -------
    bool
        True when at least one value is strictly less than ``threshold``;
        False otherwise, including for an empty input.
    """
    return any(exemplar < threshold for exemplar in exemplars_list)

In [4]:
def remove_small_exemplars_and_adjust_frequency(row, threshold=1e-10):
    """Drop exemplars whose strength fell below ``threshold`` and fix frequency.

    Rows whose ``frequency`` is 1 or less are returned untouched. Otherwise
    every entry of ``row['exemplar_strength']`` that is below ``threshold`` is
    removed and, if anything was removed, ``row['frequency']`` is set to the
    number of strengths that remain.

    When the row also carries an ``'exemplars'`` list of the same length as
    ``'exemplar_strength'``, the entries at the removed positions are dropped
    from it as well. The two lists are used together as population and weights
    for ``random.choices``, which requires equal lengths, so pruning only one
    of them would break the next draw for this row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series from ``DataFrame.apply(axis=1)`` or a dict)
        Must provide ``'frequency'`` and ``'exemplar_strength'``;
        ``'exemplars'`` is optional.
    threshold : float, optional
        Strengths greater than or equal to this value are kept.
        Defaults to 1e-10.

    Returns
    -------
    The same ``row`` object, modified in place.
    """
    # Only proceed if frequency is greater than 1
    if row['frequency'] > 1:
        strengths = row['exemplar_strength']
        keep = [strength >= threshold for strength in strengths]

        if not all(keep):
            # Keep the parallel exemplar list aligned with the strengths.
            exemplars = row.get('exemplars')
            if isinstance(exemplars, list) and len(exemplars) == len(strengths):
                row['exemplars'] = [
                    exemplar for exemplar, kept in zip(exemplars, keep) if kept
                ]
            # Frequency becomes the number of surviving exemplars.
            row['frequency'] = sum(keep)

        row['exemplar_strength'] = [
            strength for strength, kept in zip(strengths, keep) if kept
        ]
    return row

In [5]:
def old_model(runs, iterations=10000, seed=None):
    """Run the exemplar simulation ``runs`` times and save per-run means.

    Each run starts from a freshly reset data file and performs
    ``iterations`` steps. One step:

    1. picks a word with probability proportional to its ``frequency``;
    2. picks one of that word's exemplars, weighted by ``exemplar_strength``;
    3. appends ``chosen exemplar + 0.1`` to the word's exemplar list;
    4. multiplies every exemplar strength in the frame by 0.9 and gives the
       new exemplar a strength of 1;
    5. prunes exemplars whose strength dropped below the cutoff;
    6. records the mean of the chosen word's exemplar list, keyed by the
       step index, in one of two dictionaries depending on whether the
       word's frequency is below 7.

    After each run the two dictionaries are written to
    ``Outputs/Old/old_means_1_6_r<run>.json`` and
    ``Outputs/Old/old_means_7_12_r<run>.json`` (run numbers start at 1).

    Parameters
    ----------
    runs : int
        Number of independent runs.
    iterations : int, optional
        Steps per run. Defaults to 10000, the value originally hard-coded.
    seed : int or None, optional
        If given, seeds the ``random`` module once before the first run so
        the whole experiment is reproducible. ``None`` (default) leaves the
        generator state untouched.

    Returns
    -------
    None
    """
    if seed is not None:
        random.seed(seed)

    for a in range(runs):
        # Define mean dictionaries
        means_1_6 = {}
        means_7_12 = {}

        # Reset data path
        pivoted_data_path = reset_data()

        for i in range(iterations):
            # NOTE(review): the state is round-tripped through JSON on every
            # iteration. pandas' to_json rounds floats to its default
            # double_precision, so keeping the frame in memory instead could
            # change the numeric results -- confirm before optimising.
            df = pd.read_json(pivoted_data_path, orient='records', lines=True)

            # Extract the words and their frequencies from the DataFrame
            words_data = df['word_key'].tolist()
            frequencies = df['frequency'].astype(int).tolist()

            # Picking a word based on its frequency
            chosen_word = random.choices(words_data, weights=frequencies, k=1)[0]

            # Locate the chosen word's row once (first match) and reuse it.
            row_idx = df.index[df.word_key == chosen_word][0]

            # Exemplars and exemplar strengths for the chosen word
            exemplars_list = df.at[row_idx, 'exemplars']
            exemplar_strengths = df.at[row_idx, 'exemplar_strength']

            # Choose an exemplar weighted by exemplar strength
            chosen_exemplar = random.choices(exemplars_list, weights=exemplar_strengths, k=1)[0]

            # Add the shifted chosen exemplar to the end of the exemplars list
            exemplars_list.append(chosen_exemplar + 0.1)

            # Multiply all the exemplar strengths in the entire dataframe by 0.9
            df['exemplar_strength'] = df['exemplar_strength'].apply(lambda x: [item * 0.9 for item in x])
            exemplar_strengths = [item * 0.9 for item in exemplar_strengths]

            # Add an exemplar strength of 1 to the end of the exemplar strengths list
            exemplar_strengths.append(1)

            # Update the dataframe with the new exemplar list and exemplar strength list
            df.at[row_idx, 'exemplars'] = exemplars_list
            df.at[row_idx, 'exemplar_strength'] = exemplar_strengths

            # Check if any row has exemplars below the threshold
            if df['exemplar_strength'].apply(contains_small_exemplars).any():
                df = df.apply(remove_small_exemplars_and_adjust_frequency, axis=1)

            # Save the dataframe to JSON
            df.to_json(pivoted_data_path, orient='records', lines=True)

            # Save the mean of the exemplars. The frequency is read as a
            # scalar via .at: calling int() on a one-element Series is
            # deprecated in pandas.
            if int(df.at[row_idx, 'frequency']) < 7:
                means_1_6[i] = np.mean(exemplars_list)
            else:
                means_7_12[i] = np.mean(exemplars_list)

        # Save the mean dictionaries as JSON files
        means_1_6_path = "Outputs/Old/old_means_1_6_r" + str(a+1) + ".json"
        means_7_12_path = "Outputs/Old/old_means_7_12_r" + str(a+1) + ".json"

        with open(means_1_6_path, 'w') as f:
            json.dump(means_1_6, f)

        with open(means_7_12_path, 'w') as f:
            json.dump(means_7_12, f)

In [6]:
# Run the simulation 10 times; each run writes its own pair of means JSON files.
old_model(10)