In [1]:
import pandas as pd
import numpy as np
import random
import json

In [2]:
def reset_data(source_path='Data/initial_data_1cat.json',
               pivoted_data_path="Data/pivoted_data.json"):
    """Rebuild the per-word table from the initial data file and save it.

    Reads ``source_path`` with ``pd.read_json`` and takes the mapping stored
    under ``data['Category']['words']``, which is expected to map each word
    key to a dict of that word's properties. One record per word is written
    to ``pivoted_data_path`` as line-delimited JSON (``orient='records'``,
    ``lines=True``); every record carries a ``word_key`` field plus the
    word's properties.

    Parameters
    ----------
    source_path : str, optional
        JSON file holding the initial data.
    pivoted_data_path : str, optional
        Destination of the line-delimited JSON table. Its folder must exist.

    Returns
    -------
    str
        The path the table was written to (``pivoted_data_path``).
    """
    data = pd.read_json(source_path)
    words = data['Category']['words']

    # Build the rows straight from the mapping. Flattening to
    # "word.property" column names and splitting them on '.' again fails as
    # soon as a word key itself contains a dot.
    property_names = sorted({name for props in words.values() for name in props})

    # Rows ordered by word key and columns ordered by property name, so the
    # file layout matches what a pivot on (word_key, property) produces.
    records = [{'word_key': key, **words[key]} for key in sorted(words)]
    pivoted_data = pd.DataFrame(records, columns=['word_key'] + property_names)

    pivoted_data.to_json(pivoted_data_path, orient='records', lines=True)

    return pivoted_data_path

In [3]:
def new_model(runs, iterations=10000, increment=0.1, frequency_threshold=7, seed=None):
    """Run the exemplar-update simulation ``runs`` times and save per-run means.

    Every run starts from freshly reset data (``reset_data()``) and performs
    ``iterations`` update steps. A single step:

    1. picks a word with probability proportional to its ``frequency``;
    2. picks one of that word's exemplars uniformly at random and adds
       ``increment`` to it;
    3. overwrites a uniformly chosen exemplar of the same word with that
       new value;
    4. records the mean of the word's exemplars, keyed by the step number,
       in the "1_6" dictionary when the word's frequency is below
       ``frequency_threshold`` and in the "7_12" dictionary otherwise.

    At the end of a run the updated table is written back to the path
    returned by ``reset_data()`` and the two dictionaries are dumped to
    ``Outputs/means_1_6_r<k>.json`` and ``Outputs/means_7_12_r<k>.json``,
    where ``k`` counts runs from 1.

    The table is kept in memory for the whole run and written once, instead
    of being read from and written to disk on every step. As a consequence
    exemplar values are no longer passed through ``to_json``'s float
    rounding between steps, so results can differ from a per-step
    round-trip in the last decimals.

    Parameters
    ----------
    runs : int
        Number of independent runs.
    iterations : int, optional
        Update steps per run.
    increment : float, optional
        Amount added to the sampled exemplar.
    frequency_threshold : int, optional
        Frequencies strictly below this value go to the "1_6" output.
    seed : int, optional
        When given, seeds both ``random`` and ``np.random`` once before the
        first run so the whole call is reproducible.
    """
    import os  # local import: only this function needs it

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Create the output folder up front so a missing folder cannot discard a
    # finished run.
    os.makedirs("Outputs", exist_ok=True)

    for run in range(runs):
        # Define mean dictionaries
        means_1_6 = {}
        means_7_12 = {}

        # Reset data and load it once for this run
        pivoted_data_path = reset_data()
        df = pd.read_json(pivoted_data_path, orient='records', lines=True)

        frequencies = df['frequency'].astype(int).tolist()
        # Private copies of the exemplar lists; they are assigned back to the
        # frame explicitly once the run is over.
        exemplar_lists = [list(exemplars) for exemplars in df['exemplars']]
        positions = range(len(df))

        for i in range(iterations):
            # Pick a row (word) with probability proportional to its frequency
            position = random.choices(positions, weights=frequencies, k=1)[0]
            exemplars = exemplar_lists[position]

            # Sample an exemplar without weighting and shift it
            new_exemplar = random.choice(exemplars) + increment

            # Overwrite a randomly chosen slot (possibly a different one)
            exemplars[np.random.choice(len(exemplars))] = new_exemplar

            # Save the mean of the exemplars
            mean = float(np.mean(exemplars))
            if frequencies[position] < frequency_threshold:
                means_1_6[i] = mean
            else:
                means_7_12[i] = mean

        # Persist the final state of the table for this run
        df['exemplars'] = pd.Series(exemplar_lists, index=df.index, dtype=object)
        df.to_json(pivoted_data_path, orient='records', lines=True)

        # Save the mean dictionaries as JSON files
        for label, means in (("means_1_6", means_1_6), ("means_7_12", means_7_12)):
            with open("Outputs/" + label + "_r" + str(run + 1) + ".json", 'w') as f:
                json.dump(means, f)

In [4]:
# Execute ten independent simulation runs; results are written by new_model.
new_model(runs=10)