In [1]:
import pandas as pd
import numpy as np
import random
import json

In [2]:
def reset_data(source_path='Data/initial_data_1cat.json',
               pivoted_data_path="Data/pivoted_data.json"):
    """Rebuild the pivoted word table from the initial category file.

    Reads ``source_path``, takes the mapping stored under
    ``data['Category']['words']`` and reshapes it into a table with one row
    per word key and one column per property. The table is written to
    ``pivoted_data_path`` as JSON Lines (one record per line), overwriting
    any existing file.

    Parameters
    ----------
    source_path : str, optional
        JSON file holding the initial data. Defaults to the original
        hard-coded location.
    pivoted_data_path : str, optional
        Destination of the pivoted JSON Lines file. Defaults to the original
        hard-coded location.

    Returns
    -------
    str
        ``pivoted_data_path``, i.e. the path that was just written.
    """
    data = pd.read_json(source_path)

    # json_normalize flattens the nested mapping into a single row whose
    # column labels have the form "<word_key>.<property>".
    flat = pd.json_normalize(data['Category']['words'])

    # One row per flattened label: the label ends up in the 'index' column
    # and the value in column 0.
    flat = flat.transpose().reset_index()

    # n=1 splits at the first dot only, so a nested property (flattened to
    # e.g. "meta.x") stays in one piece instead of producing a third column
    # that would break the two-column assignment. Word keys that themselves
    # contain a dot are still not supported by this flatten-and-split scheme.
    flat[['word_key', 'property']] = flat['index'].str.split('.', n=1, expand=True)

    # Properties become columns; word_key is turned back into a regular column.
    pivoted = flat.pivot(index='word_key', columns='property', values=0).reset_index()

    pivoted.to_json(pivoted_data_path, orient='records', lines=True)

    return pivoted_data_path

In [30]:
def master_data(pivoted_data_path=None):
    """Build the long-format master table with one row per exemplar.

    Parameters
    ----------
    pivoted_data_path : str, optional
        JSON Lines file with ``word_key``, ``exemplars`` and ``frequency``
        fields. When omitted (the default), ``reset_data()`` is called first
        to regenerate the file and its returned path is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``word_key``, ``exemplars`` (a single exemplar value),
        ``exemplar_index`` (position of that value within the word's
        exemplar list) and ``frequency``. The columns are present even when
        no exemplars were found.
    """
    if pivoted_data_path is None:
        # Default behaviour: start from freshly reset data.
        pivoted_data_path = reset_data()

    df = pd.read_json(pivoted_data_path, orient='records', lines=True)

    columns = ["word_key", "exemplars", "exemplar_index", "frequency"]

    # Build all records first and create the DataFrame once.
    records = [
        {
            "word_key": word_key,
            "exemplars": exemplar,
            "exemplar_index": exemplar_index,
            "frequency": frequency,
        }
        for word_key, exemplars, frequency in zip(
            df['word_key'], df['exemplars'], df['frequency']
        )
        for exemplar_index, exemplar in enumerate(exemplars)
    ]

    # Passing the columns explicitly keeps the schema stable when `records`
    # is empty.
    return pd.DataFrame(records, columns=columns)

In [32]:
# Build the long-format table (one row per exemplar); master_data() resets
# the pivoted data file before reading it.
master_df = master_data()

# Save the master DataFrame as a CSV file. The default to_csv call also
# writes the row index as the first, unnamed column.
master_df.to_csv("Data/master_df.csv")

# Bare last expression so the notebook renders the frame as the cell output.
master_df

Unnamed: 0,word_key,exemplars,exemplar_index,frequency
0,F10T1,0.2,0,10
1,F10T1,-1.0,1,10
2,F10T1,-1.2,2,10
3,F10T1,0.2,3,10
4,F10T1,0.5,4,10
...,...,...,...,...
486,F9T5,0.1,3,9
487,F9T5,0.1,4,9
488,F9T5,0.4,5,9
489,F9T5,0.2,6,9


In [40]:
# Path of the JSON Lines file that reset_data() writes and returns. It is
# defined here so this cell runs on a fresh kernel instead of relying on a
# variable left over from an earlier session.
pivoted_data_path = "Data/pivoted_data.json"

df = pd.read_json(pivoted_data_path, orient='records', lines=True)

# Spot check: print the stored exemplar list for a single word key.
for entry in df.loc[df["word_key"] == "F9T5", "exemplars"]:
    print(entry)

[0.1, -1.6, -0.2, 0.1, 0.1, 0.4, 0.2, 0.30000000000000004]
