In [1]:
import os
import sys
import numpy as np
import pandas as pd

dataset = "dreamachine"

print(f"Current working directory: {os.getcwd()}")
BOX_DIR = os.path.join(os.path.expanduser("~"), "Library", "CloudStorage", "Box-Box", "TMDATA")
print(f"Retrieving data from BOX, locally stored at: {BOX_DIR}")
DATA_DIR = os.path.join(BOX_DIR, dataset)
print(f"Data directory: {DATA_DIR}")


reports_file = os.listdir(DATA_DIR)
print(f"Files for {dataset} dataset (n={len(os.listdir(DATA_DIR))}): {os.listdir(DATA_DIR)}")
csv_files = [f for f in reports_file if f.endswith(".csv")]
raw_file_name = [f for f in reports_file if not f.endswith(".csv")][0]
raw_file_path = os.path.join(DATA_DIR, raw_file_name)
print(f"Raw file path: {raw_file_path}")


sensory_tool_data = pd.read_excel(raw_file_path)

Current working directory: /Users/rb666/Projects/MOSAIC/preproc
Retrieving data from BOX, locally stored at: /Users/rb666/Library/CloudStorage/Box-Box/TMDATA
Data directory: /Users/rb666/Library/CloudStorage/Box-Box/TMDATA/dreamachine
Files for dreamachine dataset (n=6): ['freeform_SensoryTool_complete.xlsx', 'freeform_HS_SensoryTool_complete.csv', 'freeform_DL_SensoryTool_complete.csv', 'SensoryTool_CombinedData_v24_20230912_2.xlsx', 'HS_reflections_APIcleaned.csv', 'DL_reflections_APIcleaned.csv']
Raw file path: /Users/rb666/Library/CloudStorage/Box-Box/TMDATA/dreamachine/freeform_SensoryTool_complete.xlsx


In [2]:
# columns corresp. to freeform responses that we might want to extract 
freeform_responses = [
    'reflection_answer',
    'emo_freeformResponse', #userâ€™s freeform emotional response, if present
    'aud_freeformResponse',
    'vizCol_freeformAnswer',
    'vizPat_freeformAnswer',
    'vizMov_freeform_answer', #user's freeform answer for movement types
    'cogBody_freeformAnswer'] #users freeform additional body sensations if they chose to add this

# new dataframe to store only the freeform responses, and the metadata columns
freeform_data = pd.DataFrame()


for col in freeform_responses:
    freeform_data[col] = sensory_tool_data[col]

freeform_data['meta_HighSensory'] = sensory_tool_data['meta_HighSensory']
freeform_data['meta_HighSensory'] = freeform_data['meta_HighSensory'].replace({True: 'High Sensory', False: 'Deep Listening'})

#delete subjects that have NaN in all freeform responses
freeform_data = freeform_data.dropna(subset=freeform_responses, how='all')
freeform_data.to_excel(os.path.join(DATA_DIR,'freeform_SensoryTool_complete.xlsx'), index=False)

HS_freeform_data = freeform_data[freeform_data['meta_HighSensory'] == 1]
DL_freeform_data = freeform_data[freeform_data['meta_HighSensory'] == 0]

#save in data directory
HS_freeform_data.to_csv(os.path.join(DATA_DIR, 'freeform_HS_SensoryTool_complete.csv'), index=False)
DL_freeform_data.to_csv(os.path.join(DATA_DIR, 'freeform_DL_SensoryTool_complete.csv'), index=False)


In [3]:
# Load only the 'reflection_answer' column from each CSV file for HS and DL
hs_reflection = pd.read_csv(os.path.join(DATA_DIR, 'freeform_HS_SensoryTool_complete.csv'), usecols=['reflection_answer']).dropna()
dl_reflection = pd.read_csv(os.path.join(DATA_DIR, 'freeform_DL_SensoryTool_complete.csv'), usecols=['reflection_answer']).dropna()

print("HS reflection_answer shape:", hs_reflection.shape)
print("DL reflection_answer shape:", dl_reflection.shape)

HS reflection_answer shape: (336, 1)
DL reflection_answer shape: (98, 1)
