In [1]:
import os
import pandas as pd
from pymongo import MongoClient

# Parses and structures the data from a DataFrame based on given item names and row range.
def parse_and_structure_data(data, item_names, start_row, end_row):
    structured_data = []
    for index in range(start_row, end_row + 1):
        parts = data.iloc[index].dropna().tolist()
        if len(parts) > len(item_names):
            parts = parts[:len(item_names)]
        structured_data.append(dict(zip(item_names, parts)))
    return structured_data

# Processes a survey CSV file, parses different sections using predefined item names, and inserts the parsed data into MongoDB.
def process_survey(directory, filename, db, collection_prefix):
    file_path = os.path.join(directory, filename)
    data = pd.read_csv(file_path, delimiter=';')
    
    # Item names for each quest
    panas_items = ["Active", "Distressed", "Interested", "Inspired", "Annoyed", "Strong", "Guilty", "Scared", "Hostile", "Excited", "Proud", "Irritable", "Enthusiastic", "Ashamed", "Alert", "Nervous", "Determined", "Attentive", "Jittery", "Afraid", "Stressed", "Frustrated", "Happy", "Angry", "Irritated", "Sad"]
    stai_items = ["I feel at ease", "I feel nervous", "I am jittery", "I am relaxed", "I am worried", "I feel pleasant"]
    dim_items = ["Dimension 1", "Dimension 2"] 
    sssq_items = ["Committed", "Motivated", "Reflected", "Concerned"]
    
    # Parse each section of the survey and structure the data
    structured_panas_data = parse_and_structure_data(data, panas_items, 5, 10)
    structured_stai_data = parse_and_structure_data(data, stai_items, 11, 15)
    structured_dim_data = parse_and_structure_data(data, dim_items, 16, 18)
    structured_sssq_data = parse_and_structure_data(data, sssq_items, 19, 22)
    
    # Insert structured data into MongoDB
    collection_name = f"{collection_prefix}_responses"
    collection = db[collection_name]
    collection.insert_many(structured_panas_data + structured_stai_data + structured_dim_data + structured_sssq_data)

client = MongoClient('mongodb://localhost:27017/')
db = client['questionnaire_database']

# Survey base directory, adjust if needed
base_directory = '/Users/jwu/Documents/5400/WESAD'

for i in range(2, 12): 
    directory = f'S{i}'
    survey_directory_path = os.path.join(base_directory, directory)
    filename = f'{directory.lower()}_quest.csv'
    process_survey(survey_directory_path, filename, db, directory.lower())

print("Data from surveys S2 to S11 has been inserted into MongoDB.")

Data from surveys S2 to S11 has been inserted into MongoDB.


In [8]:
collection = db['s2_responses'] # Change name to see other surveys, for exp., s3_responses, s4_responses
documents = collection.find()

for doc in documents:
    print(doc)

{'_id': ObjectId('66199ca233664641d48cad2a'), 'Active': '# PANAS', 'Distressed': '3', 'Interested': '2', 'Inspired': '4', 'Annoyed': '1', 'Strong': '3', 'Guilty': '3', 'Scared': '1', 'Hostile': 2.0, 'Excited': 1.0, 'Proud': 4.0, 'Irritable': 2.0, 'Enthusiastic': 4.0, 'Ashamed': 3.0, 'Alert': 1.0, 'Nervous': 5.0, 'Determined': 4.0, 'Attentive': 4.0, 'Jittery': 4.0, 'Afraid': 2.0, 'Stressed': 3.0, 'Frustrated': 3.0, 'Happy': 3.0, 'Angry': 2.0, 'Irritated': 1.0, 'Sad': 3.0}
{'_id': ObjectId('66199ca233664641d48cad2b'), 'Active': '# PANAS', 'Distressed': '1', 'Interested': '1', 'Inspired': '2', 'Annoyed': '3', 'Strong': '1', 'Guilty': '2', 'Scared': '1', 'Hostile': 1.0, 'Excited': 1.0, 'Proud': 1.0, 'Irritable': 1.0, 'Enthusiastic': 1.0, 'Ashamed': 3.0, 'Alert': 1.0, 'Nervous': 2.0, 'Determined': 1.0, 'Attentive': 2.0, 'Jittery': 3.0, 'Afraid': 1.0, 'Stressed': 1.0, 'Frustrated': 1.0, 'Happy': 1.0, 'Angry': 4.0, 'Irritated': 1.0}
{'_id': ObjectId('66199ca233664641d48cad2c'), 'Active': '# P