In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
parent_directory = os.path.dirname(os.getcwd())
survey_directory = os.path.join(parent_directory, 'data', 'surveys', 'final')
surveys = ['demographics_survey__stanford']

survey_results = {
    'age': [],
    'gender_counts': {
        'Male': 0,
        'Female': 0,
        'Non-binary/Third Gender': 0,
        'Prefer not to answer': 0
    }
}

gender_options = {
    0: 'Male',
    1: 'Female',
    2: 'Non-binary/Third Gender',
    3: 'Prefer not to answer'
}

In [3]:
for foldername in os.listdir(survey_directory):
    folder_path = os.path.join(survey_directory, foldername)
    if os.path.isdir(folder_path):
        for filename in os.listdir(folder_path):
            if filename.endswith('.csv'):
                if any(survey in filename for survey in surveys):
                    subject_id = filename.split('_')[0]
                    df = pd.read_csv(os.path.join(folder_path, filename))

                    age_row = df[df['text'] == "How old are you (in years)?"]
                    if not age_row.empty:
                        age_res = age_row['response'].values[0]
                        survey_results['age'].append(age_res)

                    gender_row = df[df['text'].str.contains("What is your gender?", na=False)]
                    if not gender_row.empty:
                        gender_res = gender_row['response'].values[0]
                        gender_res = int(gender_res)
                        if gender_res in gender_options:
                            survey_results['gender_counts'][gender_options[gender_res]] += 1
                        

In [4]:
if survey_results['age']:
    survey_results['age'] = [int(age) for age in survey_results['age']]
    survey_results['age'].append(20) #S019's age is 20 from redcap. They didn't complete the demographics survey
    average_age = np.mean(survey_results['age'])
    std_age = pd.Series(survey_results['age']).std()
else:
    average_age = None
    std_age = None

survey_results['average_age'] = average_age
survey_results['std_age'] = std_age

print("Survey Results:")
print("Average Age:", survey_results['average_age'])
print("Standard Deviation of Age:", survey_results['std_age'])
print("Gender Counts:", survey_results['gender_counts']) #S019's gender is female. They didn't complete the demographics survey.

Survey Results:
Average Age: 26.2
Standard Deviation of Age: 8.14673129354533
Gender Counts: {'Male': 15, 'Female': 22, 'Non-binary/Third Gender': 2, 'Prefer not to answer': 0}
