# Analytics for Survey
### Table of Contents
 1. [Install Dependencies](#install-dependencies)
 2. [Imports](#imports)
 3. [Survey Data Preprocessing](#survey-data-preprocessing)
 4. [Paper Statistics](#paper-statistics)

## Install Dependencies

In [None]:
%pip install pandas

## Imports

In [None]:
import pandas as pd

## Survey Data Preprocessing

In [None]:
# Load the raw survey export. No date parsing is requested here, so the
# 'Timestamp' column is whatever read_csv infers (text for typical exports).
df = pd.read_csv('path/to/survey')

# Keep only each participant's most recent submission.
# The sort uses a parsed datetime key rather than the raw column: ordering
# timestamp text lexicographically is only chronological for zero-padded
# ISO-style strings (e.g. '10/1/2024' sorts before '9/1/2024' as text), which
# would make keep='last' retain the wrong submission. The column itself is left
# unmodified; only the sort order is derived from the parsed values.
df.sort_values('Timestamp', key=pd.to_datetime, inplace=True)
df.drop_duplicates('Email Address', inplace=True, keep='last')
print(f'{len(df)} total submissions')

# Determine which participants were in the assisted or control group.
# Exactly one column is expected to start with this prefix; the single-target
# unpacking raises ValueError if zero or several columns match.
form_question_title, = [question_title for question_title in df.columns
                        if question_title.startswith('You will be completing Form')]

# 'Form B' marks the control group. Missing answers are kept as missing so they
# are counted in neither group below.
df['Control Group'] = df[form_question_title].apply(lambda x: x == 'Form B' if pd.notna(x) else x)
df.drop(form_question_title, axis=1, inplace=True)

control_total = int((df['Control Group'] == True).sum())
assisted_total = int((df['Control Group'] == False).sum())
print(f'{control_total} participants in the control group; {assisted_total} participants in the assisted group')

## Paper Statistics

In [None]:
# Education demographics
undergrad_year = df['If you are a an ongoing undergraduate student, what is your current year?']
education_level = df['What is your highest level of education?']

# The original filter only matched the misspelled literal 'Second Yead', which
# silently pushes second-year students into "Other" when the export uses the
# correct spelling. Both spellings are accepted so either form of the data is
# bucketed correctly.
freshmen_sophomore_df = df[undergrad_year.isin(['First Year', 'Second Year', 'Second Yead'])]
junior_senior_df = df[undergrad_year.isin(['Third Year', 'Fourth Year'])]
masters_df = df[education_level == "Master's Degree or above"]
phd_df = df[education_level == 'PhD']

# "Other" is every participant not captured by a bucket above. Rows are selected
# by index label instead of cell-wise DataFrame.isin masking, which NaN-masks
# cells rather than selecting rows and relied on dropna(how='all') to clean up.
classified_index = (freshmen_sophomore_df.index
                    .union(junior_senior_df.index)
                    .union(masters_df.index)
                    .union(phd_df.index))
other_education_df = df[~df.index.isin(classified_index)]

# Sanity check: the buckets must partition df. This fails when a participant
# falls into more than one bucket (e.g. an undergraduate year and a PhD).
assert len(freshmen_sophomore_df) + len(junior_senior_df) + len(masters_df) + len(phd_df) + len(other_education_df) == len(df), \
    'Education buckets overlap: at least one participant matches more than one bucket'

print('Education Demographics:')
for label, education_df in {'Freshmen & Sophomores': freshmen_sophomore_df,
                            'Juniors & Seniors': junior_senior_df,
                            'Masters Students': masters_df,
                            'PhD Students': phd_df,
                            'Other': other_education_df}.items():
    # Participants with a missing group assignment are counted in neither total.
    control_total = int((education_df['Control Group'] == True).sum())
    assisted_total = int((education_df['Control Group'] == False).sum())
    print(f'\t{label} - Control: {control_total}; Assisted: {assisted_total}')

# Programming experience: participants per years-of-experience answer, split by
# study group. Answers are listed in value_counts() order (most common first).
years_of_experience = df['How many years of programming experience do you have?']
is_control = df['Control Group'] == True
is_assisted = df['Control Group'] == False

print('\nProgramming Experience:\n\tYears of Experience:')
for label in years_of_experience.value_counts().index:
    gave_answer = years_of_experience == label
    control_total = len(df[gave_answer & is_control])
    assisted_total = len(df[gave_answer & is_assisted])
    print(f'\t\t{label} - Control: {control_total}; Assisted: {assisted_total}')

def _print_multi_select_breakdown(responses: pd.DataFrame, question: str) -> None:
    """Print, for each option of a "select all that apply" question, how many
    control and assisted participants chose it.

    Options are matched exactly against the ', '-separated selections. The
    previous approach combined ``str.count(label)`` with the group mask using
    ``&``, which (a) is a bitwise AND, so an even occurrence count such as 2
    evaluated to "not selected", (b) treated the option text as a regular
    expression, and (c) failed on the missing label produced by unanswered rows.

    Args:
        responses: Survey answers, one row per participant, with a
            'Control Group' column. The index must be unique because
            selections are mapped back to participants by index label.
        question: Name of the column whose cells hold the chosen options
            joined by ', '.
    """
    # One entry per (participant, chosen option); the index repeats the
    # participant's label. Unanswered rows explode to NaN and are dropped.
    selections = responses[question].str.split(', ').explode().dropna()
    in_control = responses['Control Group'] == True
    in_assisted = responses['Control Group'] == False

    for label in selections.unique():
        # Participants who picked exactly this option, however often it appears.
        chose_option = responses.index.isin(selections[selections == label].index)
        control_total = int((in_control & chose_option).sum())
        assisted_total = int((in_assisted & chose_option).sum())
        print(f'\t\t{label} - Control: {control_total}; Assisted: {assisted_total}')


print('\tTesting Methods:')
_print_multi_select_breakdown(df, 'How do you typically test the functionality of your code? (Select all that apply)')

print('\tDebugging Methods:')
_print_multi_select_breakdown(df, 'How do you typically debug your code when you encounter issues or unexpected behaviors? (Select all that apply)')

# Formal security training: participants per answer, split by study group.
# Answers are listed in value_counts() order (most common first).
print('\tSecurity Courses Taken:')
security_training = df['Have you received any formal training or certifications specifically in code security?']
control_mask = df['Control Group'] == True
assisted_mask = df['Control Group'] == False
for label in security_training.value_counts().index:
    matches_answer = security_training == label
    control_total = len(df[matches_answer & control_mask])
    assisted_total = len(df[matches_answer & assisted_mask])
    print(f'\t\t{label} - Control: {control_total}; Assisted: {assisted_total}')

