# Imports

In [435]:
import pandas as pd

In [436]:
data = pd.read_excel('data/Study Group Survey(1-92).xlsx')
data.head()

Unnamed: 0,ID,Start time,Completion time,Email,Name,Last modified time,Statement 1,Statement 12,Statement 13,When would it suit you best to meet?
0,1,2025-09-02 14:18:46,2025-09-02 14:19:47,jamoe25@student.sdu.dk,Jakob Randa Mørk,,I see it as an advantage,"Intermediate (some experience, but still limited)",Preferrable on campus,Before/after classes on weekdays ;
1,2,2025-09-02 14:18:42,2025-09-02 14:19:49,anlaz25@student.sdu.dk,Ana Lazar,,Neutral – I can work both alone and in a group,No experience,Preferable Online,"Before/after classes on weekdays ;Weekends, place to be arranged among team members.;"
2,3,2025-09-02 14:19:03,2025-09-02 14:21:21,zosip25@student.sdu.dk,Zorka Anna Sipos,,I see it as an advantage,Almost no experience,I have limited time to meet,Flexible;I am working around 20-30 hours a week during weekdays. So weekday evenings either in person or online are fine. I wouldnt want to do groupwork on the weekend only or mainly by myself;Week day evenings;
3,4,2025-09-02 14:20:37,2025-09-02 14:22:45,sosar25@student.sdu.dk,SOHEL SARKER,,I very much believe it will strengthen my learning.,"Intermediate (some experience, but still limited)",Preferable Online,Before/after classes on weekdays ;
4,5,2025-09-02 14:19:47,2025-09-02 14:23:00,luebe22@student.sdu.dk,Lucie Eberová,,I see it as an advantage,No experience,Both are fine,Flexible;


# Drop Columns

In [437]:
data.columns

Index(['ID', 'Start time', 'Completion time', 'Email', 'Name',
       'Last modified time', 'Statement 1', 'Statement 12', 'Statement 13',
       'When would it suit you best to meet?'],
      dtype='object')

In [438]:
data.drop(columns=['Start time','Completion time', 'Email', 'Name', 'Last modified time'], inplace=True)
data.head(3)

Unnamed: 0,ID,Statement 1,Statement 12,Statement 13,When would it suit you best to meet?
0,1,I see it as an advantage,"Intermediate (some experience, but still limited)",Preferrable on campus,Before/after classes on weekdays ;
1,2,Neutral – I can work both alone and in a group,No experience,Preferable Online,"Before/after classes on weekdays ;Weekends, place to be arranged among team members.;"
2,3,I see it as an advantage,Almost no experience,I have limited time to meet,Flexible;I am working around 20-30 hours a week during weekdays. So weekday evenings either in person or online are fine. I wouldnt want to do groupwork on the weekend only or mainly by myself;Week day evenings;


# Value Mapping

In [439]:
data.columns

Index(['ID', 'Statement 1', 'Statement 12', 'Statement 13',
       'When would it suit you best to meet?'],
      dtype='object')

In [440]:
# Q1
# How do you feel about working in a study group for programming?
q1_mapping = {
    'I very much believe it will strengthen my learning.': 1.0,
    'I see it as an advantage': 0.75, 
    'Neutral – I can work both alone and in a group': 0.5, 
    'Prefer not to do group work – but I will if necessary': 0.25, 
    'Special conditions': 0.0
    }
data['Statement 1'] = data['Statement 1'].str.replace(' ', ' ')
data['Statement 1'] = data['Statement 1'].str.strip().map(q1_mapping)

In [441]:
# Q2
# What is your current level of programming experience?
q2_mapping = {
    'Professional': 1.0,
    'Advanced (I have programmed before and feel quite confident)': 0.75, 
    'Intermediate (some experience, but still limited)': 0.5, 
    'Almost no experience': 0.25, 
    'No experience': 0.0
    }

data['Statement 12'] = data['Statement 12'].str.replace(' ', ' ')
data['Statement 12'] = data['Statement 12'].str.strip().map(q2_mapping)

In [442]:
# Q3
# How would you prefer to meet with your study group?
q3_mapping = {
    'I have limited time to meet': 1.0,
    'Online only': 0.83, 
    'Preferable Online': 0.66, 
    'Both are fine': 0.5, 
    'Preferable Online': 0.33, 
    'Preferrable on campus': 0.16,
    'On campus': 0.0
    }

data['Statement 13'] = data['Statement 13'].str.replace(' ', ' ')
data['Statement 13'] = data['Statement 13'].str.strip().map(q3_mapping)

In [443]:
# if missing values, check for spaces or periods
data.isna().sum()

ID                                      0
Statement 1                             0
Statement 12                            0
Statement 13                            0
When would it suit you best to meet?    0
dtype: int64

# Q4 - MCQ

In [444]:
mcq_col_name = 'When would it suit you best to meet?'

# remove non-standard spaces
data[mcq_col_name] = data[mcq_col_name].str.replace(' ', ' ')

In [445]:
q4_options = ["Flexible", 'Before/after classes on weekdays ', 'Week day evenings', 'Weekends, place to be arranged among team members.']
mcq_list = data[mcq_col_name].str.split(';')

In [446]:
from sklearn.preprocessing import MultiLabelBinarizer

# One Hot Encode with predefined classes
mlb = MultiLabelBinarizer(classes=q4_options)
mcq_predefined = mlb.fit_transform(mcq_list)
data[mlb.classes_] = mcq_predefined



In [None]:
def remove_mcq_options(series: pd.Series, options: list[str]) -> pd.Series:
    """
    Removes all specified options and semicolons from a pandas Series of strings.
    """
    result = series.str.replace(";", '')
    for option in options:
        result = result.str.replace(option, '')
    return result

# Get "Other" statements by removing all predefined options
other_statement = remove_mcq_options(data[mcq_col_name], q4_options)

# Binary "Other" column
data["other"] = other_statement.apply(lambda x: 0 if len(x) == 0 else 1)

# String of "Other" statements
data["other_text"] = other_statement

In [None]:
data.drop(columns=[mcq_col_name, 'ID'], inplace=True)
data.rename(columns={
    'Statement 1': 'willingness',
    'Statement 12': 'expertise',
    'Statement 13': 'place',
    'Flexible': 'flexible',
    'Before/after classes on weekdays ': 'before_after_classes',
    'Week day evenings': 'week_day_evenings',
    'Weekends, place to be arranged among team members.': 'weekends'
}, inplace=True)

data.head(3)

Unnamed: 0,willingness,expertise,place,Flexible,Before/after classes on weekdays,Week day evenings,"Weekends, place to be arranged among team members.",Other,Other_text
0,0.75,0.5,0.16,0,1,0,0,0,
1,0.5,0.0,0.33,0,1,0,1,0,
2,0.75,0.25,1.0,1,0,1,0,1,I am working around 20-30 hours a week during weekdays. So weekday evenings either in person or online are fine. I wouldnt want to do groupwork on the weekend only or mainly by myself
