In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("survey.csv")

df = df.dropna()
df = df.drop(['Timestamp','remote_work','tech_company','wellness_program','anonymity','benefits','obs_consequence','mental_vs_physical', 'self_employed', 'state','Country'],axis=1)

In [2]:
column_mapping = {
    "family_history": "family_mental_history",
    "treatment": "Support Received",
    "work_interfere": "Mission Interference",
    "no_employees": "Clan Size",
    "care_options": "knowledge_about_facilities",
    "seek_help": "seeken_help",
    "leave": "frequency_of_leaves",
    "mental_health_consequence": "job_physical_consequence",
    "phys_health_consequence": "job_mental_consequence",
    "coworkers": "comrade_relationships",
    "supervisor": "superior_relationship",
    "mental_health_interview": "physical_assessment",
    "phys_health_interview": "mental_assessment",
}

df.rename(columns=column_mapping, inplace=True)

In [3]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

columns = {"knowledge_about_facilities", 
           "seeken_help", 
           "frequency_of_leaves", 
           "job_physical_consequence", 
           "job_mental_consequence", 
           "comrade_relationships", 
           "superior_relationship", 
           "physical_assessment", 
           "mental_assessment",
           "family_mental_history",
           "Support Received",
           "Mission Interference"}

for column in columns:
    df[column] = label_encoder.fit_transform(df[column])

In [4]:
clan_size_mapping = {
    '1-5': 3,
    '6-25': 15.5,
    '26-100': 63,
    '100-500': 300,
    '500-1000': 750,
    'More than 1000': 1200  
}

df['Clan Size'] = df['Clan Size'].map(clan_size_mapping)

In [5]:
gender_mapping = {
    'male': 1,
    'Male': 1,
    'M': 1,
    'm': 1,
    'cis male': 1,
    'Cis Male': 1,
    'female': 0,
    'Female': 0,
    'Female ': 0,
    'woman': 0,
    'F': 0,
    'f': 0,
    'cis-female/femme': 0,
    'Female (cis)': 0,
    'A little about you': np.nan,
    'p': np.nan
}

df['Gender'] = df['Gender'].map(gender_mapping)

In [6]:
betrayal_related_features = ['frequency_of_leaves', 'Mission Interference', 'job_physical_consequence', 'job_mental_consequence','family_mental_history']
loyalty_related_features = ['comrade_relationships', 'superior_relationship', 'Support Received']

def assign_betrayal_label(row):
    betrayal_score = 0
    total_features = len(betrayal_related_features) + len(loyalty_related_features)

    for feature in betrayal_related_features:
        if row[feature] >= df[feature].quantile(0.8):  # Top 20%
            betrayal_score += 1
 
    for feature in loyalty_related_features:
        if row[feature] <= df[feature].quantile(0.2):  # Bottom 20%
            betrayal_score += 1

    betrayal_likelihood = betrayal_score / total_features

    return 1 if betrayal_likelihood > 0.5 else 0

# Apply the function to create the target variable
df['Betrayal'] = df.apply(assign_betrayal_label, axis=1)


In [9]:
df.to_csv("processedData.csv")

df

Unnamed: 0,Age,Gender,family_mental_history,Support Received,Mission Interference,Clan Size,knowledge_about_facilities,seeken_help,frequency_of_leaves,job_physical_consequence,job_mental_consequence,comrade_relationships,superior_relationship,physical_assessment,mental_assessment,comments,Betrayal
24,33,1.0,1,1,2,63.0,1,2,0,1,1,2,2,1,2,Relatively new job. Ask again later,0
25,35,1.0,1,1,3,1200.0,2,0,4,2,1,1,2,1,2,Sometimes I think about using drugs for my me...,1
33,42,1.0,1,1,3,63.0,2,2,4,0,1,1,1,0,2,I selected my current employer based on its po...,1
45,38,0.0,1,1,3,63.0,2,2,2,1,1,1,2,1,1,Our health plan has covered my psychotherapy a...,0
49,30,1.0,1,1,2,63.0,0,0,0,0,1,1,2,1,1,I just started a new job last week hence a lot...,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1186,56,0.0,0,0,2,3.0,1,0,0,0,0,0,0,1,1,I'm self-employed on contract with small start...,0
1190,44,1.0,1,1,3,300.0,2,2,0,0,1,1,1,1,1,My mental health issues were the direct result...,0
1219,38,1.0,0,1,3,1200.0,0,2,4,1,1,2,2,1,1,I openly discuss my mental health struggles. I...,0
1221,46,1.0,1,1,3,300.0,1,0,0,1,1,1,2,2,2,Just starting a new job hence the numerous I d...,0
