# Mental Health in Tech: The Silent Burnout

This analysis explores the state of mental health in the tech industry using the 2014 Open Sourcing Mental Illness (OSMI) survey data. We'll look at respondent demographics, examine the impact of remote work, and build a model to predict the likelihood of seeking treatment.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Load Data
# Every later cell depends on `df`, so a failed load is reported and then
# re-raised instead of being swallowed; otherwise the notebook would continue
# and fail further down with an unrelated NameError on `df`.
try:
    df = pd.read_csv('data/survey.csv')
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
    # OSError covers a missing or unreadable file; the pandas errors cover an
    # empty or malformed CSV.
    print("Error loading data:", e)
    raise
else:
    # Only reached when the read succeeded, so `df` is guaranteed to exist.
    print("Data loaded successfully. Shape:", df.shape)

## Data Cleaning

In [None]:
# 1. Standardize Gender
# Normalized (lower-cased, whitespace-stripped) answers that map to each binary
# label. Built once at import time; frozenset gives constant-time membership.
_MALE_TOKENS = frozenset({
    'male', 'm', 'man', 'cis male', 'cis man', 'male (cis)', 'male-ish',
    # common misspellings
    'mal', 'make', 'maile', 'malr', 'msle', 'mail',
})
_FEMALE_TOKENS = frozenset({
    'female', 'f', 'woman', 'cis female', 'cis woman', 'female (cis)',
    # common misspellings
    'femake', 'femail',
})


def clean_gender(g):
    """Map a free-text gender answer to one of three categories.

    The answer is lower-cased and stripped of surrounding whitespace before
    it is compared, so the token sets only need normalized spellings.

    Args:
        g: Raw survey answer. Anything that is not a ``str`` (for example
            ``None`` or a float NaN from a missing cell) is accepted.

    Returns:
        ``'Male'``, ``'Female'``, or ``'Non-binary/Other'``. Non-string input
        and any unrecognized text fall into ``'Non-binary/Other'``.
    """
    if not isinstance(g, str):
        return 'Non-binary/Other'

    token = g.lower().strip()
    if token in _MALE_TOKENS:
        return 'Male'
    if token in _FEMALE_TOKENS:
        return 'Female'
    return 'Non-binary/Other'

# Collapse the free-text Gender answers into the categories returned by
# clean_gender and show how many respondents fall into each.
df['Gender_Clean'] = df['Gender'].apply(clean_gender)
print("Gender distribution:\n", df['Gender_Clean'].value_counts())

# 2. Handle Age Outliers
# Non-numeric ages are coerced to NaN. NaN fails the range test below, so those
# rows are dropped together with ages outside 18-100 (both bounds inclusive).
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
# .copy() makes the filtered frame independent of the original, so the column
# assignments that follow do not trigger SettingWithCopyWarning on pandas
# versions without copy-on-write.
df = df[df['Age'].between(18, 100)].copy()

# 3. Handle Missing Values
# Missing answers are replaced with an explicit category rather than left as NaN.
df['self_employed'] = df['self_employed'].fillna('No')
df['work_interfere'] = df['work_interfere'].fillna('Unknown')

## Exploratory Data Analysis

In [None]:
# Treatment by Gender
# Grouped bar chart: one group per cleaned gender category, one bar per
# value of the `treatment` column.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='Gender_Clean', hue='treatment', data=df, ax=ax)
ax.set_title('Mental Health Treatment by Gender')
plt.show()

In [None]:
# Tech Company vs. Wellness Program
# Grouped bar chart: one group per value of `tech_company`, one bar per
# value of the `wellness_program` column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='tech_company', hue='wellness_program', data=df, ax=ax)
ax.set_title('Wellness Programs in Tech vs Non-Tech Companies')
plt.show()