# Neural Network Classifier
This notebook implements a depression classifier using a Neural Network.

In [3]:
# --- Basic imports ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- sklearn ---
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --- PyTorch ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Misc
import warnings
warnings.filterwarnings("ignore")

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)


In [4]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
original = pd.read_csv("final_depression_dataset_1.csv")

train.head()

Unnamed: 0,id,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,0,Aaradhya,Female,49.0,Ludhiana,Working Professional,Chef,,5.0,,,2.0,More than 8 hours,Healthy,BHM,No,1.0,2.0,No,0
1,1,Vivan,Male,26.0,Varanasi,Working Professional,Teacher,,4.0,,,3.0,Less than 5 hours,Unhealthy,LLB,Yes,7.0,3.0,No,1
2,2,Yuvraj,Male,33.0,Visakhapatnam,Student,,5.0,,8.97,2.0,,5-6 hours,Healthy,B.Pharm,Yes,3.0,1.0,No,1
3,3,Yuvraj,Male,22.0,Mumbai,Working Professional,Teacher,,5.0,,,1.0,Less than 5 hours,Moderate,BBA,Yes,10.0,1.0,Yes,1
4,4,Rhea,Female,30.0,Kanpur,Working Professional,Business Analyst,,1.0,,,1.0,5-6 hours,Unhealthy,BBA,Yes,9.0,4.0,Yes,0


In [7]:
target = "Depression"

numerical_columns = [
    "Age", "Academic Pressure", "Work Pressure", "CGPA",
    "Study Satisfaction", "Job Satisfaction", "Work/Study Hours",
    "Financial Stress"
]

one_hot_columns = [
    "Gender", "Working Professional or Student", "City",
    "Family History of Mental Illness", "Sleep Duration"
]

label_columns = [
    "Degree", "Profession", "Dietary Habits",
    "Have you ever had suicidal thoughts ?"
]

categorical_columns = one_hot_columns + label_columns


In [8]:
def preprocess_dataframe(df, feature_cols, categorical_cols, numerical_cols, target_col, fit_objs=None):
    dfc = df.copy()

    preproc = {} if fit_objs is None else fit_objs

    # ------- One-hot encoding -------
    if fit_objs is None:
        if len(categorical_cols) > 0:
            ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
            X_cat = ohe.fit_transform(dfc[categorical_cols].astype(str))
            preproc["ohe"] = ohe
        else:
            X_cat = np.zeros((len(dfc), 0))
    else:
        if "ohe" in fit_objs and len(categorical_cols) > 0:
            X_cat = fit_objs["ohe"].transform(dfc[categorical_cols].astype(str))
        else:
            X_cat = np.zeros((len(dfc), 0))

    # ------- Scale numeric -------
    if fit_objs is None:
        if len(numerical_cols) > 0:
            scaler = StandardScaler()
            X_num = scaler.fit_transform(dfc[numerical_cols])
            preproc["scaler"] = scaler
        else:
            X_num = np.zeros((len(dfc), 0))
    else:
        if "scaler" in fit_objs and len(numerical_cols) > 0:
            X_num = fit_objs["scaler"].transform(dfc[numerical_cols])
        else:
            X_num = np.zeros((len(dfc), 0))

    # Target
    y = dfc[target_col].values.astype(np.float32)

    # Concatenate
    X = np.concatenate([X_cat, X_num], axis=1).astype(np.float32)

    return X, y, preproc


In [9]:
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [10]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)


In [11]:
def train_model(model, train_loader, val_loader, epochs=20, lr=1e-3):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X, y in train_loader:
            optimizer.zero_grad()
            preds = model(X).squeeze()
            loss = criterion(preds, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # validation
        model.eval()
        val_acc = []
        with torch.no_grad():
            for X, y in val_loader:
                preds = (model(X).squeeze() > 0.5).float()
                val_acc.append((preds == y).float().mean().item())

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Val Acc: {np.mean(val_acc):.3f}")


In [14]:
feature_cols = numerical_columns + categorical_columns

train_df, val_df = train_test_split(train, test_size=0.15, random_state=SEED, stratify=train[target])

X_train, y_train, preproc = preprocess_dataframe(train_df, feature_cols, categorical_columns, numerical_columns, target)
X_val, y_val, _       = preprocess_dataframe(val_df, feature_cols, categorical_columns, numerical_columns, target, preproc)

train_data = TabularDataset(X_train, y_train)
val_data   = TabularDataset(X_val, y_val)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_data, batch_size=64, shuffle=False)

model = SimpleNN(input_dim=X_train.shape[1])
train_model(model, train_loader, val_loader, epochs=25, lr=1e-3)


RuntimeError: all elements of input should be between 0 and 1