In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
df = pd.read_csv('train.csv')

label_encoder = LabelEncoder()

# Drop column
df = df.drop(["id"], axis=1)

# fill Na
df.fillna(0.0, inplace=True)

# encoded column:
encoded_columns = ["Name", "City", "Gender", "Age", "Working Professional or Student", "Profession", "Sleep Duration", "Dietary Habits", "Degree", "Have you ever had suicidal thoughts ?", "Family History of Mental Illness"]

df["Profession"] = df["Profession"].astype(str)

for column in encoded_columns:
    df[column] = df[column].astype(str)
    df[column] = label_encoder.fit_transform(df[column])

df = df.astype(float)
df.head(5)

Unnamed: 0,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,11.0,0.0,31.0,50.0,1.0,11.0,0.0,5.0,0.0,0.0,2.0,29.0,8.0,34.0,0.0,1.0,2.0,0.0,0.0
1,407.0,1.0,8.0,93.0,1.0,56.0,0.0,4.0,0.0,0.0,3.0,27.0,21.0,64.0,1.0,7.0,3.0,0.0,1.0
2,417.0,1.0,15.0,97.0,0.0,0.0,5.0,0.0,8.97,2.0,0.0,15.0,8.0,22.0,1.0,3.0,1.0,0.0,1.0
3,417.0,1.0,4.0,64.0,1.0,56.0,0.0,5.0,0.0,0.0,1.0,27.0,16.0,29.0,1.0,10.0,1.0,1.0,1.0
4,286.0,0.0,12.0,37.0,1.0,10.0,0.0,1.0,0.0,0.0,1.0,15.0,21.0,29.0,1.0,9.0,4.0,1.0,0.0


In [7]:
X_train = df.drop(columns=['Depression']).to_numpy()
y_train = df["Depression"].to_numpy().astype(int)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [8]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(p=0.3)
        self.fc1 = nn.Linear(input_size, hidden_size)  # Input to hidden layer
        self.relu = nn.ReLU()                        # Activation function
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)  # Hidden to output layer
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        return x

# Create the model
input_size = X_train.shape[1]
hidden_size = 32
output_size = 1

model = SimpleNN(input_size, hidden_size, output_size)
model.to("cuda")

SimpleNN(
  (fc1): Linear(in_features=18, out_features=32, bias=True)
  (relu): ReLU()
  (fc3): Linear(in_features=32, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [9]:
# Loss function
criterion = nn.BCEWithLogitsLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

In [12]:
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        # Move data to the appropriate device
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Forward pass
        outputs = model(X_batch)  # Shape: (batch_size, 1)
        loss = criterion(outputs, y_batch)  # Ensure shapes match

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss
        epoch_loss += loss.item()

        # Calculate accuracy
        predictions = (torch.sigmoid(outputs) > 0.5).float()  # Threshold at 0.5
        correct += (predictions == y_batch).sum().item()
        total += y_batch.size(0)

    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/20, Loss: 442.0760, Accuracy: 91.47%
Epoch 2/20, Loss: 377.4334, Accuracy: 93.03%
Epoch 3/20, Loss: 367.7496, Accuracy: 93.25%
Epoch 4/20, Loss: 362.4687, Accuracy: 93.37%
Epoch 5/20, Loss: 360.9758, Accuracy: 93.36%
Epoch 6/20, Loss: 357.1986, Accuracy: 93.45%
Epoch 7/20, Loss: 355.9586, Accuracy: 93.52%
Epoch 8/20, Loss: 354.2521, Accuracy: 93.50%
Epoch 9/20, Loss: 353.1267, Accuracy: 93.55%
Epoch 10/20, Loss: 353.1414, Accuracy: 93.59%
Epoch 11/20, Loss: 351.1295, Accuracy: 93.58%
Epoch 12/20, Loss: 349.9062, Accuracy: 93.56%
Epoch 13/20, Loss: 351.7762, Accuracy: 93.56%
Epoch 14/20, Loss: 349.7507, Accuracy: 93.58%
Epoch 15/20, Loss: 350.7731, Accuracy: 93.59%
Epoch 16/20, Loss: 349.9022, Accuracy: 93.61%
Epoch 17/20, Loss: 349.2139, Accuracy: 93.64%
Epoch 18/20, Loss: 348.5628, Accuracy: 93.65%
Epoch 19/20, Loss: 348.2823, Accuracy: 93.63%
Epoch 20/20, Loss: 348.2076, Accuracy: 93.64%


In [15]:
test_df = pd.read_csv("test.csv")

test_df = test_df.drop(["id"], axis=1)

# fill Na
test_df.fillna(0.0, inplace=True)

# encoded column:
encoded_columns = ["Name", "City", "Gender", "Age", "Working Professional or Student", "Profession", "Sleep Duration", "Dietary Habits", "Degree", "Have you ever had suicidal thoughts ?", "Family History of Mental Illness"]

test_df["Profession"] = test_df["Profession"].astype(str)

for column in encoded_columns:
    test_df[column] = test_df[column].astype(str)
    test_df[column] = label_encoder.fit_transform(test_df[column])

test_df = test_df.astype(float)
X_test = test_df.to_numpy().reshape(1, len(test_df), len(test_df.columns))
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)

In [16]:
model.eval()
y_pred = model(X_test)

In [23]:
probs = torch.sigmoid(y_pred)
binary_preds = (probs > 0.5).float()
prediction = binary_preds.int().cpu().detach().numpy()

In [30]:
prediction = prediction.reshape(len(prediction[0]), )
print(prediction)

[0 0 0 ... 0 1 0]


In [32]:
pred_df = pd.DataFrame(data={
    "id": np.arange(140700, 140700 + len(prediction)),
    "Depression": prediction
})

pred_df.to_csv("neural_net.csv", index=False)