In [23]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
df = pd.read_csv('train.csv')

label_encoder = LabelEncoder()

# Drop column
df = df.drop(["id"], axis=1)

# fill Na
df.fillna(0.0, inplace=True)

# encoded column:
encoded_columns = ["Name", "City", "Gender", "Age", "Working Professional or Student", "Profession", "Sleep Duration", "Dietary Habits", "Degree", "Have you ever had suicidal thoughts ?", "Family History of Mental Illness"]

df["Profession"] = df["Profession"].astype(str)

for column in encoded_columns:
    df[column] = df[column].astype(str)
    df[column] = label_encoder.fit_transform(df[column])

df = df.astype(float)
df.head(5)

Unnamed: 0,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,11.0,0.0,31.0,50.0,1.0,11.0,0.0,5.0,0.0,0.0,2.0,29.0,8.0,34.0,0.0,1.0,2.0,0.0,0.0
1,407.0,1.0,8.0,93.0,1.0,56.0,0.0,4.0,0.0,0.0,3.0,27.0,21.0,64.0,1.0,7.0,3.0,0.0,1.0
2,417.0,1.0,15.0,97.0,0.0,0.0,5.0,0.0,8.97,2.0,0.0,15.0,8.0,22.0,1.0,3.0,1.0,0.0,1.0
3,417.0,1.0,4.0,64.0,1.0,56.0,0.0,5.0,0.0,0.0,1.0,27.0,16.0,29.0,1.0,10.0,1.0,1.0,1.0
4,286.0,0.0,12.0,37.0,1.0,10.0,0.0,1.0,0.0,0.0,1.0,15.0,21.0,29.0,1.0,9.0,4.0,1.0,0.0


In [6]:
X = df.drop(columns=['Depression']).to_numpy()
y = df["Depression"].to_numpy().astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [7]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # Input to hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()                        # Activation function
        self.fc3 = nn.Linear(hidden_size, 1)  # Hidden to output layer
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Create the model
input_size = X_train.shape[1]
hidden_size = 64
output_size = 1

model = SimpleNN(input_size, hidden_size, output_size)
model.to("cuda")

SimpleNN(
  (fc1): Linear(in_features=18, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu): ReLU()
  (fc3): Linear(in_features=64, out_features=1, bias=True)
)

In [8]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

In [10]:
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        # Move data to the appropriate device
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Forward pass
        outputs = model(X_batch)  # Shape: (batch_size, 1)
        loss = criterion(outputs, y_batch)  # Ensure shapes match

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss
        epoch_loss += loss.item()

        # Calculate accuracy
        predictions = (torch.sigmoid(outputs) > 0.5).float()  # Threshold at 0.5
        correct += (predictions == y_batch).sum().item()
        total += y_batch.size(0)

    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/20, Loss: 564.0157, Accuracy: 93.52%
Epoch 2/20, Loss: 562.8195, Accuracy: 93.54%
Epoch 3/20, Loss: 562.5580, Accuracy: 93.56%
Epoch 4/20, Loss: 561.0599, Accuracy: 93.58%
Epoch 5/20, Loss: 560.0089, Accuracy: 93.61%
Epoch 6/20, Loss: 558.8451, Accuracy: 93.59%
Epoch 7/20, Loss: 558.1617, Accuracy: 93.61%
Epoch 8/20, Loss: 556.3651, Accuracy: 93.64%
Epoch 9/20, Loss: 556.6589, Accuracy: 93.60%
Epoch 10/20, Loss: 555.3580, Accuracy: 93.65%
Epoch 11/20, Loss: 553.8361, Accuracy: 93.64%
Epoch 12/20, Loss: 553.1091, Accuracy: 93.69%
Epoch 13/20, Loss: 553.6671, Accuracy: 93.68%
Epoch 14/20, Loss: 552.7470, Accuracy: 93.65%
Epoch 15/20, Loss: 551.3558, Accuracy: 93.67%
Epoch 16/20, Loss: 550.7140, Accuracy: 93.72%
Epoch 17/20, Loss: 549.3748, Accuracy: 93.68%
Epoch 18/20, Loss: 550.5790, Accuracy: 93.66%
Epoch 19/20, Loss: 550.1441, Accuracy: 93.70%
Epoch 20/20, Loss: 550.9553, Accuracy: 93.72%


In [15]:
model.eval()
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
X_test.shape

  X_test = torch.tensor(X_test, dtype=torch.float32).to(device)


torch.Size([28140, 18])

In [16]:
X_test.to(device)

tensor([[  7.,   0.,   3.,  ...,  11.,   5.,   0.],
        [ 20.,   1.,  11.,  ...,   6.,   3.,   0.],
        [192.,   1.,   7.,  ...,   1.,   5.,   1.],
        ...,
        [187.,   1.,  42.,  ...,  12.,   2.,   0.],
        [124.,   1.,  23.,  ...,  11.,   4.,   0.],
        [296.,   1.,   5.,  ...,  10.,   1.,   1.]], device='cuda:0')

In [18]:
model(X_test).squeeze(1)

tensor([ 4.5889, -2.8682, -0.3481,  ..., -8.3919,  1.2322,  1.8660],
       device='cuda:0', grad_fn=<SqueezeBackward1>)

In [20]:
predictions = (torch.sigmoid(model(X_test).squeeze(1)) > 0.5).float().cpu().detach().numpy()
predictions

array([1., 0., 0., ..., 0., 1., 1.], dtype=float32)

In [21]:
predictions.shape

(28140,)

In [22]:
y_test.shape

(28140,)

In [24]:
accuracy_score(y_test, predictions)

0.9331911869225302

In [25]:
test = pd.read_csv("test.csv")

label_encoder = LabelEncoder()

# Drop column
test = test.drop(["id"], axis=1)

# fill Na
test.fillna(0.0, inplace=True)

# encoded column:
encoded_columns = ["Name", "City", "Gender", "Age", "Working Professional or Student", "Profession", "Sleep Duration", "Dietary Habits", "Degree", "Have you ever had suicidal thoughts ?", "Family History of Mental Illness"]

test["Profession"] = test["Profession"].astype(str)

for column in encoded_columns:
    test[column] = test[column].astype(str)
    test[column] = label_encoder.fit_transform(test[column])

test = test.astype(float)
test.head(5)

Unnamed: 0,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness
0,288.0,1.0,36.0,67.0,1.0,28.0,0.0,2.0,0.0,0.0,5.0,25.0,13.0,54.0,0.0,9.0,3.0,1.0
1,279.0,0.0,41.0,25.0,1.0,20.0,0.0,2.0,0.0,0.0,4.0,25.0,13.0,17.0,0.0,6.0,4.0,0.0
2,365.0,1.0,36.0,21.0,1.0,57.0,0.0,4.0,0.0,0.0,1.0,16.0,13.0,13.0,1.0,12.0,4.0,0.0
3,191.0,0.0,5.0,47.0,0.0,0.0,5.0,0.0,6.84,1.0,0.0,27.0,13.0,36.0,1.0,10.0,4.0,0.0
4,285.0,1.0,30.0,22.0,1.0,57.0,0.0,5.0,0.0,0.0,5.0,16.0,13.0,29.0,1.0,3.0,4.0,0.0


In [28]:
test_x = test.to_numpy()
test_x = torch.tensor(test_x, dtype=torch.float32).to(device)

test_y = (torch.sigmoid(model(test_x).squeeze(1)) > 0.5).float().cpu().detach().numpy()
test_y.shape

(93800,)

In [29]:
test_y

array([0., 0., 0., ..., 0., 1., 0.], dtype=float32)

In [33]:
submission = pd.read_csv("sample_submission.csv")
submission["Depression"] = test_y

submission.to_csv("submission/neural_network.csv", index=False)