In [None]:
import numpy as np
import pandas as pd
import os, re

import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader

In [None]:
import tensorflow as tf

In [None]:
import torch
# Show which PyTorch build this notebook is running against.
torch_version = torch.__version__
print(torch_version)

2.3.0+cu121


In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# On a CUDA run, print the name of CUDA device 0.
if device.type == 'cuda':
    gpu_name = torch.cuda.get_device_name(0)
    print(gpu_name)

Tesla T4


In [None]:
import transformers

In [None]:
# Load the elaborated PHQ-9 spreadsheet into a DataFrame.
df = pd.read_excel('PHQ9DepressionNLP_elaborated.xlsx')

# Report the row count with a thousands separator, followed by a blank line.
row_count = df.shape[0]
print('Number of training sentences: {:,}\n'.format(row_count))

# Preview ten randomly chosen rows as the cell's output.
df.sample(n=10)

Number of training sentences: 520



Unnamed: 0,AcademicDescription,ViolenceDescription,SocialDescription,PHQDescription,GeneralDescription,PressureDescription,PHQ9_levels
348,I am currently in my 1 year with a CGPA of 3.2...,"I Agree that I experienced physical violence,...",I Neutral that My understanding with my paren...,"I often feel a lack of interest in activities,...","I am 19 years old, studying in my 1 year with ...",I Strongly agree that my Parents are Strict. ...,ModeratelySevere
240,I am currently in my 3 year with a CGPA of 3.6...,I Disagree that I experienced physical violen...,I Neutral that My understanding with my paren...,"I often feel a lack of interest in activities,...","I am 24 years old, studying in my 3 year with ...",I Agree that my Parents are Strict. I Agree ...,Minimal
93,I am currently in my 4 year with a CGPA of 3.5...,I Strongly disagree that I experienced physic...,I Strongly disagree that My understanding wit...,"I often feel a lack of interest in activities,...","I am 24 years old, studying in my 4 year with ...",I Strongly disagree that my Parents are Stric...,Minimal
40,I am currently in my 4 year with a CGPA of 3.2...,I Disagree that I experienced physical violen...,I Disagree that My understanding with my pare...,"I often feel a lack of interest in activities,...","I am 22 years old, studying in my 4 year with ...",I Disagree that my Parents are Strict. I Neu...,Minimal
342,I am currently in my 4 year with a CGPA of 3.5...,I Neutral that I experienced physical violenc...,I Agree that My understanding with my parents...,"I often feel a lack of interest in activities,...","I am 23 years old, studying in my 4 year with ...",I Agree that my Parents are Strict. I Agree ...,Moderate
277,I am currently in my 4 year with a CGPA of 2.7...,I Disagree that I experienced physical violen...,I Neutral that My understanding with my paren...,"I often feel a lack of interest in activities,...","I am 25 years old, studying in my 4 year with ...",I Neutral that my Parents are Strict. I Neut...,Minimal
308,I am currently in my 3 year with a CGPA of 3.3...,I Disagree that I experienced physical violen...,I Not Applicable that My understanding with m...,"I often feel a lack of interest in activities,...","I am 21 years old, studying in my 3 year with ...",I Not Applicable that my Parents are Strict. ...,Minimal
24,I am currently in my 3 year with a CGPA of 3.9...,I Strongly disagree that I experienced physic...,I Disagree that My understanding with my pare...,"I often feel a lack of interest in activities,...","I am 25 years old, studying in my 3 year with ...",I Disagree that my Parents are Strict. I Dis...,Mild
213,I am currently in my 2 year with a CGPA of 3.4...,I Disagree that I experienced physical violen...,I Agree that My understanding with my parents...,"I often feel a lack of interest in activities,...","I am 23 years old, studying in my 2 year with ...",I Strongly agree that my Parents are Strict. ...,Mild
158,I am currently in my 4 year with a CGPA of 2.9...,I Disagree that I experienced physical violen...,I Strongly disagree that My understanding wit...,"I often feel a lack of interest in activities,...","I am 25 years old, studying in my 4 year with ...",I Disagree that my Parents are Strict. I Dis...,Severe


In [None]:
# Build the 'statements' column: for every row, the five description fields
# are converted to text and joined with single spaces, in this fixed order.
statement_columns = [
    'PHQDescription',
    'ViolenceDescription',
    'PressureDescription',
    'SocialDescription',
    'AcademicDescription',
]
df['statements'] = [
    ' '.join(str(field) for field in fields)
    for fields in df[statement_columns].itertuples(index=False, name=None)
]

In [None]:
# Import GPT-specific components
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AdamW, get_linear_schedule_with_warmup

In [None]:
# Load the pretrained GPT-2 tokenizer and language-model head from one checkpoint.
checkpoint = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
# Reuse the end-of-sequence token as the padding token so inputs can be padded.
tokenizer.pad_token = tokenizer.eos_token
# Instantiate the model and move its weights onto the selected device.
model = GPT2LMHeadModel.from_pretrained(checkpoint).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Create a custom dataset class
class PHQ9Dataset(Dataset):
    """Dataset that tokenizes one text description per item.

    Args:
        descriptions: Indexable sequence of text strings, one per sample.
        labels: Indexable sequence aligned with ``descriptions``; each label
            is returned unchanged alongside its tokenized description.
        tokenizer: Object exposing ``encode_plus`` that returns a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_len: Length every sample is padded or truncated to.
    """

    def __init__(self, descriptions, labels, tokenizer, max_len=512):
        self.descriptions = descriptions
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of descriptions in the dataset."""
        return len(self.descriptions)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, label)`` for sample ``idx``."""
        encoded = self.tokenizer.encode_plus(
            self.descriptions[idx],
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )
        # The tokenizer returns a single-row batch; drop only that leading
        # axis. A bare squeeze() would also collapse the sequence axis when
        # max_len == 1 and hand the DataLoader a 0-dim tensor.
        input_ids = encoded['input_ids'].squeeze(0)
        attention_mask = encoded['attention_mask'].squeeze(0)
        return input_ids, attention_mask, self.labels[idx]

In [None]:
# Build the training set: each concatenated statement paired with its PHQ-9 level.
descriptions = df['statements'].to_list()
labels = df['PHQ9_levels'].to_list()
dataset = PHQ9Dataset(descriptions=descriptions, labels=labels, tokenizer=tokenizer)

# Serve the samples as shuffled mini-batches of two.
train_data_loader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

In [None]:
# Define the optimizer and learning rate scheduler.
# torch.optim.AdamW is used instead of the AdamW exported by transformers,
# which is deprecated in favour of the PyTorch implementation.
# weight_decay=0.0 is passed explicitly: torch defaults to 0.01 whereas the
# transformers version defaulted to 0.0, so the update rule stays unchanged.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-8, weight_decay=0.0)

# Linear decay of the learning rate to zero with no warm-up, sized for 5 epochs.
# NOTE(review): the training cell sets `epochs = 1`, so only the first fifth of
# this schedule is consumed -- confirm which epoch count is intended.
total_steps = len(train_data_loader) * 5  # Assuming 5 epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)




In [None]:
# Training loop: fine-tune the language model on the tokenized statements.
epochs = 1
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    # The third element of each batch (the PHQ-9 level) is not used here.
    for input_ids, attention_mask, _ in train_data_loader:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)

        # Language-modelling targets are the inputs themselves, except that
        # padded positions are set to -100 so the loss ignores them. Every
        # sample is padded to a fixed length with the EOS token, so passing
        # input_ids directly would let padding dominate the loss.
        lm_labels = input_ids.masked_fill(attention_mask == 0, -100)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass; with labels supplied, the first output is the loss
        outputs = model(input_ids, attention_mask=attention_mask, labels=lm_labels)
        loss = outputs[0]

        # Backward pass
        loss.backward()

        # Update parameters, then advance the learning-rate schedule
        optimizer.step()
        scheduler.step()

        # Accumulate total loss
        total_loss += loss.item()

    # Calculate average loss over the batches of this epoch
    avg_train_loss = total_loss / len(train_data_loader)

    # Print average loss for the epoch
    print(f'Epoch [{epoch + 1}/{epochs}], Average Train Loss: {avg_train_loss:.4f}')

Epoch [1/1], Average Train Loss: 0.6445


In [None]:
# Example of generating text: use the first row's concatenated statement as the prompt.
# Alternative free-form prompt, kept for reference:
# prompt = "My name is Leon and I am definitely not depressed. The PHQ9 score for this statement is:"
prompt = df.loc[0, 'statements']

In [None]:
# Encode the prompt as PyTorch tensors, then move both tensors to the model's device.
inputs = tokenizer.encode_plus(
    prompt,
    truncation=True,
    padding=True,
    return_tensors='pt',
)
input_ids, attention_mask = (
    inputs[key].to(device) for key in ('input_ids', 'attention_mask')
)

In [None]:
# Generate text
# The training loop leaves the model in train() mode, where dropout is active.
# Switch to eval mode so generation is not perturbed by dropout.
model.eval()
output = model.generate(input_ids, attention_mask=attention_mask, max_length=500, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)

In [None]:
# Convert the first generated sequence back to text, skipping special tokens, and show it.
first_sequence = output[0]
generated_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(f'Generated text: {generated_text}')

Generated text: I often feel a lack of interest in activities, described as I get as much satisfaction out of things as I used to. satisfaction, and I've been feeling down I feel sad. My sleep quality is I don't sleep as well as I used to., and I often feel fatigued I don't get more tired than usual.. My appetite level is My appetite is no worse than usual., and I frequently feel bad about myself due to fear of failure, described as I feel I have failed more than the average person.. I have trouble making decisions I make decisions about as well as I ever could., feel agitated Fidgetiness/ (অস্থিরতা), and have had suicidal feelings Absent.. I  Disagree that I experienced physical violence, I  Disagree that I experienced sexual violence, I  Agree that I experienced verbal violence, and I Disagree that I experienced emotional violence. I  Agree that my Parents are Strict. I  Agree that I experienced Academic Pressure from my parents, I  Agree that I experienced Marriage Pressure from my 

In [None]:
# Display the original first statement for comparison with the generated text.
df.loc[0, 'statements']

"I often feel a lack of interest in activities, described as I get as much satisfaction out of things as I used to. satisfaction, and I've been feeling down I feel sad. My sleep quality is I don't sleep as well as I used to., and I often feel fatigued I don't get more tired than usual.. My appetite level is My appetite is no worse than usual., and I frequently feel bad about myself due to fear of failure, described as I feel I have failed more than the average person.. I have trouble making decisions I make decisions about as well as I ever could., feel agitated Fidgetiness/ (অস্থিরতা), and have had suicidal feelings Absent.. I  Disagree that I experienced physical violence, I  Disagree that I experienced sexual violence, I  Agree that I experienced verbal violence, and I Disagree that I experienced emotional violence. I  Agree that my Parents are Strict. I  Agree that I experienced Academic Pressure from my parents, I  Agree that I experienced Marriage Pressure from my parents, and I 