In [35]:
!pip install transformers torch scikit-learn pandas



In [36]:
import torch
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments
)

In [43]:
# Read only the first two columns of the semicolon-separated file, then
# normalise the schema to (text, label) with rows containing missing values
# removed and the label column cast to int.
raw_titles = pd.read_csv(
    "/content/Reddit_Title.csv",
    sep=";",
    engine="python",
    usecols=[0, 1],
)

df = (
    raw_titles[["title", "label"]]
    .rename(columns={"title": "text"})
    .dropna()
    .astype({"label": int})
)

df.head()

Unnamed: 0,text,label
0,My aunt and uncle scoring their first gig as p...,0
1,How do I stop stressing about work when I'm at...,1
2,Meeting a fellow suicidal student in middle sc...,1
3,My brain feels literally numb. Is this depress...,1
4,A mother's reaction after seeing her son has p...,0


In [44]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments
)

# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Stratified 80/20 split with a fixed random_state so the partition is repeatable.
texts, targets = df["text"], df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    targets,
    test_size=0.2,
    random_state=42,
    stratify=targets,
)

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# Both splits are tokenized with identical settings (truncate at 128 tokens,
# padding enabled), so keep the options in one place.
encode_kwargs = {"truncation": True, "padding": True, "max_length": 128}

train_encodings = tokenizer(list(X_train), **encode_kwargs)
test_encodings = tokenizer(list(X_test), **encode_kwargs)

class StressDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer output with one label per example.

    Args:
        encodings: Mapping of feature name (e.g. ``input_ids``) to a
            per-example sequence, such as the result of a tokenizer call made
            without ``return_tensors``.
        labels: One label per example. Any iterable is accepted (pandas
            Series, NumPy array, list, tuple). The values are copied into a
            list, so lookups are positional regardless of any original index.

    Raises:
        ValueError: If an encoding field does not hold exactly one entry per
            label.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        # A plain list gives positional access for every input type; a pandas
        # Series that kept a shuffled index (e.g. after train_test_split)
        # would otherwise be looked up by index label instead of position.
        self.labels = list(labels)

        # Fail early on misaligned inputs instead of hitting an IndexError (or
        # silently ignoring surplus rows) in the middle of training.
        for name, values in self.encodings.items():
            if len(values) != len(self.labels):
                raise ValueError(
                    f"encodings[{name!r}] has {len(values)} examples "
                    f"but {len(self.labels)} labels were given"
                )

    def __getitem__(self, idx):
        """Return the tensors for example ``idx``, with its label under ``"labels"``."""
        item = {name: torch.tensor(values[idx]) for name, values in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, i.e. the number of labels."""
        return len(self.labels)

# Wrap each split's encodings and labels in a torch Dataset.
train_dataset = StressDataset(encodings=train_encodings, labels=y_train)
test_dataset = StressDataset(encodings=test_encodings, labels=y_test)

# Pretrained DistilBERT with a two-class classification head, moved to `device`.
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)
model = model.to(device)

def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of
    each row is the position of its largest logit along axis 1.

    Returns:
        A dict with the single key ``"accuracy"``.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)
    accuracy = accuracy_score(gold, predicted)
    return {"accuracy": accuracy}

# Fine-tuning configuration: evaluate and checkpoint once per epoch, and
# reload the best checkpoint when training finishes.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none",
)

# NOTE(review): the held-out test split is also the evaluation set, so the
# checkpoint chosen by load_best_model_at_end is selected on test data and the
# reported accuracy is likely optimistic -- consider a separate validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Save model weights and tokenizer files into the same directory.
export_dir = "stress_distilbert"
model.save_pretrained(export_dir)
tokenizer.save_pretrained(export_dir)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.129459,0.961331
2,0.146300,0.154087,0.961331
3,0.146300,0.153688,0.966727
4,0.035100,0.164091,0.965827
5,0.035100,0.174564,0.965827


('stress_distilbert/tokenizer_config.json',
 'stress_distilbert/special_tokens_map.json',
 'stress_distilbert/vocab.txt',
 'stress_distilbert/added_tokens.json',
 'stress_distilbert/tokenizer.json')

In [48]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict_stress(text, threshold=0.7):
    """Classify one piece of text as ``"Stressed"`` or ``"Not Stressed"``.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``.

    Args:
        text: A single string to classify.
        threshold: Minimum softmax probability of class 1 required to return
            ``"Stressed"``. Defaults to 0.7, the value that used to be
            hard-coded, so existing calls behave as before.

    Returns:
        ``"Stressed"`` if the class-1 probability is at least ``threshold``,
        otherwise ``"Not Stressed"``.

    Raises:
        TypeError: If ``text`` is not a string.
        ValueError: If ``threshold`` is outside ``[0, 1]``.
    """
    # Only row 0 of the model output is read below, so a list of texts would
    # silently have everything but its first element ignored; reject it.
    if not isinstance(text, str):
        raise TypeError(f"text must be a str, got {type(text).__name__}")
    if not 0.0 <= threshold <= 1.0:
        raise ValueError(f"threshold must be within [0, 1], got {threshold}")

    model.to(device)
    model.eval()

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    )

    # The input tensors must be on the same device as the model.
    inputs = {key: val.to(device) for key, val in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.softmax(outputs.logits, dim=1)
    stress_prob = probs[0, 1].item()

    return "Stressed" if stress_prob >= threshold else "Not Stressed"

In [49]:
# Quick manual check: print one prediction per hand-written sentence.
sample_texts = [
    "I feel overwhelmed with work and exams",
    "I am extremely stressed and anxious",
    "I feel calm and relaxed today",
    "Today was a good and happy day",
]

for sample in sample_texts:
    print(predict_stress(sample))

Stressed
Stressed
Stressed
Not Stressed


In [47]:
# Class balance check: fraction of rows carrying each label value.
df['label'].value_counts(normalize=True)

Unnamed: 0_level_0,proportion
label,Unnamed: 1_level_1
0,0.50594
1,0.49406


In [50]:
%%writefile app.py
import streamlit as st
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification

st.set_page_config(page_title="Stress Detection", page_icon="ðŸ§ ")

@st.cache_resource
def load_model():
    tokenizer = DistilBertTokenizerFast.from_pretrained("stress_distilbert")
    model = DistilBertForSequenceClassification.from_pretrained("stress_distilbert")
    model.eval()
    return tokenizer, model

tokenizer, model = load_model()

st.title("ðŸ§  Stress Detection from Text")

text = st.text_area("Enter text")

if st.button("Detect Stress"):
    if text.strip() == "":
        st.warning("Please enter some text")
    else:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128
        )
        with torch.no_grad():
            outputs = model(**inputs)

        pred = torch.argmax(outputs.logits, dim=1).item()
        st.error("Stress Detected") if pred == 1 else st.success("No Stress Detected")

Writing app.py


In [58]:
# Recursively pack the saved model directory into a single zip archive.
!zip -r stress_distilbert.zip stress_distilbert

  adding: stress_distilbert/ (stored 0%)
  adding: stress_distilbert/special_tokens_map.json (deflated 42%)
  adding: stress_distilbert/tokenizer_config.json (deflated 75%)
  adding: stress_distilbert/config.json (deflated 45%)
  adding: stress_distilbert/vocab.txt (deflated 53%)
  adding: stress_distilbert/model.safetensors (deflated 8%)
  adding: stress_distilbert/tokenizer.json (deflated 71%)


In [59]:
# Colab-specific step: relies on the `google.colab` package, so this cell is
# only expected to work inside a Google Colab runtime.
from google.colab import files
files.download("stress_distilbert.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>