In [3]:
import pickle
import torch
import pandas as pd
import numpy as np
import os
import re
import contractions
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from datasets import load_dataset
import torch.nn.functional as F
# Shared NLP resources used by the text-cleaning helpers defined below.
# NOTE(review): stopwords.words() needs the NLTK "stopwords" corpus to be available
# locally — confirm it is downloaded in the target environment.
stop_words = set(stopwords.words('english'))  # set gives O(1) membership tests in remove_stopwords
lemmatizer = WordNetLemmatizer()  # used by lemmatize_text
ps = PorterStemmer()  # used by stem_text
# NOTE(review): this flag is set after `import torch` has already run; if it has to be
# visible at import time of the library that reads it, confirm the ordering is right.
os.environ["TORCH_TEXT_DISABLE_CPP_EXTENSIONS"] = "1"
from sklearn.preprocessing import LabelEncoder
from Material import * 

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Reproducibility and device selection for the whole notebook.
SEED = 42
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed NumPy and PyTorch; the CUDA generator is seeded only when a GPU was selected.
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE.type == "cuda":
    torch.cuda.manual_seed(SEED)

In [5]:
def remove_html_tags(text):
    """Delete every `<...>` span (non-greedy, single line) from `text`."""
    tag_pattern = re.compile(r'<.*?>')
    return tag_pattern.sub('', text)
def remove_urls1(text):
    """Delete every run of non-whitespace characters that starts with `http`."""
    http_pattern = re.compile(r'http\S+')
    return http_pattern.sub('', text)
def remove_urls2(text):
    """Delete `www.`-style links (those without an http/https scheme) from `text`.

    The dot after `www` is escaped so that only a literal `www.` prefix starts
    a match; an unescaped dot matches any character and would also delete
    ordinary words that merely begin with "www" (e.g. "wwwhat").

    Args:
        text: input string.

    Returns:
        `text` with every `www.<non-whitespace>` run removed.
    """
    return re.sub(r'www\.\S+', '', text)
def remove_emojis(text):
    """Strip every character that falls inside the code-point ranges listed below.

    NOTE(review): the ranges are much wider than emoji alone —
    U+24C2..U+1F251 and U+10000..U+10FFFF also cover CJK and every
    supplementary-plane character, so such text is removed as well.
    """
    char_ranges = (
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # regional-indicator (flag) letters
        u"\U00002500-\U00002BEF"  # box drawing through misc. symbols and arrows
        u"\U00002702-\U000027B0"  # dingbats
        u"\U00002702-\U000027B0"  # (same range repeated in the original list)
        u"\U000024C2-\U0001F251"  # very wide span, see the note in the docstring
        u"\U0001f926-\U0001f937"
        u"\U00010000-\U0010ffff"  # all supplementary planes
        u"\u2640-\u2642"
        u"\u2600-\u2B55"
        u"\u200d"  # zero-width joiner
        u"\u23cf"
        u"\u23e9"
        u"\u231a"
        u"\ufe0f"  # variation selector-16
        u"\u3030"
    )
    return re.sub("[" + char_ranges + "]+", r'', text, flags=re.UNICODE)
def remove_mentions(text):
    """Delete `@handle` mentions, where a handle is ASCII letters, digits or underscores."""
    mention_pattern = re.compile(r'@[A-Za-z0-9_]+')
    return mention_pattern.sub('', text)
def LowerCase(text):
    """Return `text` converted to lower case."""
    lowered = text.lower()
    return lowered
def expand_contractions(text):
    """Return `text` after passing it through `contractions.fix`."""
    expanded = contractions.fix(text)
    return expanded
def remove_non_ascii(text):
    """Drop every character that cannot be encoded as ASCII."""
    # Characters that cannot be encoded are skipped by the 'ignore' handler.
    ascii_bytes = text.encode('ascii', 'ignore')
    return ascii_bytes.decode('utf-8', 'ignore')
def Remove_text_within_sqbrackets(text):
    """Replace each `[...]` span (non-greedy, single line) with one space."""
    bracket_pattern = re.compile(r'\[.*?\]')
    return bracket_pattern.sub(' ', text)
def Remove_special_characters(text):
    """Replace each of the characters `(`, `)`, `!` and `?` with a space."""
    special_pattern = re.compile(r'[()!?]')
    return special_pattern.sub(' ', text)
def remove_punctuation(text):
    """Delete every character listed in `string.punctuation` from `text`."""
    deletion_table = str.maketrans('', '', string.punctuation)
    return text.translate(deletion_table)
def Replace_multiple_spaces_with_a_single_space(text):
    """Collapse each whitespace run to one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def normalize_repeated_chars(text):
    """Shorten any run of three or more identical characters to exactly two."""
    repeat_pattern = re.compile(r'(.)\1{2,}')
    return repeat_pattern.sub(r'\1\1', text)
def remove_numbers(text):
    """Delete every run of digits from `text`."""
    digit_pattern = re.compile(r'\d+')
    return digit_pattern.sub('', text)
def remove_stopwords(text):
    """Drop whitespace-separated words whose lower-cased form is in `stop_words`.

    The surviving words keep their original casing and are re-joined with
    single spaces.
    """
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)
def lemmatize_text(text):
    """Lemmatize each whitespace-separated word and re-join with single spaces."""
    lemmas = map(lemmatizer.lemmatize, text.split())
    return ' '.join(lemmas)
def stem_text(text):
    """Stem each whitespace-separated word with `ps` and re-join with single spaces."""
    stems = []
    for token in text.split():
        stems.append(ps.stem(token))
    return ' '.join(stems)

In [6]:
def preprocess_text(text):
    """Run `text` through the notebook's cleaning steps, in a fixed order.

    Stop-word removal, stemming and lemmatization are deliberately not part
    of the pipeline (the helpers exist above but are not called here).

    NOTE(review): `remove_numbers` runs after whitespace has been collapsed,
    so deleting a standalone number can leave two adjacent spaces in the
    result. The order is kept as-is so inference preprocessing does not change.

    Args:
        text: raw input string.

    Returns:
        The cleaned string.
    """
    pipeline = (
        # Initial cleaning
        remove_html_tags,
        remove_urls1,
        remove_urls2,
        remove_emojis,
        remove_mentions,
        # Standardization
        LowerCase,
        expand_contractions,
        remove_non_ascii,
        Remove_text_within_sqbrackets,
        # Special characters and punctuation
        Remove_special_characters,
        remove_punctuation,
        # Spaces, repeated characters, digits
        Replace_multiple_spaces_with_a_single_space,
        normalize_repeated_chars,
        remove_numbers,
    )
    for step in pipeline:
        text = step(text)
    return text

In [7]:
def tokenize_text(text, v, max_seq_len):
    """Turn a string into a batched tensor on DEVICE.

    Args:
        text: string handed to `v.text_to_tensor` unchanged.
        v: object exposing `text_to_tensor(text, max_seq_len)`.
        max_seq_len: forwarded to `v.text_to_tensor`.

    Returns:
        The tensor from `v.text_to_tensor` with a new leading dimension
        (`unsqueeze(0)`), moved to DEVICE.
    """
    batch = v.text_to_tensor(text, max_seq_len).unsqueeze(0)
    return batch.to(DEVICE)
# Step 4: Function to predict class probabilities and the predicted class
def predict_text(text, model, v, max_seq_len):
    """Classify one raw string with `model`.

    The text is cleaned with `preprocess_text`, converted with
    `tokenize_text`, and run through `model` with gradients disabled.

    Args:
        text: raw input string.
        model: callable returning a 2-tuple whose first element is the logits.
        v: vocabulary object forwarded to `tokenize_text`.
        max_seq_len: forwarded to `tokenize_text`.

    Returns:
        (probabilities, predicted_class): the softmax over dim 1 for the
        single input as a NumPy array, and the index of its largest entry.
    """
    tokens = tokenize_text(preprocess_text(text), v, max_seq_len)
    # Inference only: no autograd bookkeeping.
    with torch.no_grad():
        logits, _ = model(tokens)  # second element of the model output is unused here
        probabilities = F.softmax(logits, dim=1).cpu().numpy()[0]
        predicted_class = probabilities.argmax()
    return probabilities, predicted_class

# PosNegNeu Model (negative / neutral / positive sentiment)

In [8]:
# Vocabulary helper built from an empty list; `len(v.vocab)` sizes the model's embedding
# and `v.text_to_tensor` is what tokenize_text calls.
# NOTE(review): presumably it wraps a fixed pretrained vocabulary (the model repr below
# shows 30522 embedding rows) — confirm in the Material module.
v = Vocabulary([])

In [14]:
# Hyperparameters for the sentiment model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
DROPOUT = 0.1      # alt: 0.2, 0.5
NUM_LAYERS = 2     # alt: 4, 3
HIDDEN_DIM = 32    # alt: 128, 128
EMBED_DIM = 128    # alt: 256, 256
MAX_SEQ_LEN = 200  # alt: 400, 500

In [16]:
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 3 is presumably the number of output classes — the repr below shows
# fc out_features=3.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 3, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load("PosNegNeu_model.pth", map_location=DEVICE))

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (lstm): LSTM(128, 32, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=64, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=64, out_features=3, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [18]:
# Load the pickled tokenizer object stored next to the sentiment checkpoint.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open("PosNegNeu_model_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [33]:
# Step 5: Example prediction
# Class index -> sentiment label.
label_map = dict(enumerate(['negative', 'neutral', 'positive']))

text = 'i am good'
probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [2.5316521e-07 2.1057552e-08 9.9999976e-01]
Predicted Class: positive


# Suicide Detection Model

In [67]:
# Hyperparameters for the suicide-detection model; they must match the saved checkpoint.
MAX_SEQ_LEN = 150
DROPOUT = 0.1
NUM_LAYERS = 2
HIDDEN_DIM = 64
EMBED_DIM = 256

In [68]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 2 is presumably the number of output classes — the repr below shows
# fc out_features=2.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 2, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load("Suicide_model.pth", map_location=DEVICE))

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 256, padding_idx=0)
  (lstm): LSTM(256, 64, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=128, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [69]:
# Load the pickled tokenizer object stored next to the suicide-detection checkpoint.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open("Suicide_model_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [70]:
# Step 5: Example prediction
# Class index -> label.
label_map = dict(enumerate(['Non Suicide', 'Suicide']))

text = "I am gonna kill my self"
probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [1.1356238e-07 9.9999988e-01]
Predicted Class: Suicide


# Facebook Emotion Model

In [50]:
# Hyperparameters for the Facebook emotion model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
MAX_SEQ_LEN = 100
DROPOUT = 0.1     # alt: 0.2, 0.5
NUM_LAYERS = 2    # alt: 4, 3
HIDDEN_DIM = 64   # alt: 128, 128
EMBED_DIM = 128   # alt: 256, 256

In [52]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 6 is presumably the number of output classes — the repr below shows
# fc out_features=6.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 6, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load(r"facebookEmo_model.pth", map_location=DEVICE))
# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (lstm): LSTM(128, 64, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=128, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=128, out_features=6, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [53]:
# Load the pickled tokenizer object stored next to the Facebook emotion checkpoint.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open(r"facebookEmo_model_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [64]:
# Step 5: Example prediction
# Class index -> emotion label.
label_map = dict(enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']))

text = 'wow'
probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [8.2023054e-02 8.1911081e-01 3.6644936e-04 9.3319237e-02 5.0887861e-03
 9.1686103e-05]
Predicted Class: joy


# Litigious Model

In [19]:
# Hyperparameters for the litigious model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
MAX_SEQ_LEN = 200
DROPOUT = 0.1     # alt: 0.2, 0.5
NUM_LAYERS = 2    # alt: 4, 3
HIDDEN_DIM = 32   # alt: 128, 128
EMBED_DIM = 128   # alt: 256, 256

In [20]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 4 is presumably the number of output classes — the repr below shows
# fc out_features=4.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 4, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# os.path.join replaces the hard-coded "Models&Tokenizers\l..." literal: "\l" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(
    torch.load(os.path.join("Models&Tokenizers", "litigious_model.pth"), map_location=DEVICE)
)

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (lstm): LSTM(128, 32, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=64, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=64, out_features=4, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [21]:
# Load the pickled tokenizer object stored next to the litigious checkpoint.
# os.path.join replaces the hard-coded "Models&Tokenizers\l..." literal: "\l" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open(os.path.join("Models&Tokenizers", "litigious_model_tokenizer.pkl"), "rb") as f:
    tokenizer = pickle.load(f)

In [None]:
# Step 5: Example prediction
# Class index -> label.
label_map = dict(enumerate(['litigious', 'negative', 'positive', 'uncertainty']))

text = 'wow'

probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [9.9998534e-01 1.2048759e-05 2.6228622e-06 3.8594610e-11]
Predicted Class: litigious


# Depression Model

In [23]:
# Hyperparameters for the depression model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
MAX_SEQ_LEN = 100
DROPOUT = 0.2     # alt: 0.2, 0.5
NUM_LAYERS = 1    # alt: 4, 3
HIDDEN_DIM = 32   # alt: 128, 128
EMBED_DIM = 128   # alt: 256, 256

In [24]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 2 is presumably the number of output classes — the repr below shows
# fc out_features=2.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 2, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# os.path.join replaces the hard-coded "Models&Tokenizers\d..." literal: "\d" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(
    torch.load(os.path.join("Models&Tokenizers", "depression_model.pth"), map_location=DEVICE)
)

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()



LSTMClassifierBi(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (lstm): LSTM(128, 32, batch_first=True, dropout=0.2, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=64, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=64, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [25]:
# Load the pickled tokenizer object stored next to the depression checkpoint.
# os.path.join replaces the hard-coded "Models&Tokenizers\d..." literal: "\d" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open(os.path.join("Models&Tokenizers", "depression_model_tokenizer.pkl"), "rb") as f:
    tokenizer = pickle.load(f)

In [26]:
# Step 5: Example prediction
# Class index -> label.
label_map = dict(enumerate(['Non-Depression', 'Depression']))

text = 'open discussion between the transfer portal and the nil will the become obsolete as an organization and governing body hopelessness gopokes loyalandtrue'

probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [6.9089659e-04 9.9930906e-01]
Predicted Class: Depression


# Main Model Old

In [1]:
# Hyperparameters for the old main model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
MAX_SEQ_LEN = 450
DROPOUT = 0.2     # alt: 0.2, 0.5
NUM_LAYERS = 3    # alt: 4, 3
HIDDEN_DIM = 64   # alt: 128, 128
EMBED_DIM = 128   # alt: 256, 256

In [10]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 7 is presumably the number of output classes — the repr below shows
# fc out_features=7.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 7, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load("main2_model.pth", map_location=DEVICE))

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (lstm): LSTM(128, 64, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=128, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [12]:
# Load the pickled tokenizer object stored next to the old main-model checkpoint.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open("main2_model_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [None]:
# Step 5: Example prediction
# Class index -> condition label.
label_map = dict(enumerate([
    'anxiety',
    'bipolar',
    'depression',
    'normal',
    'personality disorder',
    'stress',
    'suicidal',
]))

text = 'my mind is a neverending cycle of worry and even the simplest tasks feel insurmountable i am consumed by fear and doubt and every decision feels like a minefield waiting to explode anxiety has a grip on me and i am powerless to break free from its relentless hold my thoughts race like a runaway train and i cannot seem to find a way to slow down and catch my breath i am trapped in a whirlwind of fear and uncertainty and i cannot seem to find a way to escape my heart pounds in my chest and my'

probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [9.9998486e-01 1.6958934e-06 1.4108380e-06 2.7860273e-07 1.3248567e-06
 1.0169501e-05 2.3955613e-07]
Predicted Class: anxiety


# Main Model New

In [31]:
# Hyperparameters for the new main model; they must match the saved checkpoint.
# Trailing notes are the author's alternative values (presumably earlier experiments).
MAX_SEQ_LEN = 500
DROPOUT = 0.3      # alt: 0.2, 0.5
NUM_LAYERS = 3     # alt: 4, 3
HIDDEN_DIM = 256   # alt: 128, 128
EMBED_DIM = 256    # alt: 256, 256

In [32]:
# Fresh Vocabulary helper; `len(v.vocab)` sizes the embedding layer.
v = Vocabulary([])
# Build the classifier and move it to DEVICE once (the original moved it twice).
# The positional 5 is presumably the number of output classes — the repr below shows
# fc out_features=5.
model = LSTMClassifierBi(v, len(v.vocab), EMBED_DIM, HIDDEN_DIM, 5, NUM_LAYERS, dropout=DROPOUT).to(DEVICE)

# Load only the weights (state_dict)
# os.path.join replaces the hard-coded "Models&Tokenizers\m..." literal: "\m" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted
# source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(
    torch.load(os.path.join("Models&Tokenizers", "main_model.pth"), map_location=DEVICE)
)

# Set the model to evaluation mode (as the last expression, this also displays the model repr)
model.eval()

LSTMClassifierBi(
  (embedding): Embedding(30522, 256, padding_idx=0)
  (lstm): LSTM(256, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (attention): Attention(
    (attn): Linear(in_features=512, out_features=1, bias=True)
    (softmax): Softmax(dim=1)
  )
  (fc): Linear(in_features=512, out_features=5, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [33]:
# Load the pickled tokenizer object stored next to the new main-model checkpoint.
# os.path.join replaces the hard-coded "Models&Tokenizers\m..." literal: "\m" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) and a backslash separator
# only resolves on Windows.
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# NOTE(review): `tokenizer` is not referenced by any cell visible in this notebook
# (prediction goes through `v`); confirm whether it is still needed.
with open(os.path.join("Models&Tokenizers", "main_model_tokenizer.pkl"), "rb") as f:
    tokenizer = pickle.load(f)

In [34]:
# Step 5: Example prediction
# Class index -> condition label.
label_map = dict(enumerate(['ADHD', 'OCD', 'aspergers', 'depression', 'ptsd']))

text = 'a few months ago i was accepted into this full time software engineering fellowship and it is made me realize that i cannot work sustainably to save my life it is so hard to prioritize my time when i get so hyper focused on each task or just on something completely irrelevant i was just diagnosed last year so i am still learning how to learn with adhd but i feel even more pressure to work so much harder to prove my worth because i am a black woman in engineering i have been falling into a really unhealthy cycle of taking more than my prescribed dose to work longer because i would waste so much time during the day and it is just gotten out of hand it is like you go your whole life feeling so dumb and incompetent and now you do not and you just want to learn everything all the time but that is just not sustainable or normal anyways i do not know sorry for the rant i am just tired do not know what to do'

probabilities, predicted_label = predict_text(text, model, v, MAX_SEQ_LEN)

print(f"Class Probabilities: {probabilities!s}")
print(f"Predicted Class: {label_map[predicted_label]}")

Class Probabilities: [1.0000000e+00 4.3307749e-10 4.2600562e-10 6.4689670e-10 8.0607278e-11]
Predicted Class: ADHD
