In [2]:
import torch
import numpy as np
import torch.nn as nn
import math
import os
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F



In [3]:
class NGramLanguageModeler(nn.Module):

    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLanguageModeler, self).__init__()
        #does not pass all input words with eachother. Each word goes through independantly
        #and the output are the embeddings of the word. We want this because we do not 
        #want to concacenate the embeddings to the output nodes.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim) 
        #now takes in all embeddings of each word stretched out
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        #embeds will be flattened matrix
        embeds = self.embeddings(inputs).view((1, -1))
        #rectified relu to learn embeddings
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        #output is the log probablities of all vocabulary
        return log_probs

    


In [40]:
#file path to credit card csv file
file_path = os.path.join(os.path.expanduser("~"), "Downloads", "mental_health.csv")
orig_dataset = pd.read_csv(file_path) #read csv file as pandas object
orig_dataset = orig_dataset.to_numpy()
print("Dataset size: "+ str(len(orig_dataset)))
dataset = orig_dataset[0:700] #use part of the dataset
print("Train Dataset size: 700")
minFreq = {} #word must appear n times to be added to dictionary
dictionary = {} #relevant words in the dicationary
index = 2
for example in range(len(dataset)):
    for cont_response in range(2):
        if type(dataset[example][cont_response]) == float: #NaN values
            continue
        for word in dataset[example][cont_response].split():
            if word not in minFreq:
                minFreq[word]=1
            else:
                if minFreq[word]==3: #word needs to appear
                    dictionary[word] = index
                    index+=1
                minFreq[word]+=1
for word in dictionary:
    print(f"Word: {word} Freq: {minFreq[word]}")
print("Dataset Training Size: 10489")
print("Training Dictionary Size: " + str(index))

CONTEXT_SIZE = 3 #look 3 words back to predict current word
EMBEDDING_DIM = 252 #total embeddings for each word
all_ngrams = [] #ngram setup -> [(['through', 'going', "I'm"], 'some')]
for example in range(len(dataset)): 
    for cont_response in range(2): #context than response
        if type(dataset[example][cont_response]) == float: #NaN values
            continue
        cur_Sentence = dataset[example][cont_response].split() #seperate by word
        ngrams = [ #[(['through', 'going', "I'm"], 'some')]
            ([cur_Sentence[i - j - 1] for j in range(CONTEXT_SIZE)],cur_Sentence[i])
            for i in range(CONTEXT_SIZE, len(cur_Sentence))
            ]
        #append the grams to all_ngrams
        for i in ngrams:
            all_ngrams.append(i)

loss_function = nn.NLLLoss() #loss layer
model = NGramLanguageModeler(index, EMBEDDING_DIM, CONTEXT_SIZE) #intialize Ngram model
optimizer = optim.Adam(model.parameters(), lr=0.0001)
for epoch in range(10):
    total_loss = 0
    total  = 0
    correct = 0
    print("Epoch: "+ str(epoch))
    for context, target in all_ngrams:
        #if unknown word, just don't train
        if context[0] not in dictionary or context[1] not in dictionary or context[2] not in dictionary:
                continue
        if target not in dictionary:
                continue
        #turn each word to an integer and wrapped in tensor so pass as an input to the model
        context_idxs = torch.tensor([dictionary[w] for w in context], dtype=torch.long)

        #zero out gradients cause it accumulates
        model.zero_grad()

        # Step 3. Run the forward pass, getting log probabilities over next
        # words
        log_probs = model(context_idxs)
        
        #the predicted class will be the max log probability
        predicted_class = torch.max(log_probs, dim= 1)
        predicted_index = int(predicted_class[1])  #convert to int
        #accumulate correct predictions
        if predicted_index == dictionary[target]:
            correct+=1
            

        #apply the loss function to the log probabilties with the correct target word
        loss = loss_function(log_probs, torch.tensor([dictionary[target]], dtype=torch.long))

        # Step 5. Do the backward pass and update the gradient
        loss.backward()
        optimizer.step()
        # Get the Python number from a 1-element Tensor by calling tensor.item()
        total_loss += loss.item()
        total+=1
    print(f"Accuracy: {correct/total}")
    print(f"Total_Loss: {total_loss}")
#test set of size 10,000 examples

test = orig_dataset[1001: 2001]
#setting up n_grams for testing
test_grams = []
for example in range(len(test)):
    for cont_response in range(2):
        if type(test[example][cont_response]) == float:
            continue
        cur_Sentence = test[example][cont_response].split()
        sngrams = [
            ([cur_Sentence[i - j - 1] for j in range(CONTEXT_SIZE)],cur_Sentence[i])
            for i in range(CONTEXT_SIZE, len(cur_Sentence))
            ]
        for i in sngrams:
            test_grams.append(i)
      
    
total = 0
correct = 0
for context, target in test_grams:
    if context[0] not in dictionary or context[1] not in dictionary or context[2] not in dictionary:
            continue
    if target not in dictionary:
            continue
    context_idxs = torch.tensor([dictionary[w] for w in context], dtype=torch.long)
    log_probs = model(context_idxs)
    predicted_classes = torch.max(log_probs, dim= 1)
    predicted_index = int(predicted_classes[1]) 
    if predicted_index == dictionary[target]:
        correct+=1
    # Step 4. Compute your loss function. (Again, Torch wants the target
    # word wrapped in a tensor)
    loss = loss_function(log_probs, torch.tensor([dictionary[target]], dtype=torch.long))
    # Get the Python number from a 1-element Tensor by calling tensor.item()
    total_loss += loss.item()
    total+=1
print(f"Accuracy: {correct/total}")
print(f"Total_Loss: {total_loss}")




Dataset size: 3512
Train Dataset size: 700
Word: I Freq: 3788
Word: to Freq: 6009
Word: and Freq: 4564
Word: the Freq: 3363
Word: that Freq: 2349
Word: is Freq: 2497
Word: of Freq: 2765
Word: in Freq: 2022
Word: you Freq: 4392
Word: are Freq: 1603
Word: feelings Freq: 177
Word: my Freq: 964
Word: do Freq: 638
Word: I'm Freq: 456
Word: how Freq: 436
Word: be Freq: 1376
Word: I've Freq: 186
Word: but Freq: 686
Word: never Freq: 215
Word: can Freq: 1292
Word: worthless Freq: 54
Word: some Freq: 468
Word: your Freq: 2347
Word: with Freq: 1661
Word: out Freq: 427
Word: which Freq: 235
Word: a Freq: 3656
Word: thought Freq: 77
Word: change Freq: 105
Word: on Freq: 781
Word: they Freq: 506
Word: have Freq: 1580
Word: good Freq: 227
Word: thoughts, Freq: 26
Word: this Freq: 871
Word: not Freq: 815
Word: for Freq: 1303
Word: about Freq: 846
Word: The Freq: 270
Word: things Freq: 312
Word: think Freq: 232
Word: here. Freq: 45
Word: tried Freq: 53
Word: or Freq: 1168
Word: feeling Freq: 310
Word:

KeyboardInterrupt: 