In [1]:
import torch
import numpy as np
import torch.nn as nn
import math
import os
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F



In [5]:
class NGramLanguageModeler(nn.Module):
    """Feed-forward n-gram language model.

    Every context word is looked up independently in a shared embedding
    table; the resulting vectors are concatenated and passed through a
    128-unit ReLU hidden layer followed by a projection onto the vocabulary.

    Args:
        vocab_size: number of rows in the embedding table and number of
            output classes.
        embedding_dim: size of each word embedding.
        context_size: number of context words fed to the model per prediction.
    """

    def __init__(self, vocab_size, embedding_dim, context_size):
        super().__init__()
        # Per-word lookup: words do not interact with each other at this stage.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        # Consumes the embeddings of all context words laid out side by side.
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary for each context.

        Args:
            inputs: integer tensor of word indices, either a single context of
                shape ``(context_size,)`` or a batch of shape
                ``(batch, context_size)``.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` holding log-probabilities;
            ``batch`` is 1 when a single context is given.
        """
        # A lone context is treated as a batch of one so the output keeps the
        # (1, vocab_size) shape that per-example callers rely on.
        if inputs.dim() < 2:
            inputs = inputs.reshape(1, -1)
        # (batch, context, dim) -> (batch, context * dim): one flat row per context.
        embeds = self.embeddings(inputs).flatten(start_dim=1)
        hidden = F.relu(self.linear1(embeds))
        logits = self.linear2(hidden)
        return F.log_softmax(logits, dim=1)

    


In [6]:
# Location of the mental-health CSV inside the current user's Downloads folder.
home_dir = os.path.expanduser("~")
file_path = os.path.join(home_dir, "Downloads", "mental_health.csv")
# Read the CSV and convert it straight to a NumPy array.
dataset = pd.read_csv(file_path).to_numpy()
# Keep only the leading 1/30 of the rows.
dataset = dataset[: len(dataset) // 30]


minFreq = {}  # running occurrence count per word
dictionary = {}  # word -> integer id, for words seen often enough
index = 2  # next id to hand out; ids start at 2
for row in dataset:
    for column in (0, 1):
        text = row[column]
        if type(text) is float:  # NaN cells come through as floats
            continue
        for word in text.split():
            seen = minFreq.get(word, 0)
            # A word already counted three times is admitted on this,
            # its fourth, occurrence.
            if seen == 3:
                dictionary[word] = index
                index += 1
            minFreq[word] = seen + 1
                
CONTEXT_SIZE = 3  # number of preceding words used to predict the current one
EMBEDDING_DIM = 252  # length of each word embedding
# Each entry pairs the preceding words (nearest first) with the word to
# predict, e.g. (['through', 'going', "I'm"], 'some').
all_ngrams = []
for row in dataset:
    for column in (0, 1):
        text = row[column]
        if type(text) is float:  # NaN cells come through as floats
            continue
        tokens = text.split()  # whitespace tokenisation
        for pos in range(CONTEXT_SIZE, len(tokens)):
            # Slice the CONTEXT_SIZE words before `pos`, then reverse so the
            # closest word comes first.
            history = tokens[pos - CONTEXT_SIZE:pos][::-1]
            all_ngrams.append((history, tokens[pos]))

loss_function = nn.NLLLoss()  # negative log-likelihood over the model's log-probabilities
model = NGramLanguageModeler(index, EMBEDDING_DIM, CONTEXT_SIZE)  # `index` is one past the largest word id
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Encode the usable n-grams once instead of re-filtering on every epoch.
# An n-gram is skipped when its target or any context word is out of
# vocabulary; checking every context word keeps this correct for any
# CONTEXT_SIZE rather than only for three-word contexts.
train_examples = [
    ([dictionary[w] for w in context], dictionary[target])
    for context, target in all_ngrams
    if target in dictionary and all(w in dictionary for w in context)
]

for epoch in range(10):
    total_loss = 0
    total = 0
    correct = 0
    print(epoch)
    for context_ids, target_id in train_examples:
        # Wrap the integer ids in tensors so they can be fed to the model / loss.
        context_idxs = torch.tensor(context_ids, dtype=torch.long)
        target_idx = torch.tensor([target_id], dtype=torch.long)

        # Gradients accumulate across backward() calls, so clear them first.
        model.zero_grad()

        # Forward pass: log-probabilities over the next word.
        log_probs = model(context_idxs)

        # The prediction is the class with the highest log-probability.
        # It is taken before the optimizer step, so accuracy reflects the
        # weights as they were when the example was seen.
        _, predicted = torch.max(log_probs, dim=1)
        if int(predicted) == target_id:
            correct += 1

        # Loss against the true next word, then backward pass and update.
        loss = loss_function(log_probs, target_idx)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python number from the 1-element loss tensor.
        total_loss += loss.item()
        total += 1
    if total:
        print(f"Accuracy: {correct/total}")
    else:
        # Nothing survived the vocabulary filter; avoid dividing by zero.
        print("Accuracy: n/a (no in-vocabulary n-grams)")
    print(f"Total_Loss: {total_loss}")

    
# `dataset` already holds only the training rows (the first 1/30 of the CSV),
# so slicing it again would draw "test" examples from the training data.
# Reload the full file and take the rows that follow the training split.
train_size = len(dataset)
full_dataset = pd.read_csv(file_path).to_numpy()
test = full_dataset[train_size:train_size + 10000]

# Same n-gram layout as for training: (preceding words nearest first, target).
test_grams = []
for row in test:
    for column in (0, 1):
        text = row[column]
        if not isinstance(text, str):  # NaN or any other non-text cell
            continue
        tokens = text.split()
        for pos in range(CONTEXT_SIZE, len(tokens)):
            history = tokens[pos - CONTEXT_SIZE:pos][::-1]
            test_grams.append((history, tokens[pos]))
      
    
    
total = 0
correct = 0
# Reset here so the reported test loss does not include the running sum left
# over from the last training epoch.
total_loss = 0

model.eval()
# No parameter updates happen during evaluation, so skip gradient tracking.
with torch.no_grad():
    for context, target in test_grams:
        # Skip n-grams containing any out-of-vocabulary word; checking every
        # context word keeps this valid for any CONTEXT_SIZE.
        if target not in dictionary or not all(w in dictionary for w in context):
            continue

        # Turn the words into integer ids wrapped in tensors.
        context_idxs = torch.tensor([dictionary[w] for w in context], dtype=torch.long)
        target_idx = torch.tensor([dictionary[target]], dtype=torch.long)

        # Forward pass: log-probabilities over the next word.
        log_probs = model(context_idxs)
        _, predicted = torch.max(log_probs, dim=1)
        if int(predicted) == dictionary[target]:
            correct += 1

        # Loss against the true next word; .item() yields the Python number.
        loss = loss_function(log_probs, target_idx)
        total_loss += loss.item()
        total += 1

if total:
    print(f"Accuracy: {correct/total}")
else:
    # Nothing survived the vocabulary filter; avoid dividing by zero.
    print("Accuracy: n/a (no in-vocabulary n-grams)")
print(f"Total_Loss: {total_loss}")




22107
0
tensor([52, 48, 15])
tensor([[ 7.4414e-01,  1.9966e-01, -1.4527e+00,  8.4134e-01, -7.4846e-01,
         -7.8566e-01, -3.9399e-01, -2.3985e-01,  1.2674e+00,  7.2022e-01,
         -4.9126e-02,  1.8048e+00, -1.3585e-01, -8.3153e-01, -2.5309e-01,
          2.5834e+00,  2.9872e-01, -1.3039e+00,  1.2968e-01,  8.2620e-01,
         -4.2135e-01,  1.5511e+00, -7.2142e-01, -6.2104e-01, -9.4118e-01,
          1.1324e+00,  1.6325e+00,  1.2489e+00,  1.4183e+00, -2.1409e+00,
          1.1068e+00,  2.0854e-01, -1.3033e+00,  3.6316e-01,  8.5487e-01,
         -7.4163e-01, -7.6465e-01, -4.1204e-01,  1.2499e-01,  1.9339e+00,
         -2.0929e-01, -9.9339e-01,  7.3446e-02,  1.2776e+00, -2.0305e+00,
         -4.1967e-01, -8.2650e-01, -6.8663e-01,  1.8267e-01,  8.0684e-01,
          7.2712e-01,  4.9817e-01,  1.1443e+00, -5.0324e-04, -1.0744e+00,
          4.8676e-01, -2.8906e-01,  2.8544e+00,  1.0016e+00, -6.8231e-01,
         -1.3146e+00, -1.0661e+00,  1.3817e+00,  1.3175e+00, -1.1066e-01,
         

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x252 and 756x128)