# Text Classification using Logistic Regression


# Exercise:

1. Build a simple sentiment classifier with the new data given below and predict the labels of test_data, using the BoWClassifier2() defined in the previous steps.


In [1]:
# ALGORITHM STEPS:

In [2]:
# CODE ANALYSIS:

In [3]:
%matplotlib inline

In [4]:
# Importing necessary libraries

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [5]:
# STEP 1: Data setup
# Every example is a (list of word tokens, label) pair.
# The label strings are used as lookup keys, so they are spelled exactly like
# the keys of label_to_ix.
training_data = [
    (text.split(), label)
    for text, label in (
        ("Well done", "postive"),
        ("Good work", "postive"),
        ("Great effort", "postive"),
        ("Weak", "negative"),
        ("Poor effort", "negative"),
        ("not good", "negative"),
    )
]

test_data = [
    (text.split(), label)
    for text, label in (
        ("Great work", "postive"),
        ("Good job", "postive"),
        ("poor work", "negative"),
        ("not great", "negative"),
    )
]

In [6]:
# STEP 2 & STEP 3: Collect the distinct words into the "bag" (vocabulary).
# Each unseen word gets the next free integer index, in order of first appearance.
# Test sentences are indexed as well, so every test word has a slot in the vector.
# Matching is case-sensitive: "Good" and "good" are two different entries.
word_to_ix = {}
for sent, _ in training_data + test_data:
    for word in sent:
        # setdefault only inserts when the word is not present yet
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

# Numeric target for each sentiment label
label_to_ix = {"negative": 0, "postive": 1}

{'Well': 0, 'done': 1, 'Good': 2, 'work': 3, 'Great': 4, 'effort': 5, 'Weak': 6, 'Poor': 7, 'not': 8, 'good': 9, 'job': 10, 'poor': 11, 'great': 12}


In [7]:
# STEP 4:
# Binary sentiment analysis: every input sentence belongs to one of two classes, "positive" or "negative".
# The model is a single neuron (one linear unit) followed by a sigmoid as the activation function.

# Width of the model input = number of distinct words collected in the bag
VOCAB_SIZE = len(word_to_ix)
# One output unit; its sigmoid is a single value between 0 and 1
OUTPUT_SIZE = 1
class BoWClassifier2(nn.Module):
    """Bag-of-words logistic-regression classifier.

    A single linear layer (weights plus bias) maps a bag-of-words count vector
    to ``output_size`` scores, and a sigmoid turns each score into a value
    between 0 and 1. No other activation is applied.
    """

    def __init__(self, output_size, vocab_size):
        """Create the linear layer.

        Args:
            output_size: Number of output units of the linear layer.
            vocab_size: Length of the bag-of-words input vector.
        """
        # Initialise nn.Module first so that the layer assigned below is
        # registered as a sub-module and its parameters become trainable.
        super().__init__()

        # Neuron stage: vocab_size weights and one bias per output unit
        self.linear = nn.Linear(vocab_size, output_size)

    def forward(self, bow_vec):
        """Return the sigmoid of the linear layer applied to ``bow_vec``."""
        logits = self.linear(bow_vec)
        # Activation stage: squash the raw score into (0, 1)
        return torch.sigmoid(logits)

def make_bow_vector(sentence, word_to_ix):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: Iterable of word tokens.
        word_to_ix: Mapping from word to its column index.

    Returns:
        A float tensor of shape 1 x len(word_to_ix) whose entry for each word
        is the number of times that word occurs in ``sentence``.

    Raises:
        KeyError: If a token is not a key of ``word_to_ix``.
    """
    # Allocate the single-row matrix directly instead of reshaping afterwards
    bow = torch.zeros(1, len(word_to_ix))
    for token in sentence:
        column = word_to_ix[token]
        bow[0, column] += 1
    return bow


def make_target(label, label_to_ix):
    """Look up the numeric class of ``label``.

    Args:
        label: Label string of one example.
        label_to_ix: Mapping from label string to integer class index.

    Returns:
        A one-element LongTensor holding the class index, e.g. [0] or [1].

    Raises:
        KeyError: If ``label`` is not a key of ``label_to_ix``.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

# Build the classifier: OUTPUT_SIZE output unit(s) over the whole vocabulary
model = BoWClassifier2(output_size=OUTPUT_SIZE, vocab_size=VOCAB_SIZE)

# Show the freshly initialised parameters (weight matrix, then bias).
# NOTE(review): no random seed is set in the visible cells, so these initial
# values differ from run to run.
for param in model.parameters():
    print(param)


Parameter containing:
tensor([[ 0.2209,  0.2453,  0.0927,  0.1888, -0.2073,  0.0321, -0.0741, -0.1782,
         -0.0025,  0.0384,  0.1747, -0.1245, -0.2363]], requires_grad=True)
Parameter containing:
tensor([0.0850], requires_grad=True)


In [8]:
# Sample run on the test data with the still untrained model.
# Only predictions are needed here, so gradient tracking is switched off.
with torch.no_grad():
    # test_data sentences: "Great work", "Good job", "poor work", "not great"
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        output_probs = model(bow_vec)
        # One 1 x 1 tensor per sentence: the sigmoid output of the model
        print(output_probs)

print("*******************")
# The weight matrix of the linear layer is the model's first parameter
print(model.linear.weight)

tensor([[0.5166]])
tensor([[0.5872]])
tensor([[0.5373]])
tensor([[0.4616]])
*******************
Parameter containing:
tensor([[ 0.2209,  0.2453,  0.0927,  0.1888, -0.2073,  0.0321, -0.0741, -0.1782,
         -0.0025,  0.0384,  0.1747, -0.1245, -0.2363]], requires_grad=True)


In [9]:
# STEP 5: Train the model (binary cross-entropy loss, SGD optimizer)

# Hyperparameters, named so they can be tuned in one place
NUM_EPOCHS = 100      # passes over the training set; 30-50 may already be enough to flatten the loss curve
LEARNING_RATE = 0.1   # SGD step size

loss_function = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# The sentences and labels never change between epochs, so build their tensors
# once instead of rebuilding identical tensors on every pass.
# The target is converted to float and reshaped to 1 x 1 so that it matches the
# model output handed to BCELoss.
train_tensors = [
    (make_bow_vector(sentence, word_to_ix),
     make_target(sentiment, label_to_ix).float().view(1, -1))
    for sentence, sentiment in training_data
]

for epoch in range(NUM_EPOCHS):
    for bow_vec, target in train_tensors:
        # Clear the gradients left over from the previous update
        model.zero_grad()

        output_probs = model(bow_vec)

        # Compute the loss and its gradients, then take one SGD step
        loss = loss_function(output_probs, target)
        loss.backward()
        optimizer.step()

# Evaluate the trained model on the test sentences; no gradients are needed.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        output_probs = model(bow_vec)
        print("***********************")
        # Sigmoid output of the model for this sentence
        print(output_probs)

# Weight matrix of the linear layer after training (the model's first parameter)
print(model.linear.weight)

***********************
tensor([[0.9688]])
***********************
tensor([[0.8000]])
***********************
tensor([[0.7655]])
***********************
tensor([[0.1280]])
Parameter containing:
tensor([[ 1.5585,  1.5829,  1.5076,  1.6036,  2.1288,  0.2876, -2.0790, -2.2589,
         -1.3865, -1.3456,  0.1747, -0.1245, -0.2363]], requires_grad=True)


## SUMMARY 
1. Workflow Steps: Input-->Linear Layer --> Sigmoid Layer --> Output
2. Loss Calculation Methodology: Binary Cross-Entropy (BCE) loss
3. Optimizer Used: SGD
4. Learning Rate : 0.1
5. Epochs: 100

# INFERENCES
1. The predicted probability is high for the 1st and 2nd test sentences (0.97 and 0.80), which are the positive ones.
2. The 3rd and 4th test sentences, which are negative, receive lower probabilities (0.77 and 0.13) than the positive sentences.
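3. A peculiar case is the third test sentence (`("poor work".split(), "negative")`): "work" occurs only in a positive training sentence, whereas lowercase "poor" never occurs in the training data (only "Poor" does, and the vocabulary is case-sensitive), so its weight stays at its initial value. The classifier therefore places this sentence in the middle range (0.77): lower than both positive sentences, but still above 0.5, so with a 0.5 threshold it would be labelled positive rather than negative.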
4. Since the dataset is small, the loss fluctuates slightly from run to run, and training for 100 epochs can sometimes over-train (overfit) the model.
