In [1]:
import torch
import torch.nn as nn
import torch.optim as optim


In [2]:
train_data = {
  'good': True,
  'bad': False,
  'happy': True,
  'sad': False,
  'not good': False,
  'not bad': True,
  'not happy': False,
  'not sad': True,
  'very good': True,
  'very bad': False,
  'very happy': True,
  'very sad': False,
  'i am happy': True,
  'this is good': True,
  'i am bad': False,
  'this is bad': False,
  'i am sad': False,
  'this is sad': False,
  'i am not happy': False,
  'this is not good': False,
  'i am not bad': True,
  'this is not sad': True,
  'i am very happy': True,
  'this is very good': True,
  'i am very bad': False,
  'this is very sad': False,
  'this is very happy': True,
  'i am good not bad': True,
  'this is good not bad': True,
  'i am bad not good': False,
  'i am good and happy': True,
  'this is not good and not happy': False,
  'i am not at all good': False,
  'i am not at all bad': True,
  'i am not at all happy': False,
  'this is not at all sad': True,
  'this is not at all happy': False,
  'i am good right now': True,
  'i am bad right now': False,
  'this is bad right now': False,
  'i am sad right now': False,
  'i was good earlier': True,
  'i was happy earlier': True,
  'i was bad earlier': False,
  'i was sad earlier': False,
  'i am very bad right now': False,
  'this is very good right now': True,
  'this is very sad right now': False,
  'this was bad earlier': False,
  'this was very good earlier': True,
  'this was very bad earlier': False,
  'this was very happy earlier': True,
  'this was very sad earlier': False,
  'i was good and not bad earlier': True,
  'i was not good and not happy earlier': False,
  'i am not at all bad or sad right now': True,
  'i am not at all good or happy right now': False,
  'this was not happy and not good earlier': False,
}

test_data = {
  'this is happy': True,
  'i am good': True,
  'this is not happy': False,
  'i am not good': False,
  'this is not bad': True,
  'i am not sad': True,
  'i am very good': True,
  'this is very bad': False,
  'i am very sad': False,
  'this is bad not good': False,
  'this is good and happy': True,
  'i am not good and not happy': False,
  'i am not at all sad': True,
  'this is not at all good': False,
  'this is not at all bad': True,
  'this is good right now': True,
  'this is sad right now': False,
  'this is very bad right now': False,
  'this was good earlier': True,
  'i was not happy and not good earlier': False,
}

# NLP Problems
1. Variable Sequence Length
2. Order Matters
3. Short and Long Term Context

In [3]:
# Bag of Words
# finding number of unique words

unique_words = set()

for key in train_data.keys():
    unique_words.update(key.split())

unique_words = list(sorted(unique_words))
print(unique_words)

['all', 'am', 'and', 'at', 'bad', 'earlier', 'good', 'happy', 'i', 'is', 'not', 'now', 'or', 'right', 'sad', 'this', 'very', 'was']


In [21]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


In [22]:
# sentence to bag of words
X_train = []
for key in train_data.keys():
    vector = []
    for word in key.split():
        arr = [0] * len(unique_words)
        arr[unique_words.index(word)] = 1
        vector.append(arr)
    X_train.append(torch.tensor(vector, dtype=torch.float32).to(device))

for v, k in zip(X_train, train_data.keys()):
    print(v, k)



tensor([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') good
tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') bad
tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') happy
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]],
       device='mps:0') sad
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') not good
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') not bad
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

In [23]:
Y_train = list(map(int, train_data.values()))

print(Y_train)
print(len(unique_words))


[1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
18


In [47]:
class MyRNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(input_size=len(unique_words), hidden_size=32)
        self.linear = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        output, hidden = self.rnn(x)
        output = self.linear(hidden[0])
        return self.sigmoid(output)


In [49]:
model = MyRNN().to(device)
print(model)

MyRNN(
  (rnn): RNN(18, 32)
  (linear): Linear(in_features=32, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [53]:
model(X_train[10])

tensor([0.4979], device='mps:0', grad_fn=<SigmoidBackward0>)

In [54]:
for i in range(len(X_train)):
    print(model(X_train[i]))

tensor([0.5683], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5289], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5219], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5584], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5506], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5113], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5041], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5412], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5435], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5039], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.4979], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5342], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5071], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5419], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5142], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5027], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.5439], device=

In [55]:
print(dict(model.named_parameters()))

{'rnn.weight_ih_l0': Parameter containing:
tensor([[-0.1715, -0.1054, -0.1058, -0.0337,  0.0430,  0.0450, -0.0096,  0.1071,
         -0.1603, -0.1078, -0.1514,  0.1611,  0.0770, -0.0964, -0.0674, -0.0350,
         -0.0077, -0.0600],
        [-0.1125,  0.0116,  0.0140,  0.1068, -0.1283,  0.0959, -0.0763,  0.0759,
          0.1181, -0.1241, -0.1637, -0.1700,  0.1236,  0.1152,  0.0301, -0.0056,
          0.1235,  0.1756],
        [ 0.1711, -0.0416,  0.0658,  0.1528, -0.0107, -0.0141,  0.1561, -0.0658,
         -0.0656,  0.0554,  0.0177,  0.0630,  0.1757, -0.0068, -0.0592,  0.1140,
          0.1341,  0.0321],
        [ 0.0134, -0.1585,  0.0614, -0.0827, -0.0710, -0.1670, -0.0824,  0.0687,
          0.0912,  0.1024, -0.0290, -0.0316,  0.0030,  0.0923, -0.0267, -0.1631,
         -0.1540,  0.1518],
        [-0.0704,  0.1047,  0.0816,  0.1341,  0.0084,  0.0932, -0.1166,  0.0901,
         -0.0062, -0.0533,  0.1652,  0.1133, -0.0011,  0.1092,  0.1191, -0.0153,
          0.1301, -0.0919],
       

In [67]:
# loss function
loss_fn = nn.BCELoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

def evaluate(model, loss_fn, X_train, Y_train):
    model.eval()
    total_loss = 0
    for i in range(len(X_train)):
        with torch.no_grad():
            prediction = model(X_train[i])
            loss = loss_fn(prediction, torch.tensor([Y_train[i]], dtype=torch.float32).to(device))
            total_loss += loss
    return total_loss / len(X_train)

# training loop
for epoch in range(5):
    model.zero_grad()
    for i in range(len(X_train)):
        x,y = X_train[i], Y_train[i]
        prediction = model(x)
        loss = loss_fn(prediction, torch.tensor([y], dtype=torch.float32).to(device))
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    if epoch % 100 == 0:
        print(f"Epoch {epoch} loss: {evaluate(model, loss_fn, X_train, Y_train)}")


Epoch 0 loss: 0.0029956370126456022


In [69]:
print(model(X_train[0]))
print(Y_train)

tensor([0.9997], device='mps:0', grad_fn=<SigmoidBackward0>)
[1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]


In [37]:
results = []
for op in model(X_train):
    if op > 0.5:
        results.append(1)
    else:
        results.append(0)

print(results)
print(Y_train)

[1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
tensor([1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
        0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1.,
        0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1.,
        0., 1., 0., 0.], device='mps:0')


In [39]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading scipy-1.15.1-cp312-cp312-macosx_14_0_arm64.whl (24.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.9/24.9 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
I

In [41]:
print(results)
print(Y_train)

[1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
tensor([1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
        0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1.,
        0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1.,
        0., 1., 0., 0.], device='mps:0')


In [42]:
from sklearn.metrics import accuracy_score
print(accuracy_score(results, Y_train.tolist()))

0.9827586206896551


In [43]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(results, Y_train.tolist()))

[[31  0]
 [ 1 26]]


In [44]:
from sklearn.metrics import classification_report
print(classification_report(results, Y_train.tolist()))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98        31
           1       1.00      0.96      0.98        27

    accuracy                           0.98        58
   macro avg       0.98      0.98      0.98        58
weighted avg       0.98      0.98      0.98        58



In [45]:
print(unique_words)

['all', 'am', 'and', 'at', 'bad', 'earlier', 'good', 'happy', 'i', 'is', 'not', 'now', 'or', 'right', 'sad', 'this', 'very', 'was']


In [71]:
new_ip_good = "i am good not bad at all"
new_ip_bad = "i am bad not good at all"


# sentence to bag of words
X_train_new = []
for key in [new_ip_good, new_ip_bad]:
    vector = []
    for word in key.split():
        arr = [0] * len(unique_words)
        arr[unique_words.index(word)] = 1
        vector.append(arr)
    X_train_new.append(torch.tensor(vector, dtype=torch.float32).to(device))

for v, k in zip(X_train_new, [new_ip_good, new_ip_bad]):
    print(v, k)



tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='mps:0') i am good not bad at all
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 

In [72]:
print(model(torch.tensor(X_train_new[0], dtype=torch.float32).to(device)))
print(model(torch.tensor(X_train_new[1], dtype=torch.float32).to(device)))

tensor([0.9730], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([0.0003], device='mps:0', grad_fn=<SigmoidBackward0>)


  print(model(torch.tensor(X_train_new[0], dtype=torch.float32).to(device)))
  print(model(torch.tensor(X_train_new[1], dtype=torch.float32).to(device)))
