In [85]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from transformers import AutoTokenizer, DistilBertModel
import numpy as np

In [86]:
# create a simple torch model with 1 fully connected layer

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(768, 2)

    def forward(self, x):
        # apply relu on the output of the fully connected layer
        x = F.relu(self.fc1(x))
        return x

In [87]:
words = [
    "apple", "book", "car", "dog", "cat", "house", "tree", "friend", "time", "money",
    "heart", "sun", "moon", "sky", "water", "fire", "earth", "flower", "city", "music",
    "child", "parent", "school", "job", "love", "smile", "day", "night", "star", "cloud",
    "bird", "fish", "food", "computer", "phone", "internet", "coffee", "tea", "shoes", "hat",
    "dream", "goal", "team", "game", "hope", "fear", "joy", "anger", "peace", "war",
    "friendship", "family", "health", "beauty", "knowledge", "power", "nature", "history", "science",
    "art", "happiness", "sadness", "color", "mind", "body", "soul", "memory", "experience", "idea",
    "faith", "truth", "lie", "problem", "solution", "question", "answer", "light", "darkness", "wind",
    "rain", "snow", "smell", "taste", "touch", "sound", "silence", "joy", "freedom", "future",
    "past", "present", "purpose", "journey", "adventure", "discovery", "challenge", "victory", "defeat"
]

most_common_words = []

# remove duplicates
words = list(set(words))
# most_common_words = list(set(most_common_words))

print(len(words))
print(len(most_common_words))

assert len(list(set(words + most_common_words))) == len(words) + len(most_common_words)

97
0


In [88]:
non_words = [
    "run", "jump", "eat", "sleep", "think", "happy", "fast", "slow", "beautiful", "smart",
    "quickly", "easily", "always", "never", "under", "over", "beside", "between", "through", "with",
    "and", "but", "or", "because", "although", "well", "yes", "no", "oh", "wow", "ouch",
    "go", "come", "stop", "start", "win", "lose", "hot", "cold", "loud", "soft",
    "bright", "dark", "high", "low", "near", "far", "big", "small", "old", "new",
    "first", "last", "next", "previous", "good", "bad", "happy", "sad", "rich", "poor",
    "early", "late", "hard", "easy", "simple", "complex", "right", "wrong", "true", "false",
    "up", "down", "in", "out", "on", "off", "up", "down", "here", "there",
    "now", "then", "today", "tomorrow", "yesterday", "soon", "later", "before", "after", "while",
    "once", "twice", "thrice", "nevertheless", "however", "suddenly", "finally", "quickly", "slowly", "nowadays",
    "some", "many", "few", "most", "none", "all", "each", "every", "somebody", "nobody",
    "something", "nothing", "somewhere", "nowhere", "anywhere", "everywhere", "this", "that", "these", "those",
    "which", "what", "who", "whom", "whose", "where", "when", "why", "how", "whether",
    "if", "unless", "because", "since", "although", "though", "while", "before", "after", "when",
    "and", "or", "but", "nor", "for", "so", "yet", "either", "neither", "however"
]

# remove duplicates
non_words = list(set(non_words))

print(len(non_words))


137


In [89]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")

In [90]:
# create a function that takes a list of words and returns a list of embeddings
def get_embeddings(words):
    embeddings = []

    for word in words:
        input_ids = torch.tensor(tokenizer.encode(word)).unsqueeze(0)
        outputs = model(input_ids)
        last_hidden_states = outputs[0]
        embeddings.append(torch.mean(last_hidden_states[0][1:-1], dim=0).tolist())
    
    assert len(embeddings) == len(words)
    return embeddings

embeddings = get_embeddings(words)
most_common_embeddings = get_embeddings(most_common_words)
non_embeddings = get_embeddings(non_words)

In [91]:
# slice the lists into training and test sets
words_train = words[:int(len(words)*0.8)]
words_test = words[int(len(words)*0.8):]
embeddings_train = embeddings[:int(len(embeddings)*0.8)]
embeddings_test = embeddings[int(len(embeddings)*0.8):]

most_common_words_train = most_common_words[:int(len(most_common_words)*0.8)]
most_common_words_test = most_common_words[int(len(most_common_words)*0.8):]
most_common_embeddings_train = most_common_embeddings[:int(len(most_common_embeddings)*0.8)]
most_common_embeddings_test = most_common_embeddings[int(len(most_common_embeddings)*0.8):]

non_words_train = non_words[:int(len(non_words)*0.8)]
non_words_test = non_words[int(len(non_words)*0.8):]
non_embeddings_train = non_embeddings[:int(len(non_embeddings)*0.8)]
non_embeddings_test = non_embeddings[int(len(non_embeddings)*0.8):]

# create a dataframe with the training and test sets
df_train = pd.DataFrame({
    'word': words_train + most_common_words_train + non_words_train,
    'embedding': embeddings_train + most_common_embeddings_train + non_embeddings_train,
    'label': [1]*len(words_train) + [1]*len(most_common_words_train) + [0]*len(non_words_train)
})

df_test = pd.DataFrame({
    'word': words_test + most_common_words_test + non_words_test,
    'embedding': embeddings_test + most_common_embeddings_test + non_embeddings_test,
    'label': [1]*len(words_test) + [1]*len(most_common_words_test) + [0]*len(non_words_test)
})

# shuffle the dataframes
# df_train = df_train.sample(frac=1).reset_index(drop=True)
# df_test = df_test.sample(frac=1).reset_index(drop=True)

print(df_train[:5])
print(df_train[-5:])
print(df_test[:5])
print(df_test[-5:])

    word                                          embedding  label
0    job  [0.14350789785385132, -0.15940922498703003, -0...      1
1   body  [0.7147403359413147, -0.1555342972278595, 0.31...      1
2  faith  [0.6398938298225403, 0.13858512043952942, -0.1...      1
3  earth  [0.26693275570869446, 0.4444328844547272, -0.1...      1
4  water  [0.2036658525466919, 0.2685178220272064, -0.28...      1
          word                                          embedding  label
181   somebody  [-0.007474187761545181, -0.06840874254703522, ...      0
182         in  [-0.8657609224319458, -0.5856645703315735, -0....      0
183         on  [-0.11932921409606934, -0.3286181688308716, -0...      0
184      later  [-0.2563585937023163, -0.453279972076416, -0.1...      0
185  something  [-0.15402115881443024, -0.17753523588180542, -...      0
     word                                          embedding  label
0    mind  [0.38985970616340637, 0.27778860926628113, -0....      1
1   smile  [-0.117154628

In [92]:
class CustomDataset(Dataset):
    def __init__(self, tokenized_texts, labels):
        self.tokenized_texts = torch.tensor(tokenized_texts)
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.tokenized_texts[idx], self.labels[idx]

In [93]:
train_dataset = CustomDataset(df_train['embedding'], df_train['label'])
test_dataset = CustomDataset(df_test['embedding'], df_test['label'])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [118]:
# train the model
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)

for epoch in range(num_epochs):
    net.train()
    combined_loss = 0
    for batch in train_loader:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader)}')
        # print(combined_loss / len(train_loader))



Initial loss: 0.4711984694004059
Epoch 10, loss=0.04769730878372987
Epoch 20, loss=0.01954430671563993
Epoch 30, loss=0.012031516913945476
Epoch 40, loss=0.0076595131734696524
Epoch 50, loss=0.0050835938794383155
Epoch 60, loss=0.003941135480999947
Epoch 70, loss=0.0030330109681623676
Epoch 80, loss=0.0024938517793392143
Epoch 90, loss=0.002030371077125892
Epoch 100, loss=0.0016996313740188878


A lot of variance over different runs
Best so far: 0.001

In [119]:
# evaluate the model
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for batch in loader:
            # get data to GPU if possible
            data = batch[0].to(device=device)
            targets = batch[1].to(device=device)

            # forward
            scores = model(data)
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
    
    return num_correct / num_samples

print(f'Accuracy on training set: {check_accuracy(train_loader, net)*100:.2f}%')
print(f'Accuracy on test set: {check_accuracy(test_loader, net)*100:.2f}%')

Accuracy on training set: 100.00%
Accuracy on test set: 100.00%


In [120]:
# Find the most important features of the model
# get the gradients of the model
net.eval()
gradients = []
for batch in train_loader:
    # get data to GPU if possible
    data = batch[0].to(device=device)
    targets = batch[1].to(device=device)

    # forward
    scores = net(data)
    loss = criterion(scores, targets)
    loss.backward()
    gradients.append(net.fc1.weight.grad.detach().cpu().numpy())

# average the gradients
gradients = np.mean(gradients, axis=0)
print(gradients.shape)

# print(len(gradients))
# print(len(gradients[0]))
# print(len(gradients[0][0]))

# find the indices of the 10 most important features
most_important_gradients_0 = gradients[0].argsort()[-10:][::-1]
most_important_gradients_1 = gradients[1].argsort()[-10:][::-1]

# print the 10 most important features
print(most_important_gradients_0.shape)
print(most_important_gradients_0)
print(gradients[0][most_important_gradients_0])

print(most_important_gradients_1.shape)
print(most_important_gradients_1)
print(gradients[1][most_important_gradients_1])

(2, 768)
(10,)
[308 289 522 282 163   4  41  34 314 313]
[0.00462142 0.00220568 0.00209771 0.00206503 0.00171991 0.00157391
 0.00152331 0.00146067 0.0013911  0.00134368]
(10,)
[191 161 733 158 601 576 170 217 558 626]
[0.00191559 0.00170402 0.00164448 0.00149729 0.0014471  0.00139613
 0.00137255 0.00134782 0.00133149 0.00127643]


In [121]:
combined_importance_grad = list(set(list(most_important_gradients_0) + list(most_important_gradients_1)))
print(combined_importance_grad)
print(len(combined_importance_grad))

[4, 522, 282, 158, 289, 34, 163, 161, 41, 170, 558, 308, 313, 314, 191, 576, 217, 601, 733, 626]
20


In [122]:
# Find the most important features for the model
# get the weights of the fully connected layer
weights = net.fc1.weight.data.cpu().numpy()

# find the indices of the 10 most important features
most_important_features_0 = weights[0].argsort()[-10:][::-1]
most_important_features_1 = weights[1].argsort()[-10:][::-1]

# print the 10 most important features and their weights
print(most_important_features_0.shape)
print(most_important_features_0)
print(weights[0][most_important_features_0])

print(most_important_features_1.shape)
print(most_important_features_1)
print(weights[1][most_important_features_1])

(10,)
[158 558 343 446 705 486 628 582 379 672]
[0.36490867 0.34004512 0.33436078 0.3275797  0.32661593 0.32173577
 0.32137144 0.31152323 0.3093089  0.30298623]
(10,)
[289 489 442   5 715 282 603 630 610 565]
[0.36971068 0.36326203 0.33986703 0.33446872 0.33414614 0.32822368
 0.32525712 0.32286963 0.32209966 0.32026997]


In [123]:
combined_importance = list(set(list(most_important_features_0) + list(most_important_features_1)))
print(combined_importance)
print(len(combined_importance))

[5, 282, 158, 672, 289, 558, 565, 442, 446, 705, 582, 715, 343, 603, 610, 486, 489, 628, 630, 379]
20


In [124]:
class Net_small(nn.Module):
    def __init__(self):
        super(Net_small, self).__init__()
        self.fc1 = nn.Linear(20, 2)

    def forward(self, x):
        # apply relu on the output of the fully connected layer
        x = F.relu(self.fc1(x))
        return x

In [125]:
# create datasets with the 20 most important features
df_train_small = pd.DataFrame({
    'word': df_train['word'],
    'embedding': df_train['embedding'].apply(lambda x: [x[i] for i in combined_importance]),
    'label': df_train['label']
})

df_test_small = pd.DataFrame({
    'word': df_test['word'],
    'embedding': df_test['embedding'].apply(lambda x: [x[i] for i in combined_importance]),
    'label': df_test['label']
})

print(df_train_small[:5])
print(df_train_small[-5:])
print(df_test_small[:5])
print(df_test_small[-5:])

print("\nLength of reduced embedding: ", len(df_train_small['embedding'].tolist()[0]))

    word                                          embedding  label
0    job  [0.16212625801563263, -0.16747981309890747, 0....      1
1   body  [0.3314345180988312, 0.19887007772922516, 0.26...      1
2  faith  [0.11599896103143692, -0.09121094644069672, -0...      1
3  earth  [0.33062461018562317, 0.118535615503788, -0.03...      1
4  water  [0.5099238753318787, -0.38860055804252625, 0.0...      1
          word                                          embedding  label
181   somebody  [0.23361219465732574, -0.30281540751457214, 0....      0
182         in  [-0.019756067544221878, -0.4305441677570343, 0...      0
183         on  [0.3970641493797302, -0.3543335199356079, -0.2...      0
184      later  [0.26222217082977295, 0.024935398250818253, 0....      0
185  something  [-0.2308577299118042, -0.48253750801086426, 0....      0
     word                                          embedding  label
0    mind  [0.2802393436431885, 0.11320915818214417, 0.18...      1
1   smile  [0.1179622039

In [126]:
# create datasets
train_dataset_small = CustomDataset(df_train_small['embedding'], df_train_small['label'])
test_dataset_small = CustomDataset(df_test_small['embedding'], df_test_small['label'])

# create dataloaders
train_loader_small = DataLoader(train_dataset_small, batch_size=16, shuffle=True)
test_loader_small = DataLoader(test_dataset_small, batch_size=16, shuffle=True)

In [127]:
# train the model
net2 = Net_small()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net2.to(device)

for epoch in range(num_epochs):
    net2.train()
    combined_loss = 0
    for batch in train_loader_small:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net2(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader_small)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader_small)}')
        # print(combined_loss / len(train_loader))


Initial loss: 0.6962780604759852
Epoch 10, loss=0.6959789196650187
Epoch 20, loss=0.6961021721363068
Epoch 30, loss=0.6960124870141348
Epoch 40, loss=0.6960191975037257
Epoch 50, loss=0.6960007150967916
Epoch 60, loss=0.696659043431282
Epoch 70, loss=0.6952183991670609
Epoch 80, loss=0.6960041423638662
Epoch 90, loss=0.695601205031077
Epoch 100, loss=0.6965534736712774


In [128]:
# create datasets with the 20 most important features by gradients
df_train_small_grad = pd.DataFrame({
    'word': df_train['word'],
    'embedding': df_train['embedding'].apply(lambda x: [x[i] for i in combined_importance_grad]),
    'label': df_train['label']
})

df_test_small_grad = pd.DataFrame({
    'word': df_test['word'],
    'embedding': df_test['embedding'].apply(lambda x: [x[i] for i in combined_importance_grad]),
    'label': df_test['label']
})

print(df_train_small_grad[:5])
print(df_train_small_grad[-5:])
print(df_test_small_grad[:5])
print(df_test_small_grad[-5:])

print("\nLength of reduced embedding: ", len(df_train_small_grad['embedding'].tolist()[0]))

    word                                          embedding  label
0    job  [0.34226033091545105, -0.08179537951946259, -0...      1
1   body  [0.10487144440412521, -0.5605854392051697, 0.1...      1
2  faith  [0.43051886558532715, -0.22967687249183655, -0...      1
3  earth  [0.6749990582466125, -0.48906248807907104, 0.1...      1
4  water  [0.5786231160163879, -0.42757871747016907, -0....      1
          word                                          embedding  label
181   somebody  [0.0014382358640432358, -0.2624276280403137, -...      0
182         in  [0.1521024852991104, -0.49227994680404663, -0....      0
183         on  [0.2419023960828781, -0.5570359230041504, -0.3...      0
184      later  [0.09961800277233124, -0.44795429706573486, 0....      0
185  something  [0.29623791575431824, -0.44595324993133545, -0...      0
     word                                          embedding  label
0    mind  [0.36257031559944153, -0.20241917669773102, 0....      1
1   smile  [0.5048218965

In [129]:
# create datasets
train_dataset_small_grad = CustomDataset(df_train_small_grad['embedding'], df_train_small_grad['label'])
test_dataset_small_grad = CustomDataset(df_test_small_grad['embedding'], df_test_small_grad['label'])

# create dataloaders
train_loader_small_grad = DataLoader(train_dataset_small_grad, batch_size=16, shuffle=True)
test_loader_small_grad = DataLoader(test_dataset_small_grad, batch_size=16, shuffle=True)

In [131]:
# train the model
net3 = Net_small()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net3.to(device)

for epoch in range(num_epochs):
    net3.train()
    combined_loss = 0
    for batch in train_loader_small_grad:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net3(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader_small_grad)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader_small_grad)}')
        # print(combined_loss / len(train_loader))


Initial loss: 0.6974899619817734
Epoch 10, loss=0.7004535098870596
Epoch 20, loss=0.6999430259068807
Epoch 30, loss=0.7004847377538681
Epoch 40, loss=0.6987025936444601
Epoch 50, loss=0.6984369705120722
Epoch 60, loss=0.6974718024333318
Epoch 70, loss=0.6973331769307455
Epoch 80, loss=0.698237473766009
Epoch 90, loss=0.6975740840037664
Epoch 100, loss=0.700429230928421
