In [135]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from transformers import AutoTokenizer, DistilBertModel
import numpy as np

In [86]:
# create a simple torch model with 1 fully connected layer

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(768, 2)

    def forward(self, x):
        # apply relu on the output of the fully connected layer
        x = F.relu(self.fc1(x))
        return x

In [87]:
color_words = [
    'aqua', 'aquamarine', 'azure', 'beige', 'bisque',
    'chartreuse', 'chocolate', 'coral', 'crimson', 'cyan', 'firebrick', 'fuchsia',
    'gold', 'gray', 'indigo', 'ivory', 'khaki', 'lavender', 'lime', 'magenta',
    'maroon', 'navy', 'olive', 'orchid', 'plum', 
    'salmon', 'sienna', 'silver', 'tan', 'teal', 'tomato', 'turquoise', 
    'wheat', 'sienna', 'ochre', 'umber', 'sepia', 'vermillion',
    'carmine', 'cerulean', 'auburn', 'viridian', 'ultramarine', 'emerald'
]

most_common_color_words = [
    'red', 'green', 'blue', 'yellow', 'orange', 'purple', 'brown', 'pink', 'violet', 'white', 'black'
]

# remove duplicates
color_words = list(set(color_words))
most_common_color_words = list(set(most_common_color_words))

print(len(color_words))
print(len(most_common_color_words))

assert len(list(set(color_words + most_common_color_words))) == len(color_words) + len(most_common_color_words)

43
11


In [88]:
non_color_words = [
    "dog", "cat", "house", "car", "book", "computer", "table", "chair", "tree", "river",
    "song", "movie", "friend", "family", "phone", "city", "food", "drink", "shoe", "hat",
    "jacket", "pen", "paper", "cloud", "sun", "moon", "star", "road", "bridge", "key",
    "lock", "door", "window", "mirror", "clock", "lamp", "flower", "bird", "fish", "ship",
    "plane", "train", "bus", "child", "adult", "student", "teacher", "doctor", "engineer",
    "artist", "writer", "singer", "actor", "politician", "lawyer", "doctor", "patient",
    "dream", "memory", "idea", "emotion", "love", "hate", "fear", "joy", "anger", "hope",
    "doubt", "peace", "war", "freedom", "justice", "truth", "lie", "friendship", "loneliness",
    "success", "failure", "wealth", "poverty", "nature", "technology", "culture", "history",
    "science", "religion", "politics", "economy", "society", "language", "knowledge", "wisdom"
]

# remove duplicates
non_color_words = list(set(non_color_words))

print(len(non_color_words))


91


In [89]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")

In [90]:
# create a function that takes a list of words and returns a list of embeddings
def get_embeddings(words):
    embeddings = []

    for word in words:
        input_ids = torch.tensor(tokenizer.encode(word)).unsqueeze(0)
        outputs = model(input_ids)
        last_hidden_states = outputs[0]
        embeddings.append(torch.mean(last_hidden_states[0][1:-1], dim=0).tolist())
    
    assert len(embeddings) == len(words)
    return embeddings

color_embeddings = get_embeddings(color_words)
most_common_color_embeddings = get_embeddings(most_common_color_words)
non_color_embeddings = get_embeddings(non_color_words)

In [91]:
# slice the lists into training and test sets
color_words_train = color_words[:int(len(color_words)*0.8)]
color_words_test = color_words[int(len(color_words)*0.8):]
color_embeddings_train = color_embeddings[:int(len(color_embeddings)*0.8)]
color_embeddings_test = color_embeddings[int(len(color_embeddings)*0.8):]

most_common_color_words_train = most_common_color_words[:int(len(most_common_color_words)*0.8)]
most_common_color_words_test = most_common_color_words[int(len(most_common_color_words)*0.8):]
most_common_color_embeddings_train = most_common_color_embeddings[:int(len(most_common_color_embeddings)*0.8)]
most_common_color_embeddings_test = most_common_color_embeddings[int(len(most_common_color_embeddings)*0.8):]

non_color_words_train = non_color_words[:int(len(non_color_words)*0.8)]
non_color_words_test = non_color_words[int(len(non_color_words)*0.8):]
non_color_embeddings_train = non_color_embeddings[:int(len(non_color_embeddings)*0.8)]
non_color_embeddings_test = non_color_embeddings[int(len(non_color_embeddings)*0.8):]

# create a dataframe with the training and test sets
df_train = pd.DataFrame({
    'word': color_words_train + most_common_color_words_train + non_color_words_train,
    'embedding': color_embeddings_train + most_common_color_embeddings_train + non_color_embeddings_train,
    'label': [1]*len(color_words_train) + [1]*len(most_common_color_words_train) + [0]*len(non_color_words_train)
})

df_test = pd.DataFrame({
    'word': color_words_test + most_common_color_words_test + non_color_words_test,
    'embedding': color_embeddings_test + most_common_color_embeddings_test + non_color_embeddings_test,
    'label': [1]*len(color_words_test) + [1]*len(most_common_color_words_test) + [0]*len(non_color_words_test)
})

# shuffle the dataframes
# df_train = df_train.sample(frac=1).reset_index(drop=True)
# df_test = df_test.sample(frac=1).reset_index(drop=True)

print(df_train[:5])
print(df_train[-5:])
print(df_test[:5])
print(df_test[-5:])

          word                                          embedding  label
0  ultramarine  [0.24822497367858887, -0.5821539759635925, -0....      1
1        ivory  [0.16264645755290985, 0.28781405091285706, -0....      1
2       sienna  [0.31997525691986084, -0.5059263706207275, 0.0...      1
3          tan  [-0.0007681101560592651, -0.07876596599817276,...      1
4        olive  [-0.16430577635765076, 0.23664475977420807, -0...      1
         word                                          embedding  label
109   science  [-0.12869563698768616, 0.1851356029510498, -0....      0
110       car  [0.3777954578399658, -0.013787495903670788, -0...      0
111   emotion  [-0.494671493768692, 0.11225312203168869, -0.1...      0
112     actor  [-0.1514865756034851, 0.24995048344135284, -0....      0
113  religion  [0.2626156508922577, 0.5329587459564209, -0.28...      0
         word                                          embedding  label
0  vermillion  [-0.2022084891796112, -0.6616164445877075, 

In [92]:
class CustomDataset(Dataset):
    def __init__(self, tokenized_texts, labels):
        self.tokenized_texts = torch.tensor(tokenized_texts)
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.tokenized_texts[idx], self.labels[idx]

In [93]:
train_dataset = CustomDataset(df_train['embedding'], df_train['label'])
test_dataset = CustomDataset(df_test['embedding'], df_test['label'])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [98]:
# train the model
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)

for epoch in range(num_epochs):
    net.train()
    combined_loss = 0
    for batch in train_loader:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader)}')
        # print(combined_loss / len(train_loader))



Initial loss: 0.5581544563174248
Epoch 10, loss=0.028347435989417136
Epoch 20, loss=0.010821076430147514
Epoch 30, loss=0.006283213515416719
Epoch 40, loss=0.004778077709488571
Epoch 50, loss=0.00310863385675475
Epoch 60, loss=0.0026184962480328977
Epoch 70, loss=0.0020052643812960014
Epoch 80, loss=0.0015348755623563193
Epoch 90, loss=0.001361042304779403
Epoch 100, loss=0.0011844615437439643


A lot of variance over different runs
Best so far: 0.001

In [143]:
# evaluate the model
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for batch in loader:
            # get data to GPU if possible
            data = batch[0].to(device=device)
            targets = batch[1].to(device=device)

            # forward
            scores = model(data)
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
    
    return num_correct / num_samples

print(f'Accuracy on training set: {check_accuracy(train_loader, net)*100:.2f}%')
print(f'Accuracy on test set: {check_accuracy(test_loader, net)*100:.2f}%')

Accuracy on training set: 100.00%
Accuracy on test set: 100.00%


In [149]:
# Find the most important features of the model
# get the gradients of the model
net.eval()
gradients = []
for batch in train_loader:
    # get data to GPU if possible
    data = batch[0].to(device=device)
    targets = batch[1].to(device=device)

    # forward
    scores = net(data)
    loss = criterion(scores, targets)
    loss.backward()
    gradients.append(net.fc1.weight.grad.detach().cpu().numpy())

# average the gradients
gradients = np.mean(gradients, axis=0)
print(gradients.shape)

# print(len(gradients))
# print(len(gradients[0]))
# print(len(gradients[0][0]))

# find the indices of the 10 most important features
most_important_gradients_0 = gradients[0].argsort()[-10:][::-1]
most_important_gradients_1 = gradients[1].argsort()[-10:][::-1]

# print the 10 most important features
print(most_important_gradients_0.shape)
print(most_important_gradients_0)
print(gradients[0][most_important_gradients_0])

print(most_important_gradients_1.shape)
print(most_important_gradients_1)
print(gradients[1][most_important_gradients_1])

(2, 768)
(10,)
[308 733 727 752 372 381 251 666 191 414]
[0.1024159  0.03050245 0.02732667 0.02579284 0.0254136  0.02382957
 0.02367691 0.02188338 0.02164853 0.02030942]
(10,)
[473 163 722 314 472 446  40 590 284 135]
[0.01946797 0.01939146 0.01858333 0.01790875 0.01757148 0.0161675
 0.01464333 0.01424586 0.01409603 0.01402995]


In [150]:
combined_importance_grad = list(set(list(most_important_gradients_0) + list(most_important_gradients_1)))
print(combined_importance_grad)
print(len(combined_importance_grad))

[135, 666, 284, 414, 163, 40, 308, 314, 446, 191, 590, 722, 727, 472, 473, 733, 752, 372, 251, 381]
20


In [140]:
# Find the most important features for the model
# get the weights of the fully connected layer
weights = net.fc1.weight.data.cpu().numpy()

# find the indices of the 10 most important features
most_important_features_0 = weights[0].argsort()[-10:][::-1]
most_important_features_1 = weights[1].argsort()[-10:][::-1]

# print the 10 most important features and their weights
print(most_important_features_0.shape)
print(most_important_features_0)
print(weights[0][most_important_features_0])

print(most_important_features_1.shape)
print(most_important_features_1)
print(weights[1][most_important_features_1])

(10,)
[696 590 472 360 650  47 100  37 663 314]
[0.22362967 0.20893602 0.20430925 0.20341647 0.19949287 0.19481428
 0.19434766 0.19377029 0.19191854 0.19186114]
(10,)
[511 611 267 719 271  14  82 626 238  10]
[0.31380597 0.29059002 0.25619408 0.2505709  0.2479416  0.24274242
 0.23808081 0.23435041 0.2332934  0.2308459 ]


In [113]:
combined_importance = list(set(list(most_important_features_0) + list(most_important_features_1)))
print(combined_importance)
print(len(combined_importance))

[650, 267, 10, 14, 271, 663, 37, 47, 696, 314, 590, 719, 82, 472, 611, 100, 360, 238, 626, 511]
20


In [120]:
class Net_small(nn.Module):
    def __init__(self):
        super(Net_small, self).__init__()
        self.fc1 = nn.Linear(20, 2)

    def forward(self, x):
        # apply relu on the output of the fully connected layer
        x = F.relu(self.fc1(x))
        return x

In [121]:
# create datasets with the 20 most important features
df_train_small = pd.DataFrame({
    'word': df_train['word'],
    'embedding': df_train['embedding'].apply(lambda x: [x[i] for i in combined_importance]),
    'label': df_train['label']
})

df_test_small = pd.DataFrame({
    'word': df_test['word'],
    'embedding': df_test['embedding'].apply(lambda x: [x[i] for i in combined_importance]),
    'label': df_test['label']
})

print(df_train_small[:5])
print(df_train_small[-5:])
print(df_test_small[:5])
print(df_test_small[-5:])

print("\nLength of reduced embedding: ", len(df_train_small['embedding'].tolist()[0]))

          word                                          embedding  label
0  ultramarine  [0.3691755533218384, 0.12182317674160004, -0.1...      1
1        ivory  [-0.8768669366836548, 0.01354244165122509, 0.0...      1
2       sienna  [-0.4798089563846588, -0.05054834485054016, 0....      1
3          tan  [-0.07974042743444443, 0.3735435903072357, -0....      1
4        olive  [0.117793507874012, 0.19942378997802734, -0.02...      1
         word                                          embedding  label
109   science  [0.19104573130607605, -0.30567416548728943, -0...      0
110       car  [-0.04580724239349365, 0.048196613788604736, 0...      0
111   emotion  [0.3724481463432312, -0.12157141417264938, -0....      0
112     actor  [0.47917813062667847, -0.08011671900749207, 0....      0
113  religion  [0.05262870341539383, -0.08452796190977097, -0...      0
         word                                          embedding  label
0  vermillion  [0.5471291542053223, 0.05379761755466461, -

In [122]:
# create datasets
train_dataset_small = CustomDataset(df_train_small['embedding'], df_train_small['label'])
test_dataset_small = CustomDataset(df_test_small['embedding'], df_test_small['label'])

# create dataloaders
train_loader_small = DataLoader(train_dataset_small, batch_size=16, shuffle=True)
test_loader_small = DataLoader(test_dataset_small, batch_size=16, shuffle=True)

In [174]:
# train the model
net2 = Net_small()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net2.to(device)

for epoch in range(num_epochs):
    net2.train()
    combined_loss = 0
    for batch in train_loader_small:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net2(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader_small)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader_small)}')
        # print(combined_loss / len(train_loader))


Initial loss: 0.6927627995610237
Epoch 10, loss=0.6904962435364723
Epoch 20, loss=0.6927627995610237
Epoch 30, loss=0.6941152662038803
Epoch 40, loss=0.6888934075832367
Epoch 50, loss=0.6982776075601578
Epoch 60, loss=0.6929636225104332
Epoch 70, loss=0.6935726255178452
Epoch 80, loss=0.6927627921104431
Epoch 90, loss=0.6936252266168594
Epoch 100, loss=0.693363830447197


In [151]:
# create datasets with the 20 most important features by gradients
df_train_small_grad = pd.DataFrame({
    'word': df_train['word'],
    'embedding': df_train['embedding'].apply(lambda x: [x[i] for i in combined_importance_grad]),
    'label': df_train['label']
})

df_test_small_grad = pd.DataFrame({
    'word': df_test['word'],
    'embedding': df_test['embedding'].apply(lambda x: [x[i] for i in combined_importance_grad]),
    'label': df_test['label']
})

print(df_train_small_grad[:5])
print(df_train_small_grad[-5:])
print(df_test_small_grad[:5])
print(df_test_small_grad[-5:])

print("\nLength of reduced embedding: ", len(df_train_small_grad['embedding'].tolist()[0]))

          word                                          embedding  label
0  ultramarine  [0.320054292678833, 0.5433744192123413, -0.113...      1
1        ivory  [-0.585979163646698, -0.5540187358856201, 0.08...      1
2       sienna  [-0.18103229999542236, -0.20594161748886108, 0...      1
3          tan  [-0.2616365849971771, -0.29962193965911865, 0....      1
4        olive  [-0.06778931617736816, -0.4506070911884308, 0....      1
         word                                          embedding  label
109   science  [-0.3798688054084778, -0.15420736372470856, 0....      0
110       car  [0.07106814533472061, -0.2173701375722885, 0.3...      0
111   emotion  [-0.15966369211673737, -0.3579808473587036, 0....      0
112     actor  [0.06760074943304062, -0.6156966686248779, -0....      0
113  religion  [-0.01598823070526123, -0.15939755737781525, 0...      0
         word                                          embedding  label
0  vermillion  [0.29682737588882446, -0.12140871584415436,

In [152]:
# create datasets
train_dataset_small_grad = CustomDataset(df_train_small_grad['embedding'], df_train_small_grad['label'])
test_dataset_small_grad = CustomDataset(df_test_small_grad['embedding'], df_test_small_grad['label'])

# create dataloaders
train_loader_small_grad = DataLoader(train_dataset_small_grad, batch_size=16, shuffle=True)
test_loader_small_grad = DataLoader(test_dataset_small_grad, batch_size=16, shuffle=True)

In [171]:
# train the model
net3 = Net_small()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.003)

num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net3.to(device)

for epoch in range(num_epochs):
    net3.train()
    combined_loss = 0
    for batch in train_loader_small_grad:
        # get data to GPU if possible
        data = batch[0].to(device=device)
        targets = batch[1].to(device=device)

        # forward
        scores = net3(data)
        loss = criterion(scores, targets)
        combined_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    
    # print initial loss
    if epoch == 0:
        print(f'Initial loss: {combined_loss/len(train_loader_small_grad)}')
        
    # print average loss per epoch every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss={combined_loss/len(train_loader_small_grad)}')
        # print(combined_loss / len(train_loader))


Initial loss: 0.6035684198141098
Epoch 10, loss=0.6862051263451576
Epoch 20, loss=0.6025184318423271
Epoch 30, loss=0.6017399318516254
Epoch 40, loss=0.5993422865867615
Epoch 50, loss=0.6391497701406479
Epoch 60, loss=0.6371235996484756
Epoch 70, loss=0.6007024198770523
Epoch 80, loss=0.6436096280813217
Epoch 90, loss=0.5970332324504852
Epoch 100, loss=0.5979795902967453
