In [1]:
#!pip install sentence_transformers
from sentence_transformers import SentenceTransformer,InputExample,losses,evaluation,util
import pandas as pd
import numpy as np
import torch
import argparse
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from sklearn import preprocessing
from sklearn.utils.class_weight import compute_class_weight

In [2]:
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# print all output to a file
import sys
import os
import datetime
now = datetime.datetime.now()
data = 'R8_toy'
random_pairs = 20
positive_pairs = 5
EPS = 1e-9
model_name = 'all-MiniLM-L6-v2'
# model_name = 'all-mpnet-base-v2'
data_dir = 'data/'+data+'/'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using device:', device)
sys.stdout = open('output_'+ data+'_baseline_'+model_name+now.strftime("%Y-%m-%d-%H-%M-%S")+'.txt', 'w')
#model = SentenceTransformer(model_name)

Using device: cpu


In [3]:
train = pd.read_csv(r'/content/train.csv')
train = train.sample(frac=1).reset_index(drop=True)
label_encoder = preprocessing.LabelEncoder()
X_train = train['text'].values
y_train = train['label'].values
y_train = label_encoder.fit_transform(y_train)

In [4]:
val = pd.read_csv(r'/content/val.csv')
X_val = val['text'].values
y_val = val['label'].values
y_val = label_encoder.fit_transform(y_val)
#print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)
num_labels = len(np.unique(y_train))
#print(num_labels)

In [5]:
test = pd.read_csv(r'/content/test.csv')
X_test = test['text'].values
y_test = test['label'].values
y_test = label_encoder.fit_transform(y_test)

In [6]:
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

In [7]:
import torch.optim as optim
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [8]:
X_train = X_train.tolist()
X_val = X_val.tolist()
X_test = X_test.tolist()

In [9]:
total_X = X_train + X_val+ X_test

In [10]:
total_X = np.array(total_X)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphLearningLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Parameter(torch.randn(size=[1, 384])).to(device)  # Learnable parameter

    def calculate_sim_matrix(self, embeddings):
        # Calculate the pairwise absolute difference between embeddings
        # Use broadcasting for efficient computation
        diff = embeddings.unsqueeze(1) - embeddings.unsqueeze(0)  # [N, N, D]
        abs_diff = torch.abs(diff)  # Absolute difference

        # Compute the similarity matrix
        sub_emb = abs_diff.permute(0, 1, 2).reshape(-1, embeddings.shape[-1]).T  # [D, N*N]
        sub_emb = sub_emb.to(device)
        temp = torch.exp(F.relu(self.a @ sub_emb))  # [1, N*N]
        sim_matrix = temp.reshape(len(embeddings), len(embeddings))  # [N, N]

        # Normalize the similarity matrix
        sim_matrix = sim_matrix / sim_matrix.sum()
        return sim_matrix

    def forward(self, embeddings):
        # Calculate similarity matrix
        my_sim = self.calculate_sim_matrix(embeddings).to(device)

        # Compute the loss
        diff = embeddings.unsqueeze(1) - embeddings.unsqueeze(0)  # [N, N, D]
        norms = torch.norm(diff, dim=-1).to(device)  # [N, N]
        my_loss = (norms * my_sim).sum()  # Weighted sum of norms
        return my_loss


In [12]:
import math
class GraphConvolution(nn.Module):

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        support = torch.mm(input, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'

In [13]:
SEED = 123
torch.manual_seed(SEED)
np.random.seed(SEED)

test_layer = GraphConvolution(5, 4)
print("Weight Matrix", test_layer.weight)
print("Bias", test_layer.bias )
print(test_layer.in_features,test_layer.out_features)

In [14]:
class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)

In [15]:
def intialize_model(nfeat, hidden, c, dropout, lr, wd):
  # Model and optimizer
  gcn_model = GCN(nfeat=nfeat,
              nhid=hidden,
              nclass=c,
              dropout=dropout).to(device)
  gcn_optimizer = optim.Adam(gcn_model.parameters(),
                        lr=lr, weight_decay=wd)
  sbert_model = SentenceTransformer(model_name).to(device)
  sbert_optimizer = optim.Adam(sbert_model.parameters(),
                        lr=lr, weight_decay=wd)
  return gcn_model, gcn_optimizer, sbert_model, sbert_optimizer


In [16]:
def accuracy(output, labels):
  preds   = output.max(1)[1].type_as(labels)
  print(preds, labels)
  correct = preds.eq(labels).double()
  correct = correct.sum()
  return correct / len(labels)

In [17]:
import scipy.sparse as sp
def normalize(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()

In [18]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
  sparse_mx = sparse_mx.tocoo().astype(np.float32)
  indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
  values = torch.from_numpy(sparse_mx.data)
  shape = torch.Size(sparse_mx.shape)
  return torch.sparse.FloatTensor(indices, values, shape)

In [19]:
h = 16
d=0.5
l = 5e-3
w = 5e-3
labels = torch.unique(y_train)
gcn_model, gcn_optimizer, sbert_model, sbert_optimizer = intialize_model(384, h, labels.max().item() + 1, d, l, w)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [20]:
sbert_model.load_state_dict(torch.load(r"/content/sbert_model_for_task_2.pth", weights_only=True, map_location=device))
gcn_model.load_state_dict(torch.load(r"/content/gcn_model_for_task_2.pth", weights_only=True, map_location=device))

<All keys matched successfully>

In [21]:
embeddings = sbert_model.encode(total_X)
embeddings = torch.tensor(embeddings).to(device)


In [22]:
gl_loss = GraphLearningLoss()
#loss_term_2 = 1e-4*gl_loss(embeddings)
adj = sp.coo_matrix(gl_loss.calculate_sim_matrix(embeddings).to("cpu").detach().numpy())
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj).to(device)
output = gcn_model(embeddings, adj)

  return torch.sparse.FloatTensor(indices, values, shape)


In [26]:
acc_train = accuracy(output[:len(X_train)], y_train)

In [27]:
acc_train

tensor(0.8286, dtype=torch.float64)

In [28]:
acc_val = accuracy(output[len(X_train):len(X_train)+len(X_val)], y_val)

In [29]:
acc_val

tensor(0.7547, dtype=torch.float64)

In [23]:
acc_test = accuracy(output[len(X_train)+len(X_val):len(X_train)+len(X_val)+len(X_test)], y_test)

In [24]:
acc_test

tensor(0.8611, dtype=torch.float64)

In [25]:
!pip install torcheval

In [30]:
from torcheval.metrics.functional import multiclass_f1_score
multiclass_f1_score(output[len(X_train)+len(X_val):len(X_train)+len(X_val)+len(X_test)], y_test[:], num_classes=8, average=None)

tensor([0.8732, 0.6087, 0.9459, 0.0000, 0.4286, 0.7778, 0.0000, 0.4444])