Returning only 0.5 AUC and AUPR (i.e., model is not working) #9104
-
Dear PyG community, when I run the model on several datasets it always returns 0.49–0.5 AUC and AUPR scores and about a 0.6 F1-score, whether I use all the datasets together or just a single one.
I suspected a problem with the input data, but a simpler baseline model produced reliable results (0.6–0.8 AUC) on the same data despite the label imbalance. # PyG version: 2.5.0.dev20240313
# --- Data preparation (illustrative snippet) -------------------------------
# Builds graph_list (one PyG Data object per dataset) and inspects the label
# balance of each graph. `graph_list` construction is elided (`...`).
data_list = ['A', 'B', 'C', 'D', 'E'] # The whole datasets
...
print(graph_list[0]) # Convert the datasets (A~E) into the PyG graph objects. graph_list contains 5 objects.
# Data(edge_index=[2, 5955], edge_label=[5955], x=[19392, 4], edge_class=[5955]) // edge_class: custom variable indicating the index of dataset(0~4)
# Per-dataset class balance of edge_label — note the heavy imbalance toward
# class 0 in every dataset (relevant to the 0.5-AUC symptom being discussed).
for i in range(len(graph_list)):
    print(torch.bincount(graph_list[i].edge_label))
# tensor([8246, 1613]) // # of 0: 8246, # of 1: 1613
# tensor([4778, 1177])
# tensor([75423, 1560])
# tensor([118978, 5585])
# tensor([951, 274])
# key_cID: module-level node-ID mapping array used later by the model
# (len(key_cID) == 19392, matching x=[19392, 4] above).
key_cID
# key_cID: array([ 0, 1, 3, ..., 101089, 101090, 101091]) — ID-mapping array,
# len(key_cID) == 19392; maps rows of the small feature matrix to rows of the
# large node set.
class MyModel(torch.nn.Module):
    """Two-layer GAT link-prediction encoder with an MLP side branch.

    The side branch (``s_data_layer``) lifts the 4-dim auxiliary node
    features to 256 dims; ``forward`` scatters them into the large node set
    (via the module-level ``key_cID`` mapping) and concatenates them with the
    256-dim base embeddings before the two ``GATConv`` layers.

    NOTE(review): relies on module-level globals ``key_cID``, ``device``,
    ``np`` — confirm they are defined before instantiation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()  # modern zero-arg super() (was super(MyModel, self))
        # MLP: project the 4-dim auxiliary node features up to 256 dims.
        self.s_data_layer = nn.Sequential(
            nn.Linear(4, 64, bias=True),
            nn.LeakyReLU(),  # inplace=True
            nn.Linear(64, 256, bias=True),
        )
        # GAT stack: 12 attention heads in layer 1 (concatenated), 1 head in
        # layer 2. `in_channels` must equal 256 + 256 = 512 (see forward()).
        self.conv1 = GATConv(in_channels, hidden_channels, heads=12)
        self.lin1 = torch.nn.Linear(in_channels, hidden_channels * 12)
        self.conv2 = GATConv(hidden_channels * 12, out_channels, heads=1, concat=True)
        self.lin2 = torch.nn.Linear(hidden_channels * 12, out_channels)

    def convert_data_size(self, l_data, s_data):
        """Project ``s_data`` (N_small x 4) to 256 dims and scatter it into a
        zero tensor aligned with ``l_data`` (N_large x 256) using ``key_cID``.

        NOTE(review): ``index_add_`` *sums* rows — this assumes the IDs in
        ``key_cID`` are unique; verify, otherwise features are accumulated.
        """
        x2 = torch.zeros((len(l_data), 256), dtype=torch.float).to(device)
        x3 = self.s_data_layer(s_data).to(device)
        # as_tensor avoids an extra copy when key_cID is already a numpy array.
        nonzero_index = torch.as_tensor(key_cID).to(device)
        x2.index_add_(0, nonzero_index, x3)
        return x2

    def forward(self, l_data, s_data, node_ids, neighbor_edge_class, edge_index):
        """Return node embeddings plus the attention weights of both GAT layers.

        Args:
            l_data: (N_large, 256) base node embeddings.
            s_data: list of per-dataset auxiliary feature matrices (N_small, 4).
            node_ids: global IDs of the sampled nodes (``data.n_id``).
            neighbor_edge_class: per-edge dataset-source class.
            edge_index: (2, E) sampled edge index.
        """
        x1 = l_data  # (N_large, 256)
        # BUG FIX: the original wrote `s_data[i] = self.convert_data_size(...)`,
        # mutating the caller's list in place. That list is shared (data.x[1]),
        # so on every subsequent forward pass the already-projected 256-dim
        # tensors were fed back into the 4->256 MLP. Build a local list instead.
        projected = [self.convert_data_size(l_data, s) for s in s_data]
        tmp_filled = torch.zeros((len(l_data), 256), dtype=torch.float).to(device)
        # Scatter the auxiliary features of the sampled neighbor nodes of each
        # dataset class into the corresponding rows of tmp_filled.
        for e_i_class in torch.unique(neighbor_edge_class):
            indices = (neighbor_edge_class == e_i_class).nonzero(as_tuple=True)[0]
            key_n_id = torch.unique(node_ids[edge_index[:, indices]])
            s_n_id = np.in1d(key_cID, key_n_id.cpu()).nonzero()[0]
            tmp_filled[key_cID[s_n_id]] = projected[int(e_i_class.item())][s_n_id].float()
        x = torch.cat((x1, tmp_filled), dim=1)  # (N_large, 512)
        x_1, a1 = self.conv1(x, edge_index, return_attention_weights=True)
        x = F.leaky_relu(x_1)
        x = F.dropout(x, p=0.2, training=self.training)
        x_2, a2 = self.conv2(x, edge_index, return_attention_weights=True)
        return x_2, a1, a2
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 5 replies
-
def _assign_edge_index_class(data):
    """Assign a dataset-source class to every sampled edge of the batch.

    Extracted helper: this loop was duplicated verbatim in the training and
    validation loops of ``train``. Logic is preserved line-for-line.

    Edges incident to a supervision (label) edge inherit that edge's class;
    the remaining sampled neighbor edges take the max class seen among edges
    that share their target node.

    NOTE(review): the per-edge Python loop is O(E * E_label) per batch and
    likely dominates runtime — consider vectorizing with torch.isin.
    """
    data.edge_class = data.edge_class[data.input_id]
    data.edge_index_class = torch.zeros(len(data.edge_index[0])).to(device)
    for i in range(len(data.edge_index_class)):
        if ((data.edge_index[1][i] in data.edge_label_index[0])
                or (data.edge_index[1][i] in data.edge_label_index[1])):
            data.edge_index_class[i] = data.edge_class[
                (data.edge_index[1][i] == data.edge_label_index[0])
                | (data.edge_index[1][i] == data.edge_label_index[1])]
        else:
            data.edge_index_class[i] = torch.max(
                data.edge_index_class[(data.edge_index[1][i] == data.edge_index[0])])


def train(num_epochs):
    """Run ``num_epochs`` of training + validation over the module-level
    ``model``/``optimizer``/``criterion`` and ``train_loader``/``val_loader``,
    printing losses and validation AUC/AUPR/F1 each epoch."""
    for epoch in range(1, num_epochs + 1):
        tr_losses = 0
        val_losses = 0
        model.train()
        for data in tqdm(train_loader):
            data = data.to(device)
            _assign_edge_index_class(data)
            optimizer.zero_grad()
            z, a1, a2 = model(data.x[0], data.x[1], data.n_id,
                              data.edge_index_class, data.edge_index)
            # Link score = dot product of the two endpoint embeddings.
            out = ((z[data.edge_label_index[0]] * z[data.edge_label_index[1]]).sum(dim=-1)).view(-1)
            tr_loss = criterion(out, data.edge_label.float())
            tr_losses += tr_loss.item()
            tr_loss.backward()
            optimizer.step()
        # NOTE(review): this sums per-batch *mean* losses but divides by the
        # number of examples — the reported value is scaled oddly; divide by
        # len(train_loader) (number of batches) for a true mean. Kept as-is
        # since it only affects the printed number, not training.
        avg_tr_loss = tr_losses / len(train_loader.dataset)
        model.eval()
        with torch.no_grad():
            y_val_pred, y_val_pred_prob, y_val_true = [], [], []
            for data in tqdm(val_loader):
                data = data.to(device)
                _assign_edge_index_class(data)
                y_val_true.append(data.edge_label)
                z, a1, a2 = model(data.x[0], data.x[1], data.n_id,
                                  data.edge_index_class, data.edge_index)
                out = ((z[data.edge_label_index[0]] * z[data.edge_label_index[1]]).sum(dim=-1)).view(-1)
                out_sig = out.sigmoid()  # same scores, computed once
                val_loss = criterion(out, data.edge_label.float())
                val_losses += val_loss.item()
                y_val_pred.append((out_sig > 0.5).float().cpu())
                y_val_pred_prob.append((out_sig).float().cpu())
            # NOTE(review): same scaling caveat as avg_tr_loss above.
            avg_val_loss = val_losses / len(val_loader.dataset)
            y, pred, pred_prob = (torch.cat(y_val_true, dim=0).cpu().numpy(),
                                  torch.cat(y_val_pred, dim=0).cpu().numpy(),
                                  torch.cat(y_val_pred_prob, dim=0).cpu().numpy())
            val_f1 = f1_score(y, pred)  # average='micro'
            val_auc = roc_auc_score(y, pred_prob)
            val_aupr = average_precision_score(y, pred_prob)
            val_acc = accuracy_score(y, pred)
            print(f'Epoch: {epoch:03d}, Training Loss: {avg_tr_loss:.4f}, Validation Loss: {avg_val_loss:.4f} \n Validation AUC: {val_auc:.4f}, Validation AUPR: {val_aupr:.4f}, Validation F1-score: {val_f1:.4f}')
Beta Was this translation helpful? Give feedback.
-
# ConcatedGraphList is just for splitting overall edges in the whole dataset
# during the k-fold CV.
class ConcatedGraphList(Dataset):
    """Edge-level dataset formed by concatenating the edges of several graphs.

    Args:
        s_g_list: list of graph objects, each exposing ``edge_index`` (2, E_i),
            ``edge_label`` (E_i,), ``edge_class`` (E_i,) and node features ``x``.

    Indexing returns one edge as ``(edge_index_col, edge_label, edge_class)``.
    """

    def __init__(self, s_g_list):
        self.s_g_list = s_g_list
        # Idiom fix: iterate the list directly instead of range(len(...)).
        self.edge_index = torch.cat([g.edge_index for g in s_g_list], dim=1)
        self.edge_label = torch.cat([g.edge_label for g in s_g_list])
        self.edge_class = torch.cat([g.edge_class for g in s_g_list])
        # Node features are kept per-graph (shapes differ between graphs).
        self.x = [g.x for g in s_g_list]

    def __getitem__(self, idx):
        """Return the idx-th edge (its column of edge_index, label, class)."""
        return self.edge_index[:, idx], self.edge_label[idx], self.edge_class[idx]

    def __len__(self):
        """Total number of concatenated edges."""
        return len(self.edge_label)
# --- k-fold cross-validation driver (fragment; `...` elides repeated args) ---
# SmallData: the concatenated supervision-edge dataset (A~E) used for splitting.
SmallData= ConcatedGraphList(graph_list)
for fold, (train_idx, val_idx, test_idx) in enumerate(zip(*k_fold(SmallData, folds))): # SmallData: Concated data list (A~E) for k-fold CV
    print(f'FOLD {fold}')
    print('-------------------------------------------')
    # Per-fold Data objects: message-passing edges always come from LargeData,
    # while the supervision edges/labels come from the SmallData split indices.
    kf_train_data = Data(edge_index = LargeData.edge_index,
                         edge_label = SmallData.edge_label[train_idx],
                         edge_label_index = SmallData.edge_index[:, train_idx],
                         edge_class = SmallData.edge_class[train_idx],
                         x=[opt_rotate.node_emb.weight, SmallData.x],
                         num_nodes=opt_rotate.num_nodes)
    kf_val_data = Data(edge_index=LargeData.edge_index,
                       edge_label=SmallData.edge_label[val_idx],
                       ...
    kf_test_data = Data(edge_index=LargeData.edge_index,
                        edge_label=SmallData.edge_label[test_idx],
                        ...
    # Class-imbalance-aware samplers over the supervision labels of each split.
    # NOTE(review): resampling validation/test changes the evaluated data
    # distribution — confirm this is intended (see the reply below).
    tr_sampler=ImbalancedSampler(dataset = kf_train_data.edge_label)
    val_sampler=ImbalancedSampler(dataset = kf_val_data.edge_label)
    ts_sampler=ImbalancedSampler(dataset = kf_test_data.edge_label)
    train_loader = LinkNeighborLoader(kf_train_data, edge_label_index=kf_train_data.edge_label_index, edge_label=kf_train_data.edge_label,
                                      batch_size=32, shuffle=False, neg_sampling_ratio=0.0, num_neighbors=[2,2], disjoint=True, sampler=tr_sampler)
    val_loader = LinkNeighborLoader(kf_val_data, ...)
    test_loader = LinkNeighborLoader(kf_test_data, ...)
    ### Model declaration ###
    # in_channels=512 matches the 256 (base) + 256 (auxiliary) concatenation
    # performed inside MyModel.forward.
    model = MyModel(in_channels=512, hidden_channels=128, out_channels=64).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
    criterion = torch.nn.BCEWithLogitsLoss()
    ### Training and pre-Test ###
    train(num_epochs)
    y, pred, pred_prob = test(test_loader, 0.5)

I'm really sorry for the long question, but I couldn't spot any problem in my code.
Beta Was this translation helpful? Give feedback.
It's a bit hard for me to give good advice here. For individual batches, though, the class ratio can differ; it's only on average that the labels should be uniformly distributed. However, I don't think you want to enable this flag for validation/test, as it would mean making predictions on an artificial data distribution (and you would also predict certain examples twice or more).