<a href="https://colab.research.google.com/github/sneha1012/knowledge-graph-completion/blob/main/Knowlegegraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Graph Attention Networks for knowledge graph completion using PyTorch**


Graph Attention Networks (GAT)
Graph Attention Networks (GAT) introduce an attention mechanism that allows nodes to focus on their most informative neighbors, rather than treating all neighbors equally. This is particularly useful for graph-structured data where the importance of neighbors can vary significantly.

We use the **Freebase 15k-237** (FB15k-237) dataset.

- **Origin**: Derived from Freebase; it is a subset of the FB15K dataset.
- **Textual mentions**: Derived from 200 million sentences from the ClueWeb12 corpus coupled with Freebase entity mention annotations.





In [None]:
!pip install torch torchvision networkx matplotlib

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


Mount Google Drive in Colab, then define the path to our dataset.

In [6]:
# Colab-only setup: mount Google Drive so files stored there are readable
# through the local filesystem under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Location of the training file on the mounted Drive. The preprocessing cell
# opens this path and parses each line as a tab-separated triple.
file_path = '/content/drive/My Drive/Data for GATS/train.txt'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Preprocessing our data
#
# Reads the training file line by line. Each non-empty line must hold exactly
# three tab-separated fields: subject, predicate, object. Entities and
# relations are assigned consecutive integer IDs in order of first appearance,
# and every line is stored as an (subject_id, relation_id, object_id) tuple.

# Dictionaries to map entities and relations to unique integer IDs
entity2id = {}
relation2id = {}

# List of (subject_id, relation_id, object_id) tuples, in file order
triples = []

# Read the file and process the data.
# The encoding is pinned so parsing does not depend on the platform default.
with open(file_path, 'r', encoding='utf-8') as f:
    for line_no, line in enumerate(f, start=1):
        line = line.strip()

        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no triple; skip them instead of failing on the unpack below.
        if not line:
            continue

        fields = line.split('\t')
        if len(fields) != 3:
            # Still a ValueError, as a failed tuple unpack would be, but it
            # now tells the reader which line is malformed.
            raise ValueError(
                "{}:{}: expected 3 tab-separated fields, got {}: {!r}".format(
                    file_path, line_no, len(fields), line))
        s, p, o = fields

        # Assign unique IDs to entities and relations on first sight.
        # len(...) is evaluated before insertion, so IDs are 0, 1, 2, ...
        entity2id.setdefault(s, len(entity2id))
        entity2id.setdefault(o, len(entity2id))
        relation2id.setdefault(p, len(relation2id))

        # Store the triple using integer IDs
        triples.append((entity2id[s], relation2id[p], entity2id[o]))


In [11]:
class GraphAttentionLayer(nn.Module):
    """One attention head of a Graph Attention Network over a dense adjacency.

    For N nodes, the layer projects node features with ``W``, scores every
    ordered node pair ``(i, j)`` as ``LeakyReLU(a^T [W h_i || W h_j])``, masks
    pairs where ``adj[i, j] <= 0``, normalises each row with softmax, and
    returns the attention-weighted sum of the projected features.

    Args:
        in_features: Size of each input node feature vector.
        out_features: Size of each projected (output) node feature vector.
        dropout: Dropout probability applied to the attention weights while
            the module is in training mode.
        alpha: Negative slope of the LeakyReLU applied to the raw scores.
        concat: If True, apply ELU to the result (used for hidden heads whose
            outputs the caller concatenates); if False, return it unactivated.
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        # Shared linear projection of node features (no bias).
        self.W = nn.Linear(in_features, out_features, bias=False)
        # Attention vector `a`, stored as a (1, 2 * out_features) weight.
        self.a = nn.Linear(2*out_features, 1, bias=False)
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        """Compute attention-aggregated node features.

        Args:
            input: Node feature matrix of shape (N, in_features).
            adj: Adjacency of shape (N, N); entry > 0 means node j is a
                neighbour of node i and may receive attention.

        Returns:
            Tensor of shape (N, out_features).
        """
        h = self.W(input)  # (N, out_features)

        # a^T [h_i || h_j] == a_src^T h_i + a_dst^T h_j, so the pairwise scores
        # can be formed by broadcasting two (N, 1) vectors instead of building
        # the (N * N, 2 * out_features) tensor of all concatenated pairs, which
        # exhausts memory for graphs with more than a few thousand nodes.
        a_src = self.a.weight[:, :self.out_features]   # (1, out_features)
        a_dst = self.a.weight[:, self.out_features:]   # (1, out_features)
        src_score = torch.matmul(h, a_src.t())         # (N, 1)
        dst_score = torch.matmul(h, a_dst.t())         # (N, 1)
        e = self.leakyrelu(src_score + dst_score.t())  # (N, N), e[i, j]

        # Large negative (finite) score for non-edges so softmax gives them
        # ~0 weight. Because it is finite, a row with no neighbours at all
        # softmaxes to a uniform distribution rather than NaN.
        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

class GAT(nn.Module):
    """Two-layer Graph Attention Network.

    The first layer runs ``nheads`` independent attention heads, each mapping
    ``nfeat`` input features to ``nhid`` features, and concatenates their
    outputs. The second layer is a single attention head mapping the
    ``nhid * nheads`` concatenated features to ``nclass`` outputs.

    Note that ``forward`` returns ELU activations, not log-probabilities.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
        super().__init__()
        self.dropout = dropout

        # Hidden heads are kept in a plain list and registered one by one, so
        # their parameters appear under the names attention_0, attention_1, ...
        heads = []
        for _ in range(nheads):
            heads.append(
                GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True)
            )
        self.attentions = heads
        for idx, head in enumerate(heads):
            self.add_module('attention_{}'.format(idx), head)

        # Output head consumes the concatenation of all hidden heads.
        self.out_att = GraphAttentionLayer(
            nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False
        )

    def forward(self, x, adj):
        """Run both attention layers on features ``x`` with adjacency ``adj``."""
        features = F.dropout(x, self.dropout, training=self.training)
        head_outputs = [head(features, adj) for head in self.attentions]
        hidden = torch.cat(head_outputs, dim=1)
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        return F.elu(self.out_att(hidden, adj))


In [10]:
# Hyperparameters
learning_rate = 0.005   # Adam step size
weight_decay = 5e-4     # L2 penalty passed to Adam
epochs = 200            # number of full-graph training steps
nhid = 8                # output features per hidden attention head
nclass = 7              # model output size; NOTE(review): must equal the number
                        # of distinct classes in `labels` -- confirm this value
dropout = 0.6           # dropout probability on features and attention weights
alpha = 0.2             # negative slope of the attention LeakyReLU
nheads = 8              # number of hidden attention heads

# NOTE(review): `initial_entity_embeddings`, `initial_entity_embeddings_tensor`,
# `adjacency_matrix` and `labels` are not defined in any cell visible here;
# they must be created before this cell runs, otherwise it raises NameError.

# Model and optimizer
model = GAT(nfeat=initial_entity_embeddings.shape[1],
            nhid=nhid,
            nclass=nclass,
            dropout=dropout,
            nheads=nheads,
            alpha=alpha)
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop (full-batch: one forward/backward pass over the whole graph per epoch)
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(initial_entity_embeddings_tensor, adjacency_matrix)  # Define the adjacency_matrix
    # The model returns ELU activations, not log-probabilities, so nll_loss
    # would not compute a valid classification loss. cross_entropy applies
    # log_softmax internally and therefore accepts these raw scores.
    loss_train = F.cross_entropy(output, labels)  # Define the labels
    loss_train.backward()
    optimizer.step()


NameError: ignored