# Load and Prepare Data

In [None]:
import pandas as pd

# Read the three synthetic CSV tables used throughout the notebook.
interactions_df = pd.read_csv('synthetic_user_item_interactions_multimedia.csv')
item_attributes_df = pd.read_csv('synthetic_item_attributes_multimedia.csv')
kg_relations_df = pd.read_csv('synthetic_knowledge_graph_relations_multimedia.csv')

# Sanity check: print a heading followed by the first rows of every table.
for heading, frame in (
    ("User-Item Interactions:", interactions_df),
    ("\nItem-Attribute Relationships:", item_attributes_df),
    ("\nKnowledge Graph Relations:", kg_relations_df),
):
    print(heading)
    print(frame.head())


# Preprocess Data for GNN/GAT

In [None]:
import networkx as nx
import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

# Build one undirected graph over users, items and attributes. Node names carry
# a type prefix ("user_", "item_", "attr_") so IDs from different ID spaces
# cannot collide.
G = nx.Graph()


def _edge_pairs(frame, src_col, src_prefix, dst_col, dst_prefix):
    """Yield one (source, target) node-name pair per row of ``frame``.

    The two ID columns are read directly instead of via ``iterrows()``:
    ``iterrows()`` coerces each row to a single dtype, so an integer ID in a
    frame that also has a float column would be rendered as e.g. ``item_5.0``
    and would no longer match ``item_5`` produced from another frame.
    """
    for src, dst in zip(frame[src_col], frame[dst_col]):
        yield f"{src_prefix}_{src}", f"{dst_prefix}_{dst}"


# User-item interaction edges.
G.add_edges_from(_edge_pairs(interactions_df, 'user_id', 'user', 'item_id', 'item'))

# Item-attribute edges.
G.add_edges_from(_edge_pairs(item_attributes_df, 'item_id', 'item', 'attribute_id', 'attr'))

# Item-item edges from the knowledge graph relations.
G.add_edges_from(_edge_pairs(kg_relations_df, 'item_id', 'item', 'related_item_id', 'item'))

# Convert the NetworkX graph to a PyTorch Geometric Data object.
# NOTE(review): G carries no node or edge attributes, so the resulting Data
# object holds only the graph structure. Later cells read data.x, data.y,
# data.train_mask and data.test_mask; these must be attached to `data` before
# the model can be built and trained.
data = from_networkx(G)

# Display the PyTorch Geometric Data object
print(data)


# Build the KGAT Model Using GCN and GAT Layers

In [None]:
import torch.nn.functional as F
from torch_geometric.nn import GATConv, GCNConv

class KGAT(torch.nn.Module):
    """Node classifier made of one GCN layer followed by two GAT layers.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_channels: Width of the two intermediate representations.
        num_classes: Number of output classes per node.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes):
        super().__init__()
        # Layers are created in the order they are applied in forward().
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        self.conv3 = GATConv(hidden_channels, num_classes)

    def forward(self, data):
        """Return log-softmax class scores (over dim 1) for the graph in ``data``.

        ``data`` must expose ``x`` (node features) and ``edge_index``.
        """
        features = data.x
        edges = data.edge_index

        # GCN layer, then the first GAT layer, each followed by ReLU.
        hidden = F.relu(self.conv1(features, edges))
        hidden = F.relu(self.conv2(hidden, edges))

        # Final GAT layer produces the raw class scores.
        scores = self.conv3(hidden, edges)
        return F.log_softmax(scores, dim=1)

# Initialize the model, then define the optimizer and the loss.
model = KGAT(num_node_features=data.num_node_features, hidden_channels=16, num_classes=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# KGAT.forward already returns log-probabilities (log_softmax), so the matching
# criterion is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = torch.nn.NLLLoss()


# Train the Model

In [None]:
def train():
    """Run one full-batch optimisation step and return the loss as a float.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and ``data``;
    only nodes selected by ``data.train_mask`` contribute to the loss.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data)
    selected = data.train_mask
    step_loss = criterion(predictions[selected], data.y[selected])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def test():
    """Return the model's accuracy on the nodes selected by ``data.test_mask``.

    Uses the module-level ``model`` and ``data``. Returns ``nan`` when the test
    mask selects no nodes, since accuracy is undefined in that case.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        pred = model(data).argmax(dim=1)

    mask = data.test_mask
    total = int(mask.sum())
    if total == 0:
        return float("nan")

    correct = int((pred[mask] == data.y[mask]).sum())
    return correct / total

# Training loop: 200 full-batch epochs, reporting progress every 10th epoch.
for epoch in range(1, 201):
    loss = train()
    if epoch % 10 == 0:
        # test() scores the nodes in data.test_mask, so name the value accordingly.
        test_acc = test()
        print(f'Epoch: {epoch}, Loss: {loss:.4f}, Test Acc: {test_acc:.4f}')


# Visualization

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Collect the model's final output for every node. Note that forward() returns
# log-softmax class scores, not hidden-layer activations.
model.eval()
with torch.no_grad():
    out = model(data)

# Project the outputs to 2-D with t-SNE; the fixed seed makes the plot reproducible.
z = TSNE(n_components=2, random_state=42).fit_transform(out.cpu().numpy())

# Scatter plot of the projection, coloured by the node labels in data.y.
plt.figure(figsize=(10, 10))
plt.scatter(z[:, 0], z[:, 1], s=70, c=data.y.cpu(), cmap="Set2")
plt.show()
