<a href="https://colab.research.google.com/github/ksadowski13/GCN/blob/main/GCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [24]:
import random
from random import choice, choices, randint
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import plotly.express as px
import torch
import torch.nn as nn
import torch.nn.functional as F
from networkx.algorithms.community.modularity_max import \
    greedy_modularity_communities
from tqdm.notebook import tqdm

# Utilities

In [25]:
def draw_graph(
    graph: nx.Graph,
    color_map: List[str] = None,
    positions: Dict[int, Tuple[float, float]] = None,
    axis_labels: Tuple[str, str] = None,
    with_edges=True,
) -> None:
    """Draw ``graph`` with node labels on a new 13x13 inch matplotlib figure.

    Args:
        graph: Graph to draw.
        color_map: One color per node, forwarded to ``nx.draw`` as
            ``node_color``. ``None`` keeps the networkx default.
        positions: Mapping from node to its ``(x, y)`` coordinates. When
            omitted, a spring layout is computed for the graph.
        axis_labels: ``(x_label, y_label)``. When given, the axes, their ticks
            and the two labels are switched on.
        with_edges: Draw the edges when ``True`` (the default); draw only the
            nodes and their labels when ``False``.
    """
    _, ax = plt.subplots(figsize=(13, 13))

    if positions is None:
        positions = nx.spring_layout(graph)

    # An explicit empty edge list tells networkx to skip the edges; leaving
    # the argument out draws every edge of the graph.
    edge_options = {} if with_edges else {'edgelist': []}

    nx.draw(
        graph,
        positions,
        node_size=400,
        node_color=color_map,
        with_labels=True,
        ax=ax,
        **edge_options,
    )

    if axis_labels is not None:
        x_label, y_label = axis_labels
        ax.set_axis_on()
        ax.tick_params(left=True, bottom=True,
                       labelleft=True, labelbottom=True)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)


def create_graph() -> Tuple[
        torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, nx.Graph]:
    """Build the synthetic customer graph used throughout the notebook.

    A seeded random graph is generated, split into communities, and every
    community draws its node features from its own per-feature integer ranges.
    Each community has a dominant label, and roughly 5% of the nodes receive
    the opposite label as noise.

    Returns:
        A tuple ``(features, train_labels, validation_labels,
        adjacency_matrix, graph)``:

        * ``features``: ``(number_of_samples, 6)`` tensor of the drawn integer
          features divided by 10.
        * ``train_labels``: long tensor with one ``[label, node_index]`` row
          per training node.
        * ``validation_labels``: long tensor holding the label of every node.
        * ``adjacency_matrix``: dense adjacency matrix of ``graph``.
        * ``graph``: the generated ``nx.Graph``.
    """
    number_of_samples = 200
    probability_of_edge = 0.03
    seed = 13

    random.seed(seed)

    # community -> inclusive (low, high) range for each of the six features.
    feature_range = {
        0: [(1, 3), (1, 3), (1, 10), (1, 10), (7, 10), (4, 6)],
        1: [(1, 5), (6, 10), (1, 3), (1, 5), (5, 10), (1, 3)],
        2: [(4, 7), (1, 3), (1, 10), (4, 6), (1, 3), (1, 10)],
        3: [(1, 2), (2, 6), (8, 10), (8, 10), (8, 10), (4, 10)],
        4: [(3, 8), (4, 6), (2, 5), (1, 3), (1, 4), (1, 5)],
        5: [(4, 7), (5, 8), (5, 8), (7, 10), (5, 10), (3, 6)],
        6: [(7, 10), (9, 10), (1, 4), (1, 10), (3, 10), (6, 10)],
        7: [(3, 8), (7, 10), (8, 10), (5, 10), (6, 10), (1, 10)],
    }
    communities_with_label_0 = (0, 1, 2, 4)

    graph = nx.fast_gnp_random_graph(
        number_of_samples, probability_of_edge, seed=seed)

    communities = list(greedy_modularity_communities(graph))

    features = [[] for _ in range(number_of_samples)]
    labels = [[] for _ in range(number_of_samples)]

    # NOTE(review): only eight communities have feature ranges; a ninth
    # detected community raises KeyError on the lookup below.
    for community, members in enumerate(communities):
        community_features = feature_range[community]
        dominant_label = 0 if community in communities_with_label_0 else 1

        for v in members:
            features[v] = [
                randint(low, high) for low, high in community_features
            ]
            # ``weights`` (relative), not ``cum_weights`` (cumulative): the
            # dominant label is drawn 95% of the time, the other one 5%.
            labels[v] = choices(
                [dominant_label, 1 - dominant_label], weights=[95, 5])[0]

    # Draw node indexes (with repetition) until 51 distinct nodes are picked:
    # the loop only stops once the distinct count exceeds 50.
    indexes = list(range(number_of_samples))
    picked = set()

    while len(picked) <= 50:
        picked.add(choice(indexes))

    train_indexes = list(picked)

    features = torch.Tensor(features) / 10

    train_labels = torch.tensor([[labels[i], i]
                                 for i in train_indexes], dtype=torch.long)
    validation_labels = torch.tensor(labels, dtype=torch.long)

    # ``to_numpy_matrix`` was removed in NetworkX 3.0; ``to_numpy_array``
    # yields the same values as a plain ndarray.
    adjacency_matrix = torch.from_numpy(nx.to_numpy_array(graph))

    return features, train_labels, validation_labels, adjacency_matrix, graph

# Dataset

### Number of customers: 200

### Features:
0. Number of dogs
1. Level of love for dogs
2. Wealth of customer
3. Healthy eating awareness of customer
4. Size of the city of residence
5. Age of the customer

In [None]:
# Build the dataset once; the later cells reuse these module-level names.
features, train_labels, validation_labels, adjacency_matrix, G = create_graph()

# Index 0: junk dog food, index 1: premium dog food.
colors = ['gray', 'purple']

# Ground-truth colouring of every node.
color_map = [colors[label] for label in validation_labels.tolist()]

# Training view: nodes without a training label stay red. The list is sized
# from the data instead of a hard-coded node count.
color_map_train = ['red'] * len(color_map)

for label, node in train_labels.tolist():
    color_map_train[node] = colors[label]

draw_graph(G)

## Graph with train labels



* Gray - Junk Dog Food
* Purple - Premium Dog Food
* Red - label unknown (node is not in the training set)




In [None]:
# Nodes coloured by their training label; nodes outside the training set are red.
draw_graph(G, color_map=color_map_train)

## Graph with ground truth labels (validation)
* Gray - Junk Dog Food
* Purple - Premium Dog Food

In [None]:
# Nodes coloured by their ground-truth (validation) label.
draw_graph(G, color_map=color_map)

## Relationship between **'level of love for dogs'** and **'wealth of customer'**



In [None]:
# x: feature 1 (level of love for dogs), y: feature 2 (wealth of customer).
positions = {
    node: (row[1], row[2]) for node, row in enumerate(features)
}

draw_graph(G, color_map, positions, axis_labels=('love', 'wealth'))

## Relationship between **'number of dogs'** and **'age of customer'**






In [None]:
# x: feature 0 (number of dogs), y: feature 5 (age of the customer).
positions = {
    node: (row[0], row[5]) for node, row in enumerate(features)
}

draw_graph(
    G,
    color_map,
    positions,
    axis_labels=('number of dogs', 'age of customer'),
)

## Relationship between **'size of the city of residence'** and **'healthy eating awareness of customer'**



In [None]:
# Feature 4 is the city size and feature 3 the healthy-eating awareness, so
# x/y are read in that order to agree with the axis labels below.
positions = {
    node: (row[4], row[3]) for node, row in enumerate(features)
}

draw_graph(G, color_map, positions, axis_labels=(
    'city size', 'healthy eating'))

# Model

In [32]:
class GraphConv(nn.Module):
    """Graph convolution layer with symmetric degree normalisation.

    Computes ``D^-1/2 (A + I) D^-1/2 @ inputs @ weight + bias`` where ``A`` is
    the adjacency matrix, ``I`` adds a self-loop to every node and ``D`` is
    the diagonal degree matrix of ``A + I``.

    Args:
        input_features: Number of features per node on input.
        output_features: Number of features per node on output.
        weight: Create the learnable weight matrix. When ``False`` the
            aggregated features are passed on without a linear projection.
        bias: Create the learnable bias vector. When ``False`` no bias is
            added.
    """

    def __init__(
        self,
        input_features: int,
        output_features: int,
        weight=True,
        bias=True,
    ):
        super().__init__()
        self._input_features = input_features
        self._output_features = output_features

        if weight:
            self.weight = nn.Parameter(
                torch.empty(input_features, output_features))
        else:
            self.register_parameter('weight', None)

        if bias:
            self.bias = nn.Parameter(torch.empty(output_features))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Xavier-initialise the weight and zero the bias, when present."""
        if self.weight is not None:
            nn.init.xavier_uniform_(self.weight)

        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(
        self,
        inputs: torch.Tensor,
        adjacency_matrix: torch.Tensor,
    ) -> torch.Tensor:
        """Aggregate neighbour features and apply the linear projection.

        Args:
            inputs: Node features, one row per node.
            adjacency_matrix: Square adjacency matrix with a floating-point
                dtype; row/column ``i`` corresponds to row ``i`` of
                ``inputs``.

        Returns:
            The transformed node features, one row per node.
        """
        # Self-loops: ``torch.diag`` of a matrix would only extract its
        # diagonal, so the identity has to be built explicitly.
        identity = torch.eye(
            adjacency_matrix.shape[0],
            dtype=adjacency_matrix.dtype,
            device=adjacency_matrix.device,
        )
        A_hat = adjacency_matrix + identity

        # Every degree is at least 1 thanks to the self-loop, so the inverse
        # square root is taken element-wise instead of inverting a matrix.
        degree_inv_sqrt = torch.sum(A_hat, 1).pow(-0.5)

        # D_hat_inv_sqrt @ A_hat @ D_hat_inv_sqrt, written as row/column scaling
        A_sym = (
            degree_inv_sqrt.unsqueeze(1) * A_hat * degree_inv_sqrt.unsqueeze(0)
        )

        x = A_sym @ inputs

        if self.weight is not None:
            x = x @ self.weight

        if self.bias is not None:
            x = x + self.bias

        return x

In [33]:
class GCN(nn.Module):
    """Two-layer graph convolutional network for node classification.

    The first graph convolution is followed by ReLU, the second one by a
    log-softmax over the class dimension.

    Args:
        input_features: Number of features per node.
        hidden_features: Width of the hidden representation.
        output_features: Number of classes.
    """

    def __init__(
        self,
        input_features: int = 6,
        hidden_features: int = 4,
        output_features: int = 2,
    ):
        super().__init__()
        self._graph_conv_1 = GraphConv(input_features, hidden_features)
        self._activation_1 = nn.ReLU()
        self._graph_conv_2 = GraphConv(hidden_features, output_features)
        self._activation_2 = nn.LogSoftmax(dim=1)

    def forward(
        self,
        inputs: torch.Tensor,
        adjacency_matrix: torch.Tensor,
    ) -> torch.Tensor:
        """Return the per-node class log-probabilities.

        Args:
            inputs: Node features, one row per node.
            adjacency_matrix: Adjacency matrix passed to both graph
                convolutions.
        """
        hidden = self._activation_1(
            self._graph_conv_1(inputs, adjacency_matrix))

        return self._activation_2(
            self._graph_conv_2(hidden, adjacency_matrix))

# Training

In [34]:
def _accuracy(log_probabilities: torch.Tensor, targets: torch.Tensor) -> float:
    """Return the percentage of rows whose arg-max class equals the target."""
    hits = torch.sum(torch.argmax(log_probabilities, dim=1) == targets).item()

    return hits * 100.0 / len(log_probabilities)


def train(
    model: nn.Module,
    features: torch.Tensor,
    train_labels: torch.Tensor,
    validation_labels: torch.Tensor,
    adjacency_matrix: torch.Tensor,
    epochs: int,
    optimizer: torch.optim.Optimizer = None,
) -> pd.DataFrame:
    """Train ``model`` full-batch and plot the loss/accuracy curves.

    Args:
        model: Network called as ``model(features, adjacency_matrix)`` and
            returning per-node class log-probabilities.
        features: Node features; converted to double precision.
        train_labels: Long tensor with one ``[label, node_index]`` row per
            training node.
        validation_labels: Long tensor with the label of every node.
        adjacency_matrix: Adjacency matrix; converted to double precision.
        epochs: Number of optimisation steps.
        optimizer: Optimizer that updates ``model``. When omitted, a
            module-level ``optimizer`` is used if one is defined (what older
            callers relied on); otherwise Adam with default settings is
            created for ``model``.

    Returns:
        Long-format DataFrame with the columns ``phase``, ``metric``,
        ``epoch`` and ``value``: four rows per epoch.
    """
    if optimizer is None:
        # Backward compatibility with callers that only defined a global.
        optimizer = globals().get('optimizer')

    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters())

    # Loop invariants: convert and slice once instead of on every epoch.
    inputs = features.double()
    adjacency = adjacency_matrix.double()
    train_targets = train_labels[:, 0]
    train_nodes = train_labels[:, 1]

    metrics = []

    with tqdm(total=epochs) as pbar:
        for epoch_number in range(1, epochs + 1):
            # training phase
            model.train()
            optimizer.zero_grad()

            output = model(inputs, adjacency)
            train_output = torch.index_select(output, 0, train_nodes)

            train_loss = F.nll_loss(train_output, train_targets)
            train_accuracy = _accuracy(train_output, train_targets)

            train_loss.backward()
            optimizer.step()

            metrics.append(['train', 'loss', epoch_number, train_loss.item()])
            metrics.append(['train', 'accuracy', epoch_number, train_accuracy])

            # validation phase: a fresh forward pass in eval mode, so the
            # metrics describe the weights after this epoch's update rather
            # than the stale training output.
            model.eval()

            with torch.no_grad():
                validation_output = model(inputs, adjacency)
                validation_loss = F.nll_loss(
                    validation_output, validation_labels)

            validation_accuracy = _accuracy(
                validation_output, validation_labels)

            metrics.append(
                ['validation', 'loss', epoch_number, validation_loss.item()])
            metrics.append(
                ['validation', 'accuracy', epoch_number, validation_accuracy])

            if epoch_number % 100 == 0:
                print(
                    f'Epoch: {epoch_number:4} '
                    f'Train Loss: {train_loss.item():.4f} '
                    f'Validation Loss: {validation_loss.item():.4f} '
                    f'Train Accuracy: {train_accuracy:.2f} '
                    f'Validation Accuracy: {validation_accuracy:.2f}'
                )

            pbar.update(1)

    df = pd.DataFrame(metrics, columns=['phase', 'metric', 'epoch', 'value'])

    # One facet per metric, stacked vertically, one line per phase.
    fig = px.line(
        df,
        x='epoch',
        y='value',
        color='phase',
        facet_col='metric',
        facet_col_wrap=1,
        template='plotly_dark',
        width=800,
    )
    # Loss and accuracy live on different scales: unlink the y axes.
    fig.update_yaxes(matches=None)
    # NOTE(review): with ``facet_col_wrap=1`` there is presumably a single
    # facet column, so ``col=2`` may select nothing - confirm the intent.
    fig.update_yaxes(showticklabels=True, col=2)
    # Facet titles read "metric=<name>"; keep only the name.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
    fig.show()

    return df

In [None]:
EPOCHS = 3000

# A fixed seed keeps the presented results repeatable between runs; it must be
# set before the model is built because the weights are initialised randomly.
torch.manual_seed(13)

model = GCN().to(torch.float64)
optimizer = torch.optim.Adam(model.parameters())

train(
    model=model,
    features=features,
    train_labels=train_labels,
    validation_labels=validation_labels,
    adjacency_matrix=adjacency_matrix,
    epochs=EPOCHS,
)

## Evaluation

In [None]:
model.eval()

# Inference only: without autograd the outputs can be turned into plain
# numbers for plotting.
with torch.no_grad():
    outputs = model(features.double(), adjacency_matrix.double())

outputs_arg_max = torch.argmax(outputs, dim=1)

# x: predicted class index, y: the model output for that class (the model
# ends in a log-softmax, so this is a log-probability). Sized from the
# outputs instead of a hard-coded node count.
positions = {
    node: (
        int(outputs_arg_max[node]),
        float(outputs[node, outputs_arg_max[node]]),
    )
    for node in range(len(outputs))
}

draw_graph(G, color_map, positions, axis_labels=('junk, premium', 'certainty'))