In [2]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Load the Cora dataset through the Planetoid loader. Files are stored under
# /tmp/Cora and NormalizeFeatures() is registered as the dataset transform.
dataset = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures())
# Print a short summary of the dataset object.
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of classes: {dataset.num_classes}')  # number of node classes

Dataset: Cora():
Number of graphs: 1
Number of classes: 7


In [3]:
# Print summary statistics for every graph in the dataset.
# NOTE: `graph` stays bound to the last graph after the loop; later cells
# (mask inspection, train(), test()) read that notebook-level variable.
for i, graph in enumerate(dataset):
    print(
        f'{i}-th graph: {graph}',
        f'------------',
        f'Number of nodes: {graph.num_nodes}',
        f'Number of node features: {graph.num_node_features}',
        f'Number of edges: {graph.num_edges}',
        f'Number of edge features: {graph.num_edge_features}',
        f'Average node degree: {graph.num_edges / graph.num_nodes:.2f}',
        f'Number of training nodes: {graph.train_mask.sum()}',
        f'Number of validation nodes: {graph.val_mask.sum()}',
        f'Number of test nodes: {graph.test_mask.sum()}',
        f'Has isolated nodes(nodes without edges): {graph.has_isolated_nodes()}',
        f'Has self-loops: {graph.has_self_loops()}',
        f'Is undirected: {graph.is_undirected()}',
        sep='\n',
    )

0-th graph: Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
------------
Number of nodes: 2708
Number of node features: 1433
Number of edges: 10556
Number of edge features: 0
Average node degree: 3.90
Number of training nodes: 140
Number of validation nodes: 500
Number of test nodes: 1000
Has isolated nodes(nodes without edges): False
Has self-loops: False
Is undirected: True


In [8]:
# Boolean mask over the nodes; True marks a node that belongs to the training split.
graph.train_mask

tensor([ True,  True,  True,  ..., False, False, False])

In [9]:
# The mask has one entry per node, so its length matches the node count printed above.
len(graph.train_mask)

2708

In [10]:
# Summing a boolean mask counts its True entries, i.e. the number of training nodes.
graph.train_mask.sum()


tensor(140)

In [11]:
# Boolean mask over the nodes; True marks a node that belongs to the validation split.
graph.val_mask

tensor([False, False, False,  ..., False, False, False])

In [12]:
# The validation mask also has one entry per node.
len(graph.val_mask)

2708

In [13]:
# Number of True entries in the mask, i.e. the number of validation nodes.
graph.val_mask.sum()

tensor(500)

In [14]:
# Boolean mask over the nodes; True marks a node that belongs to the test split.
graph.test_mask

tensor([False, False, False,  ...,  True,  True,  True])

In [15]:
# The test mask also has one entry per node.
len(graph.test_mask)

2708

In [16]:
# Number of True entries in the mask, i.e. the number of test nodes.
graph.test_mask.sum()

tensor(1000)

## Training an MLP on Cora

**Let's construct a simple MLP that operates solely on the input node features (using shared weights across all nodes).**

In [6]:
import torch
from torch.nn import Linear
import torch.nn.functional as F


class MLP(torch.nn.Module):
    """Two-layer perceptron applied to each node's feature vector independently.

    The same weights are shared across all nodes and no graph connectivity
    is used.

    Args:
        hidden_channels: Width of the hidden layer.
        in_channels: Size of the input feature vector. Defaults to
            ``dataset.num_features`` of the notebook-level ``dataset``.
        out_channels: Number of output scores per node. Defaults to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, hidden_channels, in_channels=None, out_channels=None):
        super().__init__()
        # Fixed seed so the layer initialisation below is reproducible.
        torch.manual_seed(12345)
        # Only fall back to the notebook global when no explicit size is given,
        # so the class can also be used without a `dataset` in scope.
        if in_channels is None:
            in_channels = dataset.num_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.lin1 = Linear(in_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_channels)

    def forward(self, x):
        """Map node features ``x`` to unnormalised class scores."""
        x = self.lin1(x)
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        return x

# Build the MLP with a 16-unit hidden layer and print its layer summary.
model = MLP(hidden_channels=16)
print(model)

criterion = torch.nn.CrossEntropyLoss()  # Cross-entropy loss on the model's class scores.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)  # Adam with learning rate 0.01 and weight decay 5e-4.

def train():
    """Run one optimisation step on the training nodes and return the loss.

    Operates on the notebook-level ``model``, ``optimizer``, ``criterion``
    and ``graph``. The forward pass covers all nodes, but only the nodes
    selected by ``graph.train_mask`` contribute to the loss.

    Returns:
        The loss tensor computed by ``criterion`` for this step.
    """
    model.train()
    optimizer.zero_grad()  # drop gradients left over from the previous step

    train_nodes = graph.train_mask
    logits = model(graph.x)
    step_loss = criterion(logits[train_nodes], graph.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss

def test():
      model.eval()
      out = model(graph.x)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[graph.test_mask] == graph.y[graph.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(graph.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc

# Train for 200 epochs, printing the training loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    
# Evaluate once on the test nodes after training has finished.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

MLP(
  (lin1): Linear(in_features=1433, out_features=16, bias=True)
  (lin2): Linear(in_features=16, out_features=7, bias=True)
)
Epoch: 001, Loss: 1.9615
Epoch: 002, Loss: 1.9557
Epoch: 003, Loss: 1.9505
Epoch: 004, Loss: 1.9423
Epoch: 005, Loss: 1.9327
Epoch: 006, Loss: 1.9279
Epoch: 007, Loss: 1.9144
Epoch: 008, Loss: 1.9087
Epoch: 009, Loss: 1.9023
Epoch: 010, Loss: 1.8893
Epoch: 011, Loss: 1.8776
Epoch: 012, Loss: 1.8594
Epoch: 013, Loss: 1.8457
Epoch: 014, Loss: 1.8365
Epoch: 015, Loss: 1.8280
Epoch: 016, Loss: 1.7965
Epoch: 017, Loss: 1.7984
Epoch: 018, Loss: 1.7832
Epoch: 019, Loss: 1.7495
Epoch: 020, Loss: 1.7441
Epoch: 021, Loss: 1.7188
Epoch: 022, Loss: 1.7124
Epoch: 023, Loss: 1.6785
Epoch: 024, Loss: 1.6660
Epoch: 025, Loss: 1.6119
Epoch: 026, Loss: 1.6236
Epoch: 027, Loss: 1.5827
Epoch: 028, Loss: 1.5784
Epoch: 029, Loss: 1.5524
Epoch: 030, Loss: 1.5020
Epoch: 031, Loss: 1.5065
Epoch: 032, Loss: 1.4742
Epoch: 033, Loss: 1.4581
Epoch: 034, Loss: 1.4246
Epoch: 035, Loss: 1.

## Converting the Above MLP into a GNN

**We can convert the MLP implemented above into a GNN by simply substituting the `torch.nn.Linear()` layers with `GCNConv()` layers from PyTorch Geometric's `torch_geometric.nn`.**

I have also included the MLP code (commented out) for comparison. The lines that differ are marked with the comment ***DIFFERENT***.

In [10]:
import torch
from torch_geometric.nn import GCNConv

#class MLP(torch.nn.Module):
#    def __init__(self, hidden_channels):
#        super().__init__()
#        torch.manual_seed(12345)
#        self.lin1 = Linear(dataset.num_features, hidden_channels)
#        self.lin2 = Linear(hidden_channels, dataset.num_classes)

#    def forward(self, x):
#        x = self.lin1(x)
#        x = x.relu()
#        x = F.dropout(x, p=0.5, training=self.training)
#        x = self.lin2(x)
#        return x

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Has the same structure as the MLP, but uses ``GCNConv`` layers, so every
    layer receives the graph connectivity ``edge_index`` as well as the
    node features.

    Args:
        hidden_channels: Width of the hidden layer.
        in_channels: Size of the input feature vector. Defaults to
            ``dataset.num_features`` of the notebook-level ``dataset``.
        out_channels: Number of output scores per node. Defaults to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, hidden_channels, in_channels=None, out_channels=None):
        super().__init__()
        # Fixed seed so the layer initialisation below is reproducible.
        torch.manual_seed(12345)
        # Only fall back to the notebook global when no explicit size is given,
        # so the class can also be used without a `dataset` in scope.
        if in_channels is None:
            in_channels = dataset.num_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)  #DIFFERENT
        self.conv2 = GCNConv(hidden_channels, out_channels)  #DIFFERENT

    def forward(self, x, edge_index):  #DIFFERENT
        """Map node features ``x`` and connectivity ``edge_index`` to class scores."""
        x = self.conv1(x, edge_index)  #DIFFERENT
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)  #DIFFERENT
        return x

#model = MLP(hidden_channels=16)
# Build the GCN with a 16-unit hidden layer and print its layer summary.
model = GCN(hidden_channels=16)
print(model)


#optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#criterion = torch.nn.CrossEntropyLoss()
# Same optimizer and loss settings as for the MLP, now bound to the GCN's parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

#def train():
#      model.train()
#      optimizer.zero_grad()
#      out = model(graph.x)
#      loss = criterion(out[graph.train_mask], graph.y[graph.train_mask])
#      loss.backward()
#      optimizer.step()
#      return loss


def train():
    """Run one optimisation step on the training nodes and return the loss.

    Operates on the notebook-level ``model``, ``optimizer``, ``criterion``
    and ``graph``. The forward pass uses the node features and the edge
    index of the whole graph, but only the nodes selected by
    ``graph.train_mask`` contribute to the loss.

    Returns:
        The loss tensor computed by ``criterion`` for this step.
    """
    model.train()
    optimizer.zero_grad()  # drop gradients left over from the previous step

    train_nodes = graph.train_mask
    logits = model(graph.x, graph.edge_index)  #DIFFERENT
    step_loss = criterion(logits[train_nodes], graph.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss
    
#def test():
#      model.eval()
#      out = model(graph.x)
#      pred = out.argmax(dim=1)
#      test_correct = pred[graph.test_mask] == graph.y[graph.test_mask]
#      test_acc = int(test_correct.sum()) / int(graph.test_mask.sum())
#      return test_acc


def test():
      model.eval()
      out = model(graph.x, graph.edge_index)  #DIFFERENT
      pred = out.argmax(dim=1)
      test_correct = pred[graph.test_mask] == graph.y[graph.test_mask]
      test_acc = int(test_correct.sum()) / int(graph.test_mask.sum())
      return test_acc


#for epoch in range(1, 201):
#    loss = train()
#    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

# Train the GCN for 100 epochs (the MLP above used 200), printing the loss after every step.
for epoch in range(1, 101):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    
#test_acc = test()
#print(f'Test Accuracy: {test_acc:.4f}')

# Evaluate once on the test nodes after training has finished.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)
Epoch: 001, Loss: 1.9463
Epoch: 002, Loss: 1.9409
Epoch: 003, Loss: 1.9343
Epoch: 004, Loss: 1.9275
Epoch: 005, Loss: 1.9181
Epoch: 006, Loss: 1.9086
Epoch: 007, Loss: 1.9015
Epoch: 008, Loss: 1.8933
Epoch: 009, Loss: 1.8808
Epoch: 010, Loss: 1.8685
Epoch: 011, Loss: 1.8598
Epoch: 012, Loss: 1.8482
Epoch: 013, Loss: 1.8290
Epoch: 014, Loss: 1.8233
Epoch: 015, Loss: 1.8057
Epoch: 016, Loss: 1.7966
Epoch: 017, Loss: 1.7825
Epoch: 018, Loss: 1.7617
Epoch: 019, Loss: 1.7491
Epoch: 020, Loss: 1.7310
Epoch: 021, Loss: 1.7147
Epoch: 022, Loss: 1.7056
Epoch: 023, Loss: 1.6954
Epoch: 024, Loss: 1.6697
Epoch: 025, Loss: 1.6538
Epoch: 026, Loss: 1.6312
Epoch: 027, Loss: 1.6161
Epoch: 028, Loss: 1.5899
Epoch: 029, Loss: 1.5711
Epoch: 030, Loss: 1.5576
Epoch: 031, Loss: 1.5393
Epoch: 032, Loss: 1.5137
Epoch: 033, Loss: 1.4948
Epoch: 034, Loss: 1.4913
Epoch: 035, Loss: 1.4698
Epoch: 036, Loss: 1.3998
Epoch: 037, Loss: 1.4041
Epoch: 038, L

**The `train()` and `test()` functions for this GNN are almost the same as for the MLP, except that the GNN takes not only the node features `x` (the MLP's only input) but also the graph connectivity `edge_index` as input. This is because the `GCNConv()` layer takes neighboring node information into account, whereas the `Linear()` layer does not.**

1. `GCNConv()` is defined as:

$$
\mathbf{x}_v^{(\ell + 1)} = \mathbf{W}^{(\ell + 1)} \sum_{w \in \mathcal{N}(v) \, \cup \, \{ v \}} \frac{1}{c_{w,v}} \cdot \mathbf{x}_w^{(\ell)}
$$

where $\mathbf{W}^{(\ell + 1)}$ denotes a trainable weight matrix of shape `[num_output_features, num_input_features]` and $c_{w,v}$ refers to a fixed normalization coefficient for each edge.

2. `Linear()` is defined as:

$$
\mathbf{x}_v^{(\ell + 1)} = \mathbf{W}^{(\ell + 1)} \mathbf{x}_v^{(\ell)}
$$

By simply substituting the `Linear()` layers with `GCNConv()` layers, we obtain a test accuracy of **81.5%**. This is in stark contrast to the **59%** test accuracy obtained by our MLP, indicating that neighbourhood information plays a crucial role in obtaining better performance. The neighbourhood information is relevant and useful for the Cora dataset because **cited papers are very likely related to the category of a document**.

**To achieve better model performance and to avoid overfitting, it is usually a good idea to select the best model based on an additional validation set. The `Cora` dataset provides a validation node set as `graph.val_mask`. Below, we modify the code to select and test the model with the highest validation performance.**

In [30]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

#class MLP(torch.nn.Module):
#    def __init__(self, hidden_channels):
#        super().__init__()
#        torch.manual_seed(12345)
#        self.lin1 = Linear(dataset.num_features, hidden_channels)
#        self.lin2 = Linear(hidden_channels, dataset.num_classes)

#    def forward(self, x):
#        x = self.lin1(x)
#        x = x.relu()
#        x = F.dropout(x, p=0.5, training=self.training)
#        x = self.lin2(x)
#        return x

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Has the same structure as the MLP, but uses ``GCNConv`` layers, so every
    layer receives the graph connectivity ``edge_index`` as well as the
    node features.

    Args:
        hidden_channels: Width of the hidden layer.
        in_channels: Size of the input feature vector. Defaults to
            ``dataset.num_features`` of the notebook-level ``dataset``.
        out_channels: Number of output scores per node. Defaults to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, hidden_channels, in_channels=None, out_channels=None):
        super().__init__()
        # Fixed seed so the layer initialisation below is reproducible.
        torch.manual_seed(12345)
        # Only fall back to the notebook global when no explicit size is given,
        # so the class can also be used without a `dataset` in scope.
        if in_channels is None:
            in_channels = dataset.num_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)  #DIFFERENT
        self.conv2 = GCNConv(hidden_channels, out_channels)  #DIFFERENT

    def forward(self, x, edge_index):  #DIFFERENT
        """Map node features ``x`` and connectivity ``edge_index`` to class scores."""
        x = self.conv1(x, edge_index)  #DIFFERENT
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)  #DIFFERENT
        return x

#model = MLP(hidden_channels=16)
# Build a fresh GCN with a 16-unit hidden layer and print its layer summary.
model = GCN(hidden_channels=16)
print(model)


#optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#criterion = torch.nn.CrossEntropyLoss()
# Adam (learning rate 0.01, weight decay 5e-4) over the new model's parameters, with cross-entropy loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

#def train():
#      model.train()
#      optimizer.zero_grad()
#      out = model(graph.x)
#      loss = criterion(out[graph.train_mask], graph.y[graph.train_mask])
#      loss.backward()
#      optimizer.step()
#      return loss


def train():
    """Run one optimisation step on the training nodes and return the loss.

    Operates on the notebook-level ``model``, ``optimizer``, ``criterion``
    and ``graph``. The forward pass uses the node features and the edge
    index of the whole graph, but only the nodes selected by
    ``graph.train_mask`` contribute to the loss.

    Returns:
        The loss tensor computed by ``criterion`` for this step.
    """
    model.train()
    optimizer.zero_grad()  # drop gradients left over from the previous step

    train_nodes = graph.train_mask
    logits = model(graph.x, graph.edge_index)  #DIFFERENT
    step_loss = criterion(logits[train_nodes], graph.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss
    
#def test():
#      model.eval()
#      out = model(graph.x)
#      pred = out.argmax(dim=1)
#      test_correct = pred[graph.test_mask] == graph.y[graph.test_mask]
#      test_acc = int(test_correct.sum()) / int(graph.test_mask.sum())
#      return test_acc


def test(mask):
      model.eval()
      out = model(graph.x, graph.edge_index)  #DIFFERENT
      pred = out.argmax(dim=1)
      if mask == 'test':
        test_correct = pred[graph.test_mask] == graph.y[graph.test_mask]
        acc = int(test_correct.sum()) / int(graph.test_mask.sum())
      elif mask == 'val':
        val_correct = pred[graph.val_mask] == graph.y[graph.val_mask]
        acc = int(val_correct.sum()) / int(graph.val_mask.sum())
      return acc


#for epoch in range(1, 201):
#    loss = train()
#    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

# Track the highest validation accuracy seen so far and the epoch that produced it.
# Starting at -1 guarantees the first epoch is always checkpointed.
best_val_acc = -1
best_epoch = -1

for epoch in range(1, 901):
    loss = train()
    val_acc = test('val')
    # `>=` means a tie also overwrites the checkpoint, so the file on disk holds
    # the latest epoch that reached the best validation accuracy.
    if val_acc >= best_val_acc:
        torch.save(model.state_dict(),'./best_model.pth')
        best_val_acc = val_acc
        best_epoch = epoch
    # Test accuracy is printed for monitoring only; it does not influence which checkpoint is kept.
    test_acc = test('test')
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Validation Accuracy:{val_acc}, Test Accuracy:{test_acc}')
    
#test_acc = test()
#print(f'Test Accuracy: {test_acc:.4f}')

#test_acc = test('test')
#print(f'Final-Epoch Test Accuracy: {test_acc:.4f}')


# Restore the weights of the checkpoint selected on the validation set and re-evaluate it.
model.load_state_dict(torch.load('./best_model.pth'))
test_acc = test('test')
val_acc = test('val')
# The "Best Test Accuracy" printed here is the test accuracy of the checkpoint chosen by
# validation accuracy, not the maximum test accuracy observed across epochs.
print(f'\n\n\nBest Epoch:{best_epoch}, Best Validation Accuracy:{val_acc:.4f}, Best Test Accuracy: {test_acc:.4f}')

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)
Epoch: 001, Loss: 1.9460, Validation Accuracy:0.356, Test Accuracy:0.385
Epoch: 002, Loss: 1.9412, Validation Accuracy:0.39, Test Accuracy:0.42
Epoch: 003, Loss: 1.9363, Validation Accuracy:0.524, Test Accuracy:0.538
Epoch: 004, Loss: 1.9308, Validation Accuracy:0.414, Test Accuracy:0.445
Epoch: 005, Loss: 1.9216, Validation Accuracy:0.418, Test Accuracy:0.409
Epoch: 006, Loss: 1.9137, Validation Accuracy:0.528, Test Accuracy:0.525
Epoch: 007, Loss: 1.9078, Validation Accuracy:0.626, Test Accuracy:0.613
Epoch: 008, Loss: 1.8986, Validation Accuracy:0.672, Test Accuracy:0.669
Epoch: 009, Loss: 1.8868, Validation Accuracy:0.652, Test Accuracy:0.666
Epoch: 010, Loss: 1.8825, Validation Accuracy:0.656, Test Accuracy:0.672
Epoch: 011, Loss: 1.8701, Validation Accuracy:0.68, Test Accuracy:0.686
Epoch: 012, Loss: 1.8614, Validation Accuracy:0.696, Test Accuracy:0.697
Epoch: 013, Loss: 1.8486, Validation Accuracy:0.696, Test Accurac

Epoch: 118, Loss: 0.5013, Validation Accuracy:0.798, Test Accuracy:0.817
Epoch: 119, Loss: 0.4780, Validation Accuracy:0.798, Test Accuracy:0.815
Epoch: 120, Loss: 0.4716, Validation Accuracy:0.796, Test Accuracy:0.815
Epoch: 121, Loss: 0.4501, Validation Accuracy:0.796, Test Accuracy:0.815
Epoch: 122, Loss: 0.4296, Validation Accuracy:0.8, Test Accuracy:0.811
Epoch: 123, Loss: 0.4604, Validation Accuracy:0.802, Test Accuracy:0.812
Epoch: 124, Loss: 0.4693, Validation Accuracy:0.802, Test Accuracy:0.812
Epoch: 125, Loss: 0.4452, Validation Accuracy:0.802, Test Accuracy:0.813
Epoch: 126, Loss: 0.4840, Validation Accuracy:0.8, Test Accuracy:0.813
Epoch: 127, Loss: 0.4382, Validation Accuracy:0.804, Test Accuracy:0.813
Epoch: 128, Loss: 0.4705, Validation Accuracy:0.796, Test Accuracy:0.819
Epoch: 129, Loss: 0.4700, Validation Accuracy:0.798, Test Accuracy:0.816
Epoch: 130, Loss: 0.4404, Validation Accuracy:0.796, Test Accuracy:0.816
Epoch: 131, Loss: 0.4225, Validation Accuracy:0.794, Te

Epoch: 242, Loss: 0.2749, Validation Accuracy:0.798, Test Accuracy:0.805
Epoch: 243, Loss: 0.3227, Validation Accuracy:0.794, Test Accuracy:0.8
Epoch: 244, Loss: 0.2956, Validation Accuracy:0.794, Test Accuracy:0.804
Epoch: 245, Loss: 0.2825, Validation Accuracy:0.796, Test Accuracy:0.804
Epoch: 246, Loss: 0.2870, Validation Accuracy:0.792, Test Accuracy:0.809
Epoch: 247, Loss: 0.2737, Validation Accuracy:0.788, Test Accuracy:0.816
Epoch: 248, Loss: 0.2992, Validation Accuracy:0.788, Test Accuracy:0.817
Epoch: 249, Loss: 0.2911, Validation Accuracy:0.784, Test Accuracy:0.815
Epoch: 250, Loss: 0.2577, Validation Accuracy:0.786, Test Accuracy:0.816
Epoch: 251, Loss: 0.2922, Validation Accuracy:0.788, Test Accuracy:0.812
Epoch: 252, Loss: 0.2782, Validation Accuracy:0.792, Test Accuracy:0.811
Epoch: 253, Loss: 0.2810, Validation Accuracy:0.798, Test Accuracy:0.813
Epoch: 254, Loss: 0.2801, Validation Accuracy:0.798, Test Accuracy:0.813
Epoch: 255, Loss: 0.2699, Validation Accuracy:0.8, Te

Epoch: 365, Loss: 0.2124, Validation Accuracy:0.792, Test Accuracy:0.814
Epoch: 366, Loss: 0.2298, Validation Accuracy:0.788, Test Accuracy:0.815
Epoch: 367, Loss: 0.2326, Validation Accuracy:0.788, Test Accuracy:0.816
Epoch: 368, Loss: 0.2557, Validation Accuracy:0.79, Test Accuracy:0.814
Epoch: 369, Loss: 0.2624, Validation Accuracy:0.79, Test Accuracy:0.812
Epoch: 370, Loss: 0.2338, Validation Accuracy:0.792, Test Accuracy:0.81
Epoch: 371, Loss: 0.2431, Validation Accuracy:0.794, Test Accuracy:0.811
Epoch: 372, Loss: 0.2658, Validation Accuracy:0.788, Test Accuracy:0.807
Epoch: 373, Loss: 0.2176, Validation Accuracy:0.788, Test Accuracy:0.804
Epoch: 374, Loss: 0.2365, Validation Accuracy:0.794, Test Accuracy:0.804
Epoch: 375, Loss: 0.2293, Validation Accuracy:0.794, Test Accuracy:0.809
Epoch: 376, Loss: 0.2294, Validation Accuracy:0.796, Test Accuracy:0.813
Epoch: 377, Loss: 0.2268, Validation Accuracy:0.792, Test Accuracy:0.816
Epoch: 378, Loss: 0.2381, Validation Accuracy:0.794, T

Epoch: 490, Loss: 0.2014, Validation Accuracy:0.794, Test Accuracy:0.814
Epoch: 491, Loss: 0.2205, Validation Accuracy:0.798, Test Accuracy:0.81
Epoch: 492, Loss: 0.2268, Validation Accuracy:0.796, Test Accuracy:0.812
Epoch: 493, Loss: 0.2105, Validation Accuracy:0.794, Test Accuracy:0.812
Epoch: 494, Loss: 0.2177, Validation Accuracy:0.794, Test Accuracy:0.81
Epoch: 495, Loss: 0.2041, Validation Accuracy:0.792, Test Accuracy:0.81
Epoch: 496, Loss: 0.1966, Validation Accuracy:0.792, Test Accuracy:0.812
Epoch: 497, Loss: 0.2078, Validation Accuracy:0.794, Test Accuracy:0.812
Epoch: 498, Loss: 0.2228, Validation Accuracy:0.792, Test Accuracy:0.812
Epoch: 499, Loss: 0.2289, Validation Accuracy:0.792, Test Accuracy:0.814
Epoch: 500, Loss: 0.1833, Validation Accuracy:0.794, Test Accuracy:0.814
Epoch: 501, Loss: 0.2198, Validation Accuracy:0.79, Test Accuracy:0.812
Epoch: 502, Loss: 0.2046, Validation Accuracy:0.792, Test Accuracy:0.813
Epoch: 503, Loss: 0.2005, Validation Accuracy:0.794, Te

Epoch: 614, Loss: 0.1994, Validation Accuracy:0.792, Test Accuracy:0.811
Epoch: 615, Loss: 0.2258, Validation Accuracy:0.79, Test Accuracy:0.813
Epoch: 616, Loss: 0.1763, Validation Accuracy:0.792, Test Accuracy:0.811
Epoch: 617, Loss: 0.2638, Validation Accuracy:0.79, Test Accuracy:0.809
Epoch: 618, Loss: 0.1794, Validation Accuracy:0.788, Test Accuracy:0.805
Epoch: 619, Loss: 0.1920, Validation Accuracy:0.792, Test Accuracy:0.804
Epoch: 620, Loss: 0.1829, Validation Accuracy:0.79, Test Accuracy:0.808
Epoch: 621, Loss: 0.1916, Validation Accuracy:0.79, Test Accuracy:0.812
Epoch: 622, Loss: 0.1827, Validation Accuracy:0.79, Test Accuracy:0.813
Epoch: 623, Loss: 0.1952, Validation Accuracy:0.788, Test Accuracy:0.811
Epoch: 624, Loss: 0.2200, Validation Accuracy:0.792, Test Accuracy:0.812
Epoch: 625, Loss: 0.2036, Validation Accuracy:0.792, Test Accuracy:0.814
Epoch: 626, Loss: 0.2317, Validation Accuracy:0.79, Test Accuracy:0.812
Epoch: 627, Loss: 0.1887, Validation Accuracy:0.79, Test 

Epoch: 737, Loss: 0.2013, Validation Accuracy:0.794, Test Accuracy:0.814
Epoch: 738, Loss: 0.1836, Validation Accuracy:0.792, Test Accuracy:0.817
Epoch: 739, Loss: 0.1941, Validation Accuracy:0.792, Test Accuracy:0.815
Epoch: 740, Loss: 0.2067, Validation Accuracy:0.794, Test Accuracy:0.812
Epoch: 741, Loss: 0.1918, Validation Accuracy:0.788, Test Accuracy:0.81
Epoch: 742, Loss: 0.2013, Validation Accuracy:0.788, Test Accuracy:0.811
Epoch: 743, Loss: 0.1630, Validation Accuracy:0.792, Test Accuracy:0.808
Epoch: 744, Loss: 0.1712, Validation Accuracy:0.794, Test Accuracy:0.812
Epoch: 745, Loss: 0.1730, Validation Accuracy:0.796, Test Accuracy:0.809
Epoch: 746, Loss: 0.2214, Validation Accuracy:0.794, Test Accuracy:0.809
Epoch: 747, Loss: 0.1739, Validation Accuracy:0.79, Test Accuracy:0.814
Epoch: 748, Loss: 0.2037, Validation Accuracy:0.788, Test Accuracy:0.815
Epoch: 749, Loss: 0.1696, Validation Accuracy:0.794, Test Accuracy:0.817
Epoch: 750, Loss: 0.1906, Validation Accuracy:0.794, 

Epoch: 859, Loss: 0.1828, Validation Accuracy:0.792, Test Accuracy:0.815
Epoch: 860, Loss: 0.1879, Validation Accuracy:0.792, Test Accuracy:0.812
Epoch: 861, Loss: 0.2079, Validation Accuracy:0.794, Test Accuracy:0.81
Epoch: 862, Loss: 0.1775, Validation Accuracy:0.796, Test Accuracy:0.806
Epoch: 863, Loss: 0.1693, Validation Accuracy:0.792, Test Accuracy:0.806
Epoch: 864, Loss: 0.1959, Validation Accuracy:0.79, Test Accuracy:0.809
Epoch: 865, Loss: 0.1984, Validation Accuracy:0.786, Test Accuracy:0.817
Epoch: 866, Loss: 0.1949, Validation Accuracy:0.786, Test Accuracy:0.815
Epoch: 867, Loss: 0.1805, Validation Accuracy:0.786, Test Accuracy:0.814
Epoch: 868, Loss: 0.1944, Validation Accuracy:0.788, Test Accuracy:0.814
Epoch: 869, Loss: 0.2050, Validation Accuracy:0.786, Test Accuracy:0.812
Epoch: 870, Loss: 0.2006, Validation Accuracy:0.79, Test Accuracy:0.812
Epoch: 871, Loss: 0.1629, Validation Accuracy:0.792, Test Accuracy:0.809
Epoch: 872, Loss: 0.2315, Validation Accuracy:0.792, T