In [1]:
import sys
sys.path.append('/Users/sbhardwaj/Documents/GraphNodeClassification')

from src.models.models import GCN , MLP , mlp_GCN , n2vnet
from src.models.training import weighted_BCE , train_mlp , test_mlp , train_gcn , test_gcn , train_n2v , test_n2v
from src.data.data_loader import DBLP_dataset
from src.utilities.utils import acc , recall , save_model
import os
import node2vec

import torch
import copy
import logging

### Select GPS device

In [2]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("mps device found!")
else:
    device = "cpu"
    print ("MPS device not found.")

mps device found!


# Edge sampling experiment

### Set-up logging

In [8]:
run = 1

In [28]:
# gcn_logger = logging.getLogger('gcn')
# mlp_logger = logging.getLogger('mlp')
mlp_gcn_logger = logging.getLogger('mlp_gcn')
n2vnet_logger = logging.getLogger("n2vnet")

# gcn_logger.setLevel(logging.INFO)
# mlp_logger.setLevel(logging.INFO)
mlp_gcn_logger.setLevel(logging.INFO)
n2vnet_logger.setLevel(logging.INFO)

# file_handler_gcn = logging.FileHandler(f'../src/logs/es_expt/gcn_{run}.log' , mode="w")
# file_handler_mlp = logging.FileHandler(f'../src/logs/es_expt/mlp_{run}.log' , mode="w")
file_handler_mlp_gcn = logging.FileHandler(f"../src/logs/es_expt/mlp_gcn_{run}.log" , mode="w")
file_handler_n2vnet = logging.FileHandler(f"../src/logs/es_expt/n2vnet_{run}.log" , mode="w")

formatter = logging.Formatter('%(asctime)s - %(message)s')
# file_handler_gcn.setFormatter(formatter)
# file_handler_mlp.setFormatter(formatter)
file_handler_mlp_gcn.setFormatter(formatter)
file_handler_n2vnet.setFormatter(formatter)

# gcn_logger.addHandler(file_handler_gcn)
# mlp_logger.addHandler(file_handler_mlp)
mlp_gcn_logger.addHandler(file_handler_mlp_gcn)
n2vnet_logger.addHandler(file_handler_n2vnet)

### Dataloader

In [5]:
dataloader_args = {
    "root":os.path.abspath("..")+"/data",
    "raw_filenames":["graph_edges.txt" , "5000_communities.txt"],
    "expt":"edge_sampling",
    "processed_filenames":["data_es_0.pt" , "data_es_1.pt" , "data_es_2.pt" , "data_es_3.pt" ]
}

dataloader_args

{'root': '/Users/sbhardwaj/Documents/GraphNodeClassification/data',
 'raw_filenames': ['graph_edges.txt', '5000_communities.txt'],
 'expt': 'edge_sampling',
 'processed_filenames': ['data_es_0.pt',
  'data_es_1.pt',
  'data_es_2.pt',
  'data_es_3.pt']}

In [6]:
d_es = DBLP_dataset(root = dataloader_args["root"] , raw_filenames = dataloader_args["raw_filenames"] ,
                    expt = dataloader_args["expt"], processed_filenames = dataloader_args["processed_filenames"])

Processing...


Computing transition probabilities:   0%|          | 0/13169 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 100/100 [00:27<00:00,  3.58it/s]
Done!


In [10]:
data = d_es[0].to(device)
data

Data(x=[13169, 21], edge_index=[2, 57621], y=[13169, 200], dtype=torch.float32, g=Graph with 13169 nodes and 22226 edges, train_mask=[13169], val_mask=[13169], test_mask=[13169])

## GCN Model

In [11]:
gcn_args = {
    'device': device,
    'num_layers': 4,
    'hidden_dim': 16,
    'dropout': 0.2,
    'lr': 0.005,
    'epochs': 1000,
}

gcn_args

{'device': device(type='mps'),
 'num_layers': 4,
 'hidden_dim': 16,
 'dropout': 0.2,
 'lr': 0.005,
 'epochs': 1000}

In [12]:
gcn_model = GCN(data.x.shape[1] , gcn_args['hidden_dim'] , data.y.shape[1] , gcn_args['num_layers'] , gcn_args['dropout']).to(device)

total_params_GCN = sum(
	param.numel() for param in gcn_model.parameters()
)
print("GCN model number of parameters:" , total_params_GCN)

GCN model number of parameters: 4392


In [13]:
gcn_model

GCN(
  (convs): ModuleList(
    (0): GCNConv(21, 16)
    (1-2): 2 x GCNConv(16, 16)
    (3): GCNConv(16, 200)
  )
  (bns): ModuleList(
    (0-2): 3 x BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (output): Sigmoid()
)

### Train GCN model

In [14]:
gcn_model.reset_parameters()

optimizer = torch.optim.Adam(gcn_model.parameters(), lr=gcn_args['lr'])
loss_fn = weighted_BCE(reduction="sum" , true_weight=1.2 , false_weight=0.01)

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + gcn_args["epochs"]):
  
  loss = train_gcn(gcn_model , data , optimizer, loss_fn)
  result = test_gcn(gcn_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(gcn_model)

  gcn_logger.info(f'Epoch {epoch:02d} '
        f'Loss {loss:.4f} '
        f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
        f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
        f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Recall: {100 * recall(c_train):.2f}%, '
        f'Acc: {100 * acc(c_train):.2f}% '
        f'Test: {100 * recall(c_test):.2f}%'))
  
handlers = gcn_logger.handlers[:]
for handler in handlers:
    gcn_logger.removeHandler(handler)
    handler.close()

loc("mps_not_equal"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/4e1473ee-9f66-11ee-8daf-cedaeb4cabe2/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":253:0)): error: 'anec.not_equal_zero' op Invalid configuration for the following reasons: Tensor dimensions N1D1C1H1W57621 are not within supported range, N[1-65536]D[1-16384]C[1-65536]H[1-16384]W[1-16384].
loc("mps_select"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/4e1473ee-9f66-11ee-8daf-cedaeb4cabe2/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":294:0)): error: 'anec.not_equal_zero' op Invalid configuration for the following reasons: Tensor dimensions N1D1C1H1W57621 are not within supported range, N[1-65536]D[1-16384]C[1-65536]H[1-16384]W[1-16384].


Epoch: 01, Loss: 23739.2793, Recall: 51.17%, Acc: 58.27% Test: 50.59%
Epoch: 02, Loss: 23315.8574, Recall: 52.65%, Acc: 62.44% Test: 52.14%
Epoch: 03, Loss: 22862.6445, Recall: 55.92%, Acc: 66.55% Test: 56.14%
Epoch: 04, Loss: 22489.4609, Recall: 59.28%, Acc: 70.47% Test: 58.88%
Epoch: 05, Loss: 22117.9434, Recall: 62.21%, Acc: 73.96% Test: 61.54%
Epoch: 06, Loss: 21773.3789, Recall: 65.15%, Acc: 76.88% Test: 64.35%
Epoch: 07, Loss: 21416.9219, Recall: 68.85%, Acc: 79.26% Test: 67.16%
Epoch: 08, Loss: 21087.9375, Recall: 70.89%, Acc: 81.13% Test: 69.08%
Epoch: 09, Loss: 20755.8281, Recall: 72.33%, Acc: 82.58% Test: 70.56%
Epoch: 10, Loss: 20455.6855, Recall: 73.09%, Acc: 83.75% Test: 71.67%
Epoch: 11, Loss: 20158.3828, Recall: 73.52%, Acc: 84.75% Test: 72.49%
Epoch: 12, Loss: 19872.5938, Recall: 73.96%, Acc: 85.53% Test: 73.22%
Epoch: 13, Loss: 19628.7969, Recall: 74.23%, Acc: 86.17% Test: 73.08%
Epoch: 14, Loss: 19354.3828, Recall: 74.32%, Acc: 86.69% Test: 73.15%
Epoch: 15, Loss: 191

In [15]:
best_valid_acc

0.9702013677811551

In [16]:
model_path = "../models/gcn_1_200.pt"
torch.save(best_model.state_dict(), model_path)

## MLP model

In [17]:
mlp_args = {
    "hidden_dim":16,
    "num_layers":4,
    "dropout":0.2,
    "lr":0.01,
    "epochs":1000
}

mlp_args

{'hidden_dim': 16, 'num_layers': 4, 'dropout': 0.2, 'lr': 0.01, 'epochs': 1000}

In [18]:
mlp_model = MLP(input_dim = data.x.shape[1], hidden_dim = mlp_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = mlp_args["num_layers"], dropout = mlp_args['dropout']).to(device)

total_params_mlp = sum(
	param.numel() for param in mlp_model.parameters()
)
print("MLP model number of parameters:" , total_params_mlp)

MLP model number of parameters: 4296


### Train MLP model

In [19]:
mlp_model.reset_parameters()

optimizer = torch.optim.Adam(mlp_model.parameters(), lr=mlp_args['lr'])
loss_fn = weighted_BCE(reduction="sum" , true_weight=1.8 , false_weight=0.01)

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + mlp_args["epochs"]):
  
  loss = train_mlp(mlp_model , data , optimizer, loss_fn)
  result = test_mlp(mlp_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(mlp_model)
        
  mlp_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'recall: {100 * recall(c_train):.2f}%, '
        f'acc: {100 * acc(c_train):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = mlp_logger.handlers[:]
for handler in handlers:
    mlp_logger.removeHandler(handler)
    handler.close()

Epoch: 01, Loss: 28084.5059, recall: 59.10%, acc: 55.96% Test: 55.96%
Epoch: 02, Loss: 27716.3262, recall: 60.12%, acc: 59.29% Test: 59.30%
Epoch: 03, Loss: 27367.2031, recall: 62.58%, acc: 62.76% Test: 62.75%
Epoch: 04, Loss: 27000.5078, recall: 65.17%, acc: 67.28% Test: 67.25%
Epoch: 05, Loss: 26548.0273, recall: 65.05%, acc: 73.49% Test: 73.52%
Epoch: 06, Loss: 26000.3613, recall: 66.33%, acc: 78.37% Test: 78.39%
Epoch: 07, Loss: 25324.9062, recall: 69.16%, acc: 81.08% Test: 81.10%
Epoch: 08, Loss: 24525.8555, recall: 69.59%, acc: 82.93% Test: 82.95%
Epoch: 09, Loss: 23589.8477, recall: 68.44%, acc: 85.22% Test: 85.22%
Epoch: 10, Loss: 22503.8086, recall: 67.90%, acc: 86.48% Test: 86.49%
Epoch: 11, Loss: 21661.3164, recall: 70.56%, acc: 86.70% Test: 86.72%
Epoch: 12, Loss: 21291.8691, recall: 71.39%, acc: 86.67% Test: 86.67%
Epoch: 13, Loss: 21328.4004, recall: 71.13%, acc: 86.71% Test: 86.70%
Epoch: 14, Loss: 21062.5000, recall: 71.36%, acc: 86.66% Test: 86.66%
Epoch: 15, Loss: 208

In [20]:
best_valid_acc*100

89.3765197568389

In [21]:
save_model(best_model , "../models/mlp_1_200.pt")

## GCN model with MLP layers

In [29]:
mlp_gcn_args = {
    "hidden_dim":16,
    "encoding_dim":16,
    "num_layers":4,
    "dropout":0.2,
    "lr":0.005,
    "epochs":1000
}

mlp_gcn_args

{'hidden_dim': 16,
 'encoding_dim': 16,
 'num_layers': 4,
 'dropout': 0.2,
 'lr': 0.005,
 'epochs': 1000}

In [30]:
mlp_gcn_model = mlp_GCN(input_dim = data.x.shape[1], encoding_dim = mlp_gcn_args["encoding_dim"] , hidden_dim = mlp_gcn_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = mlp_gcn_args["num_layers"], dropout = mlp_gcn_args['dropout']).to(device)

total_params_mlp_gcn = sum(
	param.numel() for param in mlp_gcn_model.parameters()
)
print("GCN model with MLP layers number of parameters:" , total_params_mlp_gcn)

GCN model with MLP layers number of parameters: 5208


In [31]:
mlp_gcn_model.reset_parameters()

optimizer = torch.optim.Adam(mlp_gcn_model.parameters(), lr=mlp_gcn_args['lr'])
# loss_fn = torch.nn.CrossEntropyLoss(weight=None , reduction="sum")
loss_fn = weighted_BCE(reduction="sum" , true_weight=1.2 , false_weight=0.01)


best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + mlp_gcn_args["epochs"]):
  
  loss = train_gcn(mlp_gcn_model , data , optimizer, loss_fn)
  result = test_gcn(mlp_gcn_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(mlp_gcn_model)
        
  mlp_gcn_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Recall: {100 * recall(c_train):.2f}%, '
        f'Valid: {100 * acc(c_train):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = mlp_gcn_logger.handlers[:]
for handler in handlers:
    mlp_gcn_logger.removeHandler(handler)
    handler.close()

Epoch: 01, Loss: 23539.3711, Recall: 66.24%, Valid: 54.08% Test: 54.06%
Epoch: 02, Loss: 23231.8770, Recall: 67.15%, Valid: 54.96% Test: 54.99%
Epoch: 03, Loss: 22977.9414, Recall: 67.52%, Valid: 56.36% Test: 56.38%
Epoch: 04, Loss: 22712.5957, Recall: 68.31%, Valid: 57.83% Test: 57.82%
Epoch: 05, Loss: 22480.7344, Recall: 70.18%, Valid: 58.94% Test: 58.93%
Epoch: 06, Loss: 22190.2754, Recall: 73.77%, Valid: 59.88% Test: 59.91%
Epoch: 07, Loss: 21893.7715, Recall: 74.55%, Valid: 61.22% Test: 61.25%
Epoch: 08, Loss: 21591.1289, Recall: 74.45%, Valid: 63.15% Test: 63.20%
Epoch: 09, Loss: 21280.4922, Recall: 74.21%, Valid: 65.73% Test: 65.72%
Epoch: 10, Loss: 20959.5156, Recall: 73.90%, Valid: 68.79% Test: 68.80%
Epoch: 11, Loss: 20608.9453, Recall: 73.71%, Valid: 72.04% Test: 72.02%
Epoch: 12, Loss: 20241.2852, Recall: 73.38%, Valid: 75.28% Test: 75.30%
Epoch: 13, Loss: 19926.0254, Recall: 73.18%, Valid: 78.27% Test: 78.25%
Epoch: 14, Loss: 19588.2910, Recall: 73.08%, Valid: 80.75% Test:

In [32]:
best_valid_acc

0.9616983282674773

In [33]:
save_model(best_model , "../models/mlp_gcn_1_200.pt")

## node2vec

In [23]:
n2vnet_args = {
    "embedding_dim":16,
    "walk_length":10,
    "num_walks":100,
    "min_count":1,
    "batch_words":4,
    "window":5,

    "hidden_dim":16,
    "num_layers":3,
    "dropout":0.2,

    "lr":0.01,
    "epochs":1000
}

n2vnet_args

{'embedding_dim': 16,
 'walk_length': 10,
 'num_walks': 100,
 'min_count': 1,
 'batch_words': 4,
 'window': 5,
 'hidden_dim': 16,
 'num_layers': 3,
 'dropout': 0.2,
 'lr': 0.01,
 'epochs': 1000}

In [24]:
sorted_node_list = (list(data.g.nodes))
sorted_node_list.sort()
sorted_node_list = [str(node) for node in sorted_node_list]

n2v = node2vec.Node2Vec(data.g , dimensions=n2vnet_args["embedding_dim"] , walk_length=n2vnet_args["walk_length"] , num_walks=n2vnet_args["num_walks"]) 
embeddings = torch.tensor((n2v.fit(window=n2vnet_args["window"] , min_count=n2vnet_args["min_count"] , batch_words=n2vnet_args["batch_words"]).wv)[sorted_node_list] , dtype=torch.float32 , device=device)

Computing transition probabilities:   0%|          | 0/33244 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 100/100 [00:48<00:00,  2.05it/s]


In [25]:
n2vnet_model = n2vnet(data.g , input_dim = n2vnet_args["embedding_dim"], hidden_dim = n2vnet_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = n2vnet_args["num_layers"], dropout = n2vnet_args['dropout']).to(device)

total_params_n2vnet = sum(
	param.numel() for param in n2vnet_model.parameters()
)
print("n2vnet number of parameters:" , total_params_n2vnet)

n2vnet number of parameters: 3944


In [28]:
n2vnet_model.reset_parameters()

optimizer = torch.optim.Adam(n2vnet_model.parameters(), lr=n2vnet_args['lr'])
loss_fn = weighted_BCE(reduction="sum" , true_weight=1.8 , false_weight=0.01)

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + n2vnet_args["epochs"]):
  
  loss = train_n2v(n2vnet_model , embeddings , data , optimizer, loss_fn)
  result = test_n2v(n2vnet_model, embeddings , data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(n2vnet_model)
        
#   n2vnet_logger.info(f'Epoch {epoch:02d} '
#       f'Loss {loss:.4f} '
#       f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
#       f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
#       f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Acc: {100 * acc(c_train):.2f}%, '
        f'Recall: {100 * recall(c_train):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
# handlers = n2vnet_logger.handlers[:]
# for handler in handlers:
#     n2vnet_logger.removeHandler(handler)
#     handler.close()

Epoch: 01, Loss: 74703.9375, Acc: 50.39%, Recall: 60.64% Test: 50.34%
Epoch: 02, Loss: 73192.5703, Acc: 54.09%, Recall: 67.77% Test: 54.05%
Epoch: 03, Loss: 71767.7578, Acc: 57.70%, Recall: 71.21% Test: 57.67%
Epoch: 04, Loss: 70159.2266, Acc: 61.36%, Recall: 74.14% Test: 61.36%
Epoch: 05, Loss: 68305.1250, Acc: 65.15%, Recall: 76.89% Test: 65.10%
Epoch: 06, Loss: 66139.2891, Acc: 68.82%, Recall: 79.29% Test: 68.77%
Epoch: 07, Loss: 63626.0781, Acc: 71.86%, Recall: 83.43% Test: 71.83%
Epoch: 08, Loss: 60853.7148, Acc: 75.01%, Recall: 85.86% Test: 74.99%
Epoch: 09, Loss: 57962.2656, Acc: 77.55%, Recall: 86.91% Test: 77.56%
Epoch: 10, Loss: 55235.4102, Acc: 79.18%, Recall: 86.94% Test: 79.18%
Epoch: 11, Loss: 53103.7812, Acc: 80.25%, Recall: 86.88% Test: 80.26%
Epoch: 12, Loss: 51777.7773, Acc: 80.85%, Recall: 86.79% Test: 80.87%
Epoch: 13, Loss: 51223.4844, Acc: 81.40%, Recall: 86.73% Test: 81.43%
Epoch: 14, Loss: 51124.3516, Acc: 81.91%, Recall: 86.62% Test: 81.92%
Epoch: 15, Loss: 509

KeyboardInterrupt: 

In [45]:
best_valid_acc

0.8516746554823248

# Number of communities experiment

In [17]:
run = 0

In [149]:
gcn_logger = logging.getLogger('gcn')
mlp_logger = logging.getLogger('mlp')
mlp_gcn_logger = logging.getLogger('mlp_gcn')
n2vnet_logger = logging.getLogger("n2vnet")

gcn_logger.setLevel(logging.INFO)
mlp_logger.setLevel(logging.INFO)
mlp_gcn_logger.setLevel(logging.INFO)
n2vnet_logger.setLevel(logging.INFO)

file_handler_gcn = logging.FileHandler(f'../src/logs/nc_expt/gcn_{run}.log' , mode="w")
file_handler_mlp = logging.FileHandler(f'../src/logs/nc_expt/mlp_{run}.log' , mode="w")
file_handler_mlp_gcn = logging.FileHandler(f"../src/logs/nc_expt/mlp_gcn_{run}.log" , mode="w")
file_handler_n2vnet = logging.FileHandler(f"../src/logs/nc_expt/n2vnet_{run}.log" , mode="w")

formatter = logging.Formatter('%(asctime)s - %(message)s')
file_handler_gcn.setFormatter(formatter)
file_handler_mlp.setFormatter(formatter)
file_handler_mlp_gcn.setFormatter(formatter)
file_handler_n2vnet.setFormatter(formatter)

gcn_logger.addHandler(file_handler_gcn)
mlp_logger.addHandler(file_handler_mlp)
mlp_gcn_logger.addHandler(file_handler_mlp_gcn)
n2vnet_logger.addHandler(file_handler_n2vnet)

In [18]:
dataloader_args = {
    "root":os.path.abspath("..")+"/data",
    "raw_filenames":["graph_edges.txt" , "5000_communities.txt"],
    "expt":"n_communities",
    "processed_filenames":["data_nc_0.pt" , "data_nc_1.pt" , "data_nc_2.pt" , "data_nc_3.pt" ]
}

dataloader_args

{'root': '/Users/sbhardwaj/Documents/project_2/data',
 'raw_filenames': ['graph_edges.txt', '5000_communities.txt'],
 'expt': 'n_communities',
 'processed_filenames': ['data_nc_0.pt',
  'data_nc_1.pt',
  'data_nc_2.pt',
  'data_nc_3.pt']}

In [19]:
d_cn = DBLP_dataset(root = dataloader_args["root"] , raw_filenames = dataloader_args["raw_filenames"] ,
                    expt = dataloader_args["expt"], processed_filenames = dataloader_args["processed_filenames"])

In [20]:
data = d_cn[run].to(device)
data

Data(x=[14050, 5], edge_index=[2, 85388], y=[14050, 200], dtype=torch.float32, g=Graph with 14050 nodes and 35669 edges, train_mask=[14050], val_mask=[14050], test_mask=[14050])

In [7]:
gcn_args = {
    'device': device,
    'num_layers': 4,
    'hidden_dim': 16,
    'dropout': 0.4,
    'lr': 0.01,
    'epochs': 1000,
}

gcn_args

{'device': device(type='mps'),
 'num_layers': 4,
 'hidden_dim': 16,
 'dropout': 0.4,
 'lr': 0.01,
 'epochs': 1000}

In [8]:
gcn_model = GCN(data.x.shape[1] , gcn_args['hidden_dim'] , data.y.shape[1] , gcn_args['num_layers'] , gcn_args['dropout']).to(device)

total_params_GCN = sum(
	param.numel() for param in gcn_model.parameters()
)
print("GCN model number of parameters:" , total_params_GCN)

GCN model number of parameters: 2436


In [9]:
gcn_model.reset_parameters()

optimizer = torch.optim.Adam(gcn_model.parameters(), lr=gcn_args['lr'])
loss_fn = weighted_BCE(weight=None , reduction="sum")

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + gcn_args["epochs"]):
  
  loss = train_gcn(gcn_model , data , optimizer, loss_fn)
  result = test_gcn(gcn_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(gcn_model)

  gcn_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * acc(c_train):.2f}%, '
        f'Valid: {100 * acc(c_val):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = gcn_logger.handlers[:]
for handler in handlers:
    gcn_logger.removeHandler(handler)
    handler.close()

loc("mps_not_equal"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/4e1473ee-9f66-11ee-8daf-cedaeb4cabe2/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":253:0)): error: 'anec.not_equal_zero' op Invalid configuration for the following reasons: Tensor dimensions N1D1C1H1W33561 are not within supported range, N[1-65536]D[1-16384]C[1-65536]H[1-16384]W[1-16384].
loc("mps_select"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/4e1473ee-9f66-11ee-8daf-cedaeb4cabe2/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":294:0)): error: 'anec.not_equal_zero' op Invalid configuration for the following reasons: Tensor dimensions N1D1C1H1W33561 are not within supported range, N[1-65536]D[1-16384]C[1-65536]H[1-16384]W[1-16384].


Epoch: 01, Loss: 21855.3652, Train: 65.80%, Valid: 62.88% Test: 66.22%
Epoch: 02, Loss: 21800.3984, Train: 63.99%, Valid: 61.02% Test: 63.85%
Epoch: 03, Loss: 21755.1992, Train: 65.46%, Valid: 62.71% Test: 64.36%
Epoch: 04, Loss: 21695.5957, Train: 65.38%, Valid: 62.54% Test: 65.03%
Epoch: 05, Loss: 21637.1309, Train: 65.36%, Valid: 62.54% Test: 65.03%
Epoch: 06, Loss: 21594.3398, Train: 65.29%, Valid: 62.54% Test: 65.03%
Epoch: 07, Loss: 21542.4199, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 08, Loss: 21485.6699, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 09, Loss: 21399.9492, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 10, Loss: 21327.3555, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 11, Loss: 21260.0859, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 12, Loss: 21190.1484, Train: 65.29%, Valid: 62.54% Test: 65.03%
Epoch: 13, Loss: 21084.5469, Train: 65.36%, Valid: 62.54% Test: 65.03%
Epoch: 14, Loss: 21024.2617, Train: 65.40%, Valid: 62.54% Test: 65.03%
Epoch:

KeyboardInterrupt: 

In [154]:
best_valid_acc

0.9205389179755672

In [21]:
mlp_args = {
    "hidden_dim":16,
    "num_layers":4,
    "dropout":0.2,
    "lr":0.01,
    "epochs":200
}

mlp_args

{'hidden_dim': 16, 'num_layers': 4, 'dropout': 0.2, 'lr': 0.01, 'epochs': 200}

In [22]:
mlp_model = MLP(input_dim = data.x.shape[1], hidden_dim = mlp_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = mlp_args["num_layers"], dropout = mlp_args['dropout']).to(device)

total_params_mlp = sum(
	param.numel() for param in mlp_model.parameters()
)
print("MLP model number of parameters:" , total_params_mlp)

MLP model number of parameters: 2340


In [23]:
mlp_model.reset_parameters()

optimizer = torch.optim.Adam(mlp_model.parameters(), lr=mlp_args['lr'])
loss_fn = torch.nn.CrossEntropyLoss(weight=None , reduction="sum")

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + mlp_args["epochs"]):
  
  loss = train_mlp(mlp_model , data , optimizer, loss_fn)
  result = test_mlp(mlp_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(mlp_model)
        
  mlp_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * acc(c_train):.2f}%, '
        f'Valid: {100 * acc(c_val):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = mlp_logger.handlers[:]
for handler in handlers:
    mlp_logger.removeHandler(handler)
    handler.close()

Epoch: 01, Loss: 21797.4688, Train: 59.47%, Valid: 58.14% Test: 60.47%
Epoch: 02, Loss: 21769.3242, Train: 57.88%, Valid: 57.46% Test: 59.80%
Epoch: 03, Loss: 21734.1836, Train: 55.75%, Valid: 55.42% Test: 57.26%
Epoch: 04, Loss: 21694.2051, Train: 61.52%, Valid: 62.54% Test: 61.82%
Epoch: 05, Loss: 21647.6953, Train: 67.70%, Valid: 66.95% Test: 67.74%
Epoch: 06, Loss: 21591.1562, Train: 64.62%, Valid: 63.22% Test: 65.20%
Epoch: 07, Loss: 21523.9414, Train: 66.73%, Valid: 65.25% Test: 66.72%
Epoch: 08, Loss: 21448.9297, Train: 65.76%, Valid: 63.05% Test: 65.37%
Epoch: 09, Loss: 21361.0430, Train: 65.76%, Valid: 63.05% Test: 65.37%
Epoch: 10, Loss: 21255.2266, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 11, Loss: 21143.1562, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 12, Loss: 21018.9102, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 13, Loss: 20893.0820, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch: 14, Loss: 20748.2930, Train: 65.27%, Valid: 62.54% Test: 65.03%
Epoch:

In [158]:
best_valid_acc*100

91.90038394415357

In [159]:
mlp_gcn_args = {
    "hidden_dim":16,
    "encoding_dim":16,
    "num_layers":3,
    "dropout":0.2,
    "lr":0.01,
    "epochs":200
}

mlp_gcn_args

{'hidden_dim': 16,
 'encoding_dim': 16,
 'num_layers': 3,
 'dropout': 0.2,
 'lr': 0.01,
 'epochs': 200}

In [160]:
mlp_gcn_model = mlp_GCN(input_dim = data.x.shape[1], encoding_dim = mlp_gcn_args["encoding_dim"] , hidden_dim = mlp_gcn_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = mlp_gcn_args["num_layers"], dropout = mlp_gcn_args['dropout']).to(device)

total_params_mlp_gcn = sum(
	param.numel() for param in mlp_gcn_model.parameters()
)
print("GCN model with MLP layers number of parameters:" , total_params_mlp_gcn)

GCN model with MLP layers number of parameters: 9476


In [161]:
mlp_gcn_model.reset_parameters()

optimizer = torch.optim.Adam(mlp_gcn_model.parameters(), lr=mlp_gcn_args['lr'])
loss_fn = torch.nn.CrossEntropyLoss(weight=None , reduction="sum")

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + mlp_gcn_args["epochs"]):
  
  loss = train_gcn(mlp_gcn_model , data , optimizer, loss_fn)
  result = test_gcn(mlp_gcn_model, data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(mlp_gcn_model)
        
  mlp_gcn_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * acc(c_train):.2f}%, '
        f'Valid: {100 * acc(c_val):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = mlp_gcn_logger.handlers[:]
for handler in handlers:
    mlp_gcn_logger.removeHandler(handler)
    handler.close()

Epoch: 01, Loss: 155684.2344, Train: 52.03%, Valid: 52.03% Test: 52.04%
Epoch: 02, Loss: 155064.6719, Train: 54.03%, Valid: 54.02% Test: 54.04%
Epoch: 03, Loss: 154387.7188, Train: 57.56%, Valid: 57.55% Test: 57.56%
Epoch: 04, Loss: 153664.0156, Train: 62.91%, Valid: 62.91% Test: 62.91%
Epoch: 05, Loss: 152829.0000, Train: 68.60%, Valid: 68.59% Test: 68.60%
Epoch: 06, Loss: 151871.5781, Train: 77.01%, Valid: 77.01% Test: 77.00%
Epoch: 07, Loss: 150776.1562, Train: 82.04%, Valid: 82.04% Test: 82.03%
Epoch: 08, Loss: 149709.9531, Train: 86.30%, Valid: 86.28% Test: 86.29%
Epoch: 09, Loss: 148523.2500, Train: 88.38%, Valid: 88.38% Test: 88.38%
Epoch: 10, Loss: 147324.6562, Train: 89.90%, Valid: 89.91% Test: 89.90%
Epoch: 11, Loss: 146213.3438, Train: 90.26%, Valid: 90.26% Test: 90.26%
Epoch: 12, Loss: 145205.7812, Train: 90.78%, Valid: 90.78% Test: 90.78%
Epoch: 13, Loss: 144247.4375, Train: 91.31%, Valid: 91.31% Test: 91.31%
Epoch: 14, Loss: 143440.2969, Train: 91.41%, Valid: 91.41% Test:

In [162]:
best_valid_acc

0.9212132635253054

In [21]:
n2vnet_args = {
    "embedding_dim":16,
    "walk_length":10,
    "num_walks":100,
    "min_count":1,
    "batch_words":5,
    "window":10,

    "hidden_dim":16,
    "num_layers":4,
    "dropout":0.2,

    "lr":0.01,
    "epochs":2000
}

n2vnet_args

{'embedding_dim': 16,
 'walk_length': 10,
 'num_walks': 100,
 'min_count': 1,
 'batch_words': 5,
 'window': 10,
 'hidden_dim': 16,
 'num_layers': 4,
 'dropout': 0.2,
 'lr': 0.01,
 'epochs': 2000}

In [22]:
sorted_node_list = (list(data.g.nodes))
sorted_node_list.sort()
sorted_node_list = [str(node) for node in sorted_node_list]

n2v = node2vec.Node2Vec(data.g , dimensions=n2vnet_args["embedding_dim"] , walk_length=n2vnet_args["walk_length"] , num_walks=n2vnet_args["num_walks"]) 
embeddings = torch.tensor((n2v.fit(window=n2vnet_args["window"] , min_count=n2vnet_args["min_count"] , batch_words=n2vnet_args["batch_words"]).wv)[sorted_node_list] , dtype=torch.float32 , device=device)

Computing transition probabilities:   0%|          | 0/14050 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 100/100 [00:27<00:00,  3.64it/s]


In [23]:
n2vnet_model = n2vnet(data.g , input_dim = n2vnet_args["embedding_dim"], hidden_dim = n2vnet_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = n2vnet_args["num_layers"], dropout = n2vnet_args['dropout']).to(device)

total_params_n2vnet = sum(
	param.numel() for param in n2vnet_model.parameters()
)
print("n2vnet number of parameters:" , total_params_n2vnet)

n2vnet number of parameters: 4216


In [24]:
n2vnet_model.reset_parameters()

optimizer = torch.optim.Adam(n2vnet_model.parameters(), lr=n2vnet_args['lr'])
loss_fn = torch.nn.CrossEntropyLoss(weight=None , reduction="sum")

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + n2vnet_args["epochs"]):
  
  loss = train_n2v(n2vnet_model , embeddings , data , optimizer, loss_fn)
  result = test_n2v(n2vnet_model, embeddings , data)

  c_train , c_val , c_test = result

  if acc(c_val) > best_valid_acc:
      best_valid_acc = acc(c_val)
      best_model = copy.deepcopy(n2vnet_model)
        
  n2vnet_logger.info(f'Epoch {epoch:02d} '
      f'Loss {loss:.4f} '
      f'Train {c_train[0][0]:02d} {c_train[0][1]:02d} {c_train[1][0]:02d} {c_train[1][1]:02d} '
      f'Valid {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} '
      f'Test {c_val[0][0]:02d} {c_val[0][1]:02d} {c_val[1][0]:02d} {c_val[1][1]:02d} ')
  
  print((f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * acc(c_train):.2f}%, '
        f'Valid: {100 * acc(c_val):.2f}% '
        f'Test: {100 * acc(c_test):.2f}%'))
  
handlers = n2vnet_logger.handlers[:]
for handler in handlers:
    n2vnet_logger.removeHandler(handler)
    handler.close()

Epoch: 01, Loss: 61421.4961, Train: 55.05%, Valid: 56.79% Test: 55.92%
Epoch: 02, Loss: 61289.3047, Train: 61.48%, Valid: 63.64% Test: 63.36%
Epoch: 03, Loss: 61142.0234, Train: 64.89%, Valid: 67.11% Test: 66.60%
Epoch: 04, Loss: 60940.1562, Train: 64.93%, Valid: 67.11% Test: 66.87%
Epoch: 05, Loss: 60660.4023, Train: 67.93%, Valid: 69.60% Test: 68.94%
Epoch: 06, Loss: 60282.9414, Train: 72.89%, Valid: 74.24% Test: 73.55%
Epoch: 07, Loss: 59792.6523, Train: 74.36%, Valid: 76.32% Test: 74.52%
Epoch: 08, Loss: 59206.0078, Train: 73.87%, Valid: 76.04% Test: 73.76%
Epoch: 09, Loss: 58531.7969, Train: 73.33%, Valid: 75.55% Test: 73.00%
Epoch: 10, Loss: 57820.0898, Train: 73.52%, Valid: 76.52% Test: 73.69%
Epoch: 11, Loss: 57156.1289, Train: 73.43%, Valid: 76.66% Test: 73.76%
Epoch: 12, Loss: 56580.4453, Train: 72.70%, Valid: 76.18% Test: 73.07%
Epoch: 13, Loss: 56157.2812, Train: 73.25%, Valid: 76.52% Test: 73.35%
Epoch: 14, Loss: 55872.3516, Train: 72.97%, Valid: 76.25% Test: 73.00%
Epoch:

### Save best model

In [36]:
model_path = "../models/n2vnet_200.pt"
torch.save(best_model.state_dict(), model_path)

### Loading the saved model

In [28]:
load_model = n2vnet(data.g , input_dim = n2vnet_args["embedding_dim"], hidden_dim = n2vnet_args["hidden_dim"], output_dim = data.y.shape[1], num_layers = n2vnet_args["num_layers"], dropout = n2vnet_args['dropout']).to(device)
load_model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [29]:
load_model

n2vnet(
  (dense_layers): ModuleList(
    (0-2): 3 x Linear(in_features=16, out_features=16, bias=True)
    (3): Linear(in_features=16, out_features=200, bias=True)
  )
  (output): Sigmoid()
)

In [35]:
((load_model(embeddings)>0.5) == data.y).sum()/(data.y.shape[0]*data.y.shape[1])

tensor(0.9224, device='mps:0')

In [18]:
import numpy as np
a = np.array(data.g.nodes)

In [26]:
(np.eye(len(a))[a]).shape

(33380, 33380)

In [23]:
a

array([   0,    8,    7, ..., 1904, 1903, 1902])

In [33]:
degree = data.g.degree()
degree

DegreeView({0: 0, 8: 3, 7: 6, 6: 0, 5: 0, 4: 1, 3: 1, 2: 1, 1: 1, 80: 2, 79: 2, 78: 4, 77: 1, 76: 2, 75: 3, 74: 6, 73: 2, 72: 6, 71: 4, 70: 2, 69: 3, 68: 2, 67: 7, 66: 2, 65: 1, 64: 1, 63: 7, 62: 2, 61: 5, 60: 5, 59: 23, 58: 3, 57: 1, 56: 9, 55: 8, 54: 1, 53: 6, 52: 3, 51: 4, 50: 1, 49: 0, 48: 5, 47: 2, 46: 0, 45: 1, 44: 1, 43: 4, 42: 2, 41: 1, 40: 4, 39: 4, 38: 2, 37: 3, 36: 4, 35: 4, 34: 4, 33: 6, 32: 4, 31: 1, 30: 3, 29: 1, 28: 3, 27: 0, 26: 2, 25: 1, 24: 1, 23: 1, 22: 0, 21: 1, 20: 1, 19: 4, 18: 0, 17: 3, 16: 2, 15: 1, 14: 2, 13: 1, 12: 2, 11: 6, 10: 2, 9: 4, 1901: 5, 1900: 2, 1899: 8, 1898: 4, 1897: 4, 1896: 1, 1895: 1, 1894: 7, 1893: 16, 1892: 3, 1891: 2, 1890: 1, 1889: 19, 1888: 3, 1887: 7, 1886: 22, 1885: 2, 1884: 6, 1883: 3, 1882: 1, 1881: 8, 1880: 12, 1879: 5, 1878: 11, 1877: 9, 1876: 2, 1875: 6, 1874: 13, 1873: 3, 1872: 5, 1871: 6, 1870: 5, 1869: 5, 1868: 1, 1867: 6, 1866: 3, 1865: 1, 1864: 5, 1863: 1, 1862: 4, 1861: 5, 1860: 0, 1859: 4, 1858: 1, 1857: 5, 1856: 7, 1855: 4, 1

In [31]:
np.array(degree)[: , 1]

array([ 0,  3,  6, ..., 12,  1, 12])

In [35]:
import networkx as nx

In [38]:
nx.clustering(data.g)

{0: 0,
 8: 0,
 7: 0,
 6: 0,
 5: 0,
 4: 0,
 3: 0,
 2: 0,
 1: 0,
 80: 0,
 79: 1.0,
 78: 0.16666666666666666,
 77: 0,
 76: 0,
 75: 0.3333333333333333,
 74: 0.06666666666666667,
 73: 0,
 72: 0.13333333333333333,
 71: 0.6666666666666666,
 70: 1.0,
 69: 0,
 68: 1.0,
 67: 0.5714285714285714,
 66: 1.0,
 65: 0,
 64: 0,
 63: 0.19047619047619047,
 62: 0,
 61: 0.3,
 60: 0,
 59: 0.05138339920948617,
 58: 0.3333333333333333,
 57: 0,
 56: 0.05555555555555555,
 55: 0.03571428571428571,
 54: 0,
 53: 0.4,
 52: 0,
 51: 0.5,
 50: 0,
 49: 0,
 48: 0.6,
 47: 0,
 46: 0,
 45: 0,
 44: 0,
 43: 0,
 42: 0,
 41: 0,
 40: 0,
 39: 0,
 38: 1.0,
 37: 0.3333333333333333,
 36: 0.3333333333333333,
 35: 0.16666666666666666,
 34: 0,
 33: 0.26666666666666666,
 32: 0,
 31: 0,
 30: 0,
 29: 0,
 28: 0,
 27: 0,
 26: 1.0,
 25: 0,
 24: 0,
 23: 0,
 22: 0,
 21: 0,
 20: 0,
 19: 0,
 18: 0,
 17: 0.3333333333333333,
 16: 1.0,
 15: 0,
 14: 0,
 13: 0,
 12: 0,
 11: 0.06666666666666667,
 10: 0,
 9: 0,
 1901: 0.9,
 1900: 0,
 1899: 0.0357142857