In [4]:
# %load runSA1SA2.py
import ggcnn.experiment as experiment
import sys
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold

# Number of SA1 rows read by the most recent load_sa1_dataset() call.  SA1
# nodes occupy indices [0, SA1DatasetSize) of the returned arrays and the SA2
# nodes follow them.
SA1DatasetSize = 0
# Prefix prepended verbatim to every data file path (no separator is added).
dataFolder = ''


def _iter_csv_rows(path):
    """Yield the comma-split fields of every data row in ``path``.

    The first line is treated as a header and skipped.  Only the line
    terminator is removed, so a final row that lacks a trailing newline keeps
    its last character.  Empty lines are ignored.
    """
    with open(path, 'r') as file:
        for i, line in enumerate(file):
            if i == 0:  # Skip first line (header)
                continue
            row = line.rstrip('\n')
            if row:
                yield row.split(',')


def _add_links(path, index_of, adj_mat, forward, backward):
    """Mark every link listed in ``path`` in the adjacency tensor.

    For a row whose first two fields are the keys of nodes ``a`` and ``b``
    this sets ``adj_mat[a, forward, b]`` and ``adj_mat[b, backward, a]`` to 1.
    Any further columns in the row are ignored.

    Raises:
        ValueError: if a key in the file is not a known node key (the same
            exception type ``list.index`` raised before the lookup table was
            introduced).
    """
    for s in _iter_csv_rows(path):
        try:
            a = index_of[s[0]]
            b = index_of[s[1]]
        except KeyError as err:
            raise ValueError(
                'Unknown node key {} in {!r}'.format(err, path)) from err
        adj_mat[a, forward, b] = 1
        adj_mat[b, backward, a] = 1


def load_sa1_dataset():
    """Load the combined SA1 + SA2 graph from the CSV files under ``dataFolder``.

    Node rows have the layout ``key, feature..., outcome``.  SA1 rows are read
    first and keep their outcome as the label; SA2 rows are appended after
    them with a label of 0 (their last column is still excluded from the
    features).  As a side effect the global ``SA1DatasetSize`` is set to the
    number of SA1 rows.

    Returns:
        tuple: ``(features, adj_mat, labels)`` where
            * ``features`` is a 2-D array with one row per node,
            * ``adj_mat`` is an ``(N, 4, N)`` array of 0/1 entries whose
              second axis selects the relation: 0 = SA1-SA1 links,
              1 = SA2-SA2 links (both symmetric), 2 = first-column node to
              second-column node of ``SA1SA2Links.csv``, 3 = the reverse
              direction of relation 2,
            * ``labels`` is a 1-D array with one value per node.

    Raises:
        ValueError: if a link file references a key that is not a node key,
            or a numeric field cannot be parsed.
    """
    global SA1DatasetSize
    keys = []
    features = []
    labels = []

    # Load SA1 Node Features
    for s in _iter_csv_rows(dataFolder + 'Data/2018-08-24-NSW-SA1Input-Normalised.csv'):
        keys.append(s[0])
        features.extend([float(v) for v in s[1:-1]])  # Last column is the outcome y
        labels.append(float(s[-1]))

    SA1DatasetSize = len(labels)

    # Load SA2 Node Features
    for s in _iter_csv_rows(dataFolder + 'Data/2018-08-28-NSW-SA2Input-Normalised.csv'):
        keys.append(s[0])
        features.extend([float(v) for v in s[1:-1]])  # Last column is the outcome y
        labels.append(0)  # SA2 nodes carry a placeholder label

    labels = np.array(labels)
    features = np.array(features).reshape((len(keys), -1))

    # Key -> node index.  setdefault keeps the first occurrence of a duplicated
    # key, matching what list.index() returned, but each lookup is O(1)
    # instead of a linear scan per link row.
    index_of = {}
    for idx, key in enumerate(keys):
        index_of.setdefault(key, idx)

    adj_mat = np.zeros((len(labels), 4, len(labels)))

    # Load SA1 Link Features (symmetric, relation 0)
    _add_links(dataFolder + 'Data/2018-08-25-NSW-NeighbourDistance.csv',
               index_of, adj_mat, 0, 0)

    # Load SA2 Link Features (symmetric, relation 1)
    _add_links(dataFolder + 'Data/Geography/2018-08-28-NSW-SA2_Neighbouring_Suburbs_With_Bridges-GCC.csv',
               index_of, adj_mat, 1, 1)

    # Load SA1, SA2 Links (directed: relation 2 one way, relation 3 back)
    _add_links(dataFolder + 'Data/SA1SA2Links.csv',
               index_of, adj_mat, 2, 3)

    return features, adj_mat, labels

# Read all CSV inputs once, at cell/module execution time.  `dataset` is the
# (features, adj_mat, labels) tuple returned by load_sa1_dataset(); the call
# also sets the global SA1DatasetSize.
dataset = load_sa1_dataset()

class SA1Experiment:
    """Network recipe: an embedding stem, repeated graph-conv blocks, a 1-unit head.

    Attributes are plain values read when the network is built:
    ``neurons`` is the width passed to every hidden layer and ``blocks`` is
    how many graph-convolution blocks are stacked.
    """

    def __init__(self, neurons, blocks):
        self.neurons = neurons
        self.blocks = blocks

    def _append_block(self, net, width):
        """Add one block: graph-conv, dropout, embedding, dropout."""
        net.make_graphcnn_layer(width)
        net.make_dropout_layer()
        net.make_embedding_layer(width)
        net.make_dropout_layer()

    def create_network(self, net, input):
        """Issue the layer-construction calls on ``net`` for this architecture.

        Args:
            net: builder object exposing ``create_network`` and the
                ``make_*_layer`` methods used below.
            input: passed through unchanged to ``net.create_network``.
        """
        width = self.neurons

        # Stem.  (The adjacency-adjustment layer is intentionally not added.)
        net.create_network(input)
        net.make_embedding_layer(width)
        net.make_dropout_layer()

        # Body: `blocks` identical graph-convolution blocks.
        for _ in range(self.blocks):
            self._append_block(net, width)

        # Head: one more embedding, then a single-output graph-conv layer
        # with batch-norm and activation switched off.
        net.make_embedding_layer(width)
        net.make_graphcnn_layer(1, name='final', with_bn=False, with_act_func=False)


# Cross-validation setup: shuffled K-fold with a fixed seed, so the fold
# membership is reproducible between runs.
no_folds = 5  # number of K-fold splits
inst = KFold(n_splits = no_folds, shuffle=True, random_state=125)


# Run configuration.
l = 2   # number of graph-conv blocks (passed as `blocks` below)
n = 64  # layer width (passed as `neurons` below)
i = 2   # zero-based index of the fold used for this run


# NOTE(review): the two string arguments appear to be run/model identifiers
# (the training log prints this name as the model) -- confirm their exact
# meaning against GGCNNExperiment.
exp = experiment.GGCNNExperiment('2018-08-28-SA1SA2', '2018-08-28-SA1SA2', SA1Experiment(neurons = n, blocks = l))

# Training settings; their semantics are defined by ggcnn.experiment.
exp.num_iterations = 2000
exp.optimizer = 'adam'
exp.loss_type = "linear"

exp.debug = True  # debug flag enabled for this run

# Hand the (features, adj_mat, labels) tuple to the experiment.
exp.preprocess_data(dataset)

# Split only the SA1 index range [0, SA1DatasetSize) and take fold `i`.
# SA2 nodes sit after the SA1 nodes in the dataset, so they land in neither
# the train nor the test index set.
train_idx, test_idx = list(inst.split(np.arange( SA1DatasetSize )))[i]
# Disabled alternatives kept for reference: appending the SA2 indices to both
# index sets, and swapping train/test to get a larger test set.
# print('Before: ', exp.train_idx.shape)
# exp.train_idx = np.append(exp.train_idx, np.arange( SA1DatasetSize , len(dataset[-1] )))
# exp.test_idx = np.append(exp.test_idx, np.arange( SA1DatasetSize , len(dataset[-1] )))
# print('After: ', exp.train_idx.shape)
# test_idx, train_idx = list(inst.split(np.arange(len(dataset[-1]))))[i]  # Reversed to get more samples in the test set than the training set


# Order matters: indices are registered, then the network is built, then
# training is run.  `results` holds whatever exp.run() returns.
exp.create_data(train_idx, test_idx)
exp.build_network()
results = exp.run()


2018-08-28 01:53:35.418382 Creating training Tensorflow Tensors
2018-08-28 01:53:35.419201 Creating training network
2018-08-28 01:53:36.059083 Creating loss function and summaries
2018-08-28 01:53:36.108261 Training model "2018-08-28-SA1SA2"!
2018-08-28 01:53:36.108340 Preparing training
2018-08-28 01:53:38.607828 Starting threads
2018-08-28 01:53:38.608528 Starting training. train_batch_size: 0 test_batch_size: 0
2018-08-28 01:53:38.980436 Test Step 0 Finished
2018-08-28 01:53:38.980871 Test Step 0 "min loss" =  6.310109e+18
2018-08-28 01:53:38.980958 Test Step 0 "loss" =  6.310109e+18
2018-08-28 01:53:40.062979 Training Step 0 Finished Timing (Training: 0.743944, Test: 0.255637) after 1.45432 seconds
2018-08-28 01:53:40.063120 Training Step 0 "min loss" =  2880.5945
2018-08-28 01:53:40.063186 Training Step 0 "loss" =  2880.5945
2018-08-28 01:53:40.643767 Test Step 5 Finished
2018-08-28 01:53:40.643907 Test Step 5 "min loss" =  2658.7876
2018-08-28 01:53:40.644467 Test Step 5 "loss" 

2018-08-28 01:53:53.477274 Test Step 95 Finished
2018-08-28 01:53:53.477413 Test Step 95 "min loss" =  540.2878
2018-08-28 01:53:53.477559 Test Step 95 "loss" =  680.7856
2018-08-28 01:53:53.611170 Training Step 95 Finished Timing (Training: 0.918532, Test: 0.0793237) after 0.709064 seconds
2018-08-28 01:53:53.611535 Training Step 95 "min loss" =  186.27339
2018-08-28 01:53:53.612276 Training Step 95 "loss" =  206.00185
2018-08-28 01:53:54.193250 Test Step 100 Finished
2018-08-28 01:53:54.193788 Test Step 100 "min loss" =  540.2878
2018-08-28 01:53:54.193858 Test Step 100 "loss" =  634.4383
2018-08-28 01:53:54.317971 Training Step 100 Finished Timing (Training: 0.918572, Test: 0.0792467) after 0.70545 seconds
2018-08-28 01:53:54.318079 Training Step 100 "min loss" =  182.44594
2018-08-28 01:53:54.318180 Training Step 100 "loss" =  194.979
2018-08-28 01:53:54.927756 Test Step 105 Finished
2018-08-28 01:53:54.927904 Test Step 105 "min loss" =  540.2878
2018-08-28 01:53:54.927969 Test Ste

2018-08-28 01:54:07.719083 Test Step 195 Finished
2018-08-28 01:54:07.719609 Test Step 195 "min loss" =  540.2878
2018-08-28 01:54:07.720145 Test Step 195 "loss" =  822.9792
2018-08-28 01:54:07.850623 Training Step 195 Finished Timing (Training: 0.916985, Test: 0.0810743) after 0.697891 seconds
2018-08-28 01:54:07.850708 Training Step 195 "min loss" =  119.25749
2018-08-28 01:54:07.851358 Training Step 195 "loss" =  138.53381
2018-08-28 01:54:08.418955 Test Step 200 Finished
2018-08-28 01:54:08.419103 Test Step 200 "min loss" =  540.2878
2018-08-28 01:54:08.419956 Test Step 200 "loss" =  846.001
2018-08-28 01:54:08.555300 Training Step 200 Finished Timing (Training: 0.916919, Test: 0.0810602) after 0.703273 seconds
2018-08-28 01:54:08.555441 Training Step 200 "min loss" =  114.784836
2018-08-28 01:54:08.555503 Training Step 200 "loss" =  114.784836
2018-08-28 01:54:09.148443 Test Step 205 Finished
2018-08-28 01:54:09.148588 Test Step 205 "min loss" =  540.2878
2018-08-28 01:54:09.14913

2018-08-28 01:54:22.046587 Test Step 295 Finished
2018-08-28 01:54:22.046731 Test Step 295 "min loss" =  540.2878
2018-08-28 01:54:22.046791 Test Step 295 "loss" =  932.48016
2018-08-28 01:54:22.187765 Training Step 295 Finished Timing (Training: 0.918524, Test: 0.0796015) after 0.724405 seconds
2018-08-28 01:54:22.187893 Training Step 295 "min loss" =  87.52433
2018-08-28 01:54:22.187958 Training Step 295 "loss" =  97.013985
2018-08-28 01:54:22.764053 Test Step 300 Finished
2018-08-28 01:54:22.764191 Test Step 300 "min loss" =  540.2878
2018-08-28 01:54:22.764258 Test Step 300 "loss" =  889.1042
2018-08-28 01:54:22.903003 Training Step 300 Finished Timing (Training: 0.918452, Test: 0.0796741) after 0.714976 seconds
2018-08-28 01:54:22.903077 Training Step 300 "min loss" =  87.52433
2018-08-28 01:54:22.903143 Training Step 300 "loss" =  102.954544
2018-08-28 01:54:23.490811 Test Step 305 Finished
2018-08-28 01:54:23.490952 Test Step 305 "min loss" =  540.2878
2018-08-28 01:54:23.491657

2018-08-28 01:54:36.359350 Test Step 395 Finished
2018-08-28 01:54:36.359478 Test Step 395 "min loss" =  540.2878
2018-08-28 01:54:36.360029 Test Step 395 "loss" =  985.1378
2018-08-28 01:54:36.495801 Training Step 395 Finished Timing (Training: 0.918893, Test: 0.0788909) after 0.710955 seconds
2018-08-28 01:54:36.496292 Training Step 395 "min loss" =  73.41544
2018-08-28 01:54:36.496406 Training Step 395 "loss" =  83.0519
2018-08-28 01:54:37.074055 Test Step 400 Finished
2018-08-28 01:54:37.074266 Test Step 400 "min loss" =  540.2878
2018-08-28 01:54:37.075022 Test Step 400 "loss" =  970.807
2018-08-28 01:54:37.199804 Training Step 400 Finished Timing (Training: 0.918849, Test: 0.078873) after 0.702645 seconds
2018-08-28 01:54:37.199939 Training Step 400 "min loss" =  73.41544
2018-08-28 01:54:37.200552 Training Step 400 "loss" =  87.67885
2018-08-28 01:54:37.769964 Test Step 405 Finished
2018-08-28 01:54:37.770516 Test Step 405 "min loss" =  540.2878
2018-08-28 01:54:37.770864 Test S

AttributeError: 'GraphCNNNetwork' object has no attribute 'dist_beta'