In [3]:
import os
import sys

import h5py
import mxnet as mx
import numpy as np
from mxnet import nd, autograd, gluon


In [4]:
# Root folder of the HDF5 training data. Set the GO_DATA_DIR environment
# variable to use a different location; the fallback is the path that was
# previously hard-coded, so existing setups behave exactly as before.
DATA_DIR = os.environ.get(
    'GO_DATA_DIR',
    '/Users/valentinwolf/Documents/Studium/Machine_Learning/SoftwareProjekt/data')

# Both files are opened read-only and are kept open, because later cells read
# the 'X' and 'Y' datasets from these handles.
valnet_data = h5py.File(os.path.join(DATA_DIR, 'val_mount', 'data-val-all-combined.h5'), 'r')
polnet_data = h5py.File(os.path.join(DATA_DIR, 'pol_point', 'data-pol-gogod-tygem-combined.h5'), 'r')

In [3]:
# Short handles used by every later cell (dataset inspection, NDArrayIter
# construction). They were missing before, so a Restart & Run All stopped with
# a NameError on `vX`.
vX = valnet_data['X']
vY = valnet_data['Y']

pX = polnet_data['X']
pY = polnet_data['Y']

# NOTE(review): the *_train and *_val names below read the very same datasets,
# so there is no hold-out split yet. They are kept for backward compatibility.
# TODO: carve out a real validation split before using *_val for evaluation.
vX_train = valnet_data['X']
vY_train = valnet_data['Y']

vX_val = valnet_data['X']
vY_val = valnet_data['Y']

pX_train = polnet_data['X']
pY_train = polnet_data['Y']

pX_val = polnet_data['X']
pY_val = polnet_data['Y']

In [4]:
# Display the value-net input dataset handle (its repr shows shape and dtype).
vX

<HDF5 dataset "X": shape (22993072, 49, 19, 19), type "|b1">

In [8]:
# Display the value-net target dataset handle (its repr shows shape and dtype).
vY

<HDF5 dataset "Y": shape (22993072,), type "|b1">

In [17]:
# Display the policy-net input dataset handle (its repr shows shape and dtype).
pX

<HDF5 dataset "X": shape (79327539, 49, 19, 19), type "|b1">

In [18]:
# Display the policy-net target dataset handle (its repr shows shape and dtype).
pY

<HDF5 dataset "Y": shape (79327539,), type "<i8">

In [16]:
# Peek at the first 100 policy targets to see what the label values look like.
pY[:100]

array([267, 131, 192, 145, 117,  10, 135, 273,  73, 328,  39,  65, 106,
       122, 228, 141, 202, 281,  39, 179, 235,  62, 291,  60,  61, 218,
       315, 197,   9,  94, 143, 159,  87, 249, 197,  47, 115,  73,  87,
       258, 349, 233,   3, 120, 131,  42, 149, 292, 335,  83, 240, 140,
        87,  30,  56,  93, 288, 158, 294, 338, 124, 357, 278,  47, 112,
       255, 104, 271, 242, 286, 213, 126, 245, 315,  30,  33, 267,  80,
       158, 255, 301, 296,  12, 117, 314,  42, 156,  94, 302, 123, 177,
       327, 355, 248,  25, 129,  92,  44, 195, 196])

In [19]:
# Device that hosts the network and the mini-batches (alternative: mx.gpu()).
ctx = mx.cpu()
# Number of samples per mini-batch; also passed to trainer.step() further down.
batch_size = 128

# One batch iterator per dataset, both configured identically.
piter = mx.io.NDArrayIter(
    data=pX, label=pY, batch_size=batch_size, last_batch_handle='roll_over')
viter = mx.io.NDArrayIter(
    data=vX, label=vY, batch_size=batch_size, last_batch_handle='roll_over')

def _conv3x3(channels, stride, in_channels, kernel_size, groups, padding):
    """Create a bias-free ``gluon.nn.Conv2D`` layer.

    Despite the name, the kernel size is not fixed to 3x3: it is whatever the
    caller passes as ``kernel_size``.

    Parameters
    ----------
    channels : number of output channels.
    stride : convolution stride.
    in_channels : number of input channels.
    kernel_size : size of the convolution kernel.
    groups : number of convolution groups.
    padding : padding applied to the input.

    Returns
    -------
    The configured ``gluon.nn.Conv2D`` block (``use_bias=False``).
    """
    conv_options = dict(
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        groups=groups,
        in_channels=in_channels,
        use_bias=False,
    )
    return gluon.nn.Conv2D(channels, **conv_options)

class BasicBlockV2(gluon.HybridBlock):
    """Residual block: two (BatchNorm -> LeakyReLU -> 3x3 conv) stages plus a skip.

    The block input is added unchanged to the output of the conv stack, so the
    stack has to return an array of the same shape as its input.

    Parameters
    ----------
    channels : output channels of both convolutions.
    stride : stride of the first convolution only; the second always uses 1.
    in_channels : input channels of the first convolution (default 0;
        presumably lets Gluon infer it on first use -- confirm).
    **kwargs : forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, channels, stride=1, in_channels=0, **kwargs):
        super(BasicBlockV2, self).__init__(**kwargs)
        self.convs = stack = gluon.nn.HybridSequential()
        # (stride, input channels) of the convolution closing each stage.
        stage_specs = ((stride, in_channels), (1, channels))
        for conv_stride, conv_inputs in stage_specs:
            stack.add(gluon.nn.BatchNorm())
            stack.add(gluon.nn.LeakyReLU(alpha=0.3))
            stack.add(_conv3x3(channels, conv_stride, conv_inputs, 3, 1, 1))

    def hybrid_forward(self, F, x):
        """Return ``convs(x) + x`` (the residual connection)."""
        return self.convs(x) + x

class CombinedNet(gluon.HybridBlock):
    """Shared convolutional trunk feeding a policy head and a value head.

    Parameters
    ----------
    num_filters : channel width of the trunk convolutions; also the width of
        the hidden dense layer in the value head.
    num_blocks : number of ``BasicBlockV2`` residual blocks in the trunk.
    **kwargs : forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, num_filters, num_blocks, **kwargs):
        super(CombinedNet, self).__init__(**kwargs)

        # Policy output size: one class per point of the 19x19 grid plus one
        # extra class (presumably the pass move -- confirm).
        num_moves = 19 * 19 + 1
        leak = 0.3

        with self.name_scope():
            # Trunk: conv -> residual blocks -> conv.
            self.convs = trunk = gluon.nn.HybridSequential()
            trunk.add(gluon.nn.Conv2D(num_filters, kernel_size=3, padding=1))
            trunk.add(gluon.nn.LeakyReLU(alpha=leak))

            for _ in range(num_blocks):
                trunk.add(BasicBlockV2(num_filters))

            trunk.add(gluon.nn.Conv2D(num_filters, kernel_size=3, padding=1))
            trunk.add(gluon.nn.LeakyReLU(alpha=leak))

            # Value head: ends in a single unit.
            self.value = value_head = gluon.nn.HybridSequential()
            value_head.add(gluon.nn.Conv2D(2, kernel_size=3, padding=1))
            value_head.add(gluon.nn.LeakyReLU(alpha=leak))
            value_head.add(gluon.nn.Flatten())
            value_head.add(gluon.nn.Dense(num_filters))
            value_head.add(gluon.nn.LeakyReLU(alpha=leak))
            value_head.add(gluon.nn.Dense(1))

            # Policy head: ends in one unit per move class.
            self.policy = policy_head = gluon.nn.HybridSequential()
            policy_head.add(gluon.nn.Conv2D(2, kernel_size=3, padding=1))
            policy_head.add(gluon.nn.LeakyReLU(alpha=leak))
            policy_head.add(gluon.nn.Flatten())
            policy_head.add(gluon.nn.Dense(num_moves * 2))
            policy_head.add(gluon.nn.LeakyReLU(alpha=leak))
            policy_head.add(gluon.nn.Dense(num_moves))

    def hybrid_forward(self, F, x):
        """Run the trunk once and evaluate both heads on its output.

        Returns a 4-tuple: ``(softmax(policy), sigmoid(value), policy, value)``,
        i.e. the activated outputs followed by the raw head outputs.
        """
        features = self.convs(x)
        policy_raw = self.policy(features)
        value_raw = self.value(features)

        return F.softmax(policy_raw), F.sigmoid(value_raw), policy_raw, value_raw

# Network with 64 filters and 4 residual blocks, initialised on `ctx`.
net = CombinedNet(num_filters=64, num_blocks=4)
net.collect_params().initialize(init=mx.init.MSRAPrelu(), ctx=ctx)
net.hybridize()

# Loss for the policy output (sparse_label=False was left disabled).
policy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
policy_loss.hybridize()

# Loss for the value output.
value_loss = gluon.loss.SigmoidBCELoss()
value_loss.hybridize()

# One NAG optimiser updates every parameter of the network.
trainer = gluon.Trainer(
    net.collect_params(),
    'NAG',
    {'learning_rate': .1, 'momentum': .9, 'wd': 1e-10})

def rmean(series, win=1000):
    """Return the mean of the last ``win`` entries of ``series``.

    When ``series`` holds fewer than ``win`` entries, all of them are averaged.
    """
    recent = series[-win:]
    return np.mean(recent)

# Training history, one entry appended per iteration by the training loop.
# Losses: combined, value, policy.
closses, vlosses, plosses = [], [], []
# Accuracies in percent: value, policy.
vaccs, paccs = [], []

# Stand-alone sigmoid block; the training loop applies it to the raw value
# output when computing the value accuracy.
sigmoid = gluon.nn.Activation(activation='sigmoid')



In [None]:
def wrap_iter(it):
    """Yield items from ``it`` forever, rewinding it whenever it runs out.

    ``it`` must provide ``next()``, which raises ``StopIteration`` once
    exhausted, and ``reset()``, which rewinds it to the start.
    """
    while True:
        try:
            batch = it.next()
        except StopIteration:
            # End of a pass: rewind and fetch again on the next round.
            it.reset()
        else:
            yield batch

def _select_planes(raw, ctx):
    """Keep planes 0-3 and 37-44 (axis 1) of ``raw`` and move the result to ``ctx``."""
    return mx.ndarray.concat(raw[:, :4, :, :], raw[:, 37:45, :, :], dim=1).as_in_context(ctx)


# Train on one policy batch and one value batch per iteration; both iterators
# are wrapped so they restart automatically when a pass over the data ends.
for i, (pbatch, vbatch) in enumerate(zip(wrap_iter(piter), wrap_iter(viter))):
    # use AlphaGo Zero feature planes
    vx = _select_planes(vbatch.data[0], ctx)
    vy = vbatch.label[0].as_in_context(ctx)

    px = _select_planes(pbatch.data[0], ctx)
    py = pbatch.label[0].as_in_context(ctx)

    with autograd.record():
        # Two forward passes: the value and policy samples come from
        # different datasets, so each head is trained on its own batch.
        _, _, _, vp = net(vx)
        vloss = value_loss(vp, vy)

        _, _, pp, _ = net(px)
        ploss = policy_loss(pp, py)

        #Future:
        #_, _, pp, vp = net(x)
        #vloss = value_loss(vp, vy)
        #ploss = policy_loss(pp, py)

        combined_loss = (vloss.mean() + ploss.mean())

    combined_loss.backward()

    # Log batch means. vloss / ploss hold one value per sample, so taking
    # element [0] (as before) recorded only the first sample of each batch and
    # made the V / P columns inconsistent with the combined loss C.
    closses.append(combined_loss.asscalar())
    vlosses.append(vloss.mean().asscalar())
    plosses.append(ploss.mean().asscalar())
    paccs.append((pp.argmax(axis=-1) == py).mean().asscalar() * 100)
    vaccs.append(((sigmoid(vp) > .5).flatten()[:, 0] == vy).mean().asscalar() * 100)

    # NOTE(review): combined_loss is already a batch mean, and
    # Trainer.step(batch_size) presumably rescales gradients by 1/batch_size
    # again, which would shrink the effective learning rate -- confirm whether
    # step(1) is intended. Left unchanged to keep training dynamics as they were.
    trainer.step(batch_size)

    sys.stdout.write('\r{}: C:{:.3f}, V:{:.3f} ({:.1f}%), P:{:.3f} ({:.1f}%)'.format(
        i, rmean(closses), rmean(vlosses), rmean(vaccs), rmean(plosses), rmean(paccs)))

    # Hard stop after iteration 1,000,000.
    if i == 1000000:
        break

In [1]:
# Write the trained network to disk. This needs `net` from the model and
# training cells in the same kernel session; run on a fresh kernel it fails
# with "NameError: name 'net' is not defined".
# NOTE(review): HybridBlock.export appears to treat its argument as a path
# prefix (producing '<path>-symbol.json' and '<path>-0000.params'), so the
# '.json' suffix here may be unintended -- confirm against the MXNet docs.
net.export('net.json')

NameError: name 'net' is not defined

In [7]:
# Scratch check: a negative stop index drops the trailing elements, the same
# slicing form that rmean() relies on with a negative start.
x = np.arange(10)
x[:-2]

array([0, 1, 2, 3, 4, 5, 6, 7])