Fixed converter script for minigo removing bias.
Fixes #2020.

Pull request #2133.
sethtroisi authored and gcp committed Apr 2, 2019
1 parent d10ab94 commit 406bb61
Showing 1 changed file with 175 additions and 56 deletions.
231 changes: 175 additions & 56 deletions training/minigo/convert_minigo.py
#!/usr/bin/env python3
import re
import os
import sys

import numpy as np
import tensorflow as tf

# Hide boring TF log statements
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # or any {'0', '1', '2'}

if 0:
    # Exports graph to tensorboard
    with tf.Session() as sess:
        writer = tf.summary.FileWriter('logs', sess.graph)
        writer.close()


def matches(name, parts):
    return all(part in name for part in parts)

def deduped(names):
    names = [re.sub(r'_\d+', '', name) for name in names]
    return sorted([(n, names.count(n)) for n in set(names)])
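# Example (added comment, not in the original script):
#   deduped(['conv2d/kernel:0', 'conv2d_1/kernel:0', 'dense/kernel:0'])
#   == [('conv2d/kernel:0', 2), ('dense/kernel:0', 1)]
# The per-name counts are used below to infer the number of residual layers.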

def getMinigoWeightsV1(model):
    """Load and massage Minigo weights to Leela format.

    This version works on older models (v9 or before)
    but was broken when conv bias was removed in v10.
    See: https://github.com/tensorflow/minigo/pull/292 and
    https://github.com/gcp/leela-zero/issues/2020
    """
    sess = tf.Session()
    saver = tf.train.import_meta_graph(model + '.meta')
    saver.restore(sess, model)

    trainable_names = []
    for v in tf.trainable_variables():
        trainable_names.append(v.name)

    weights = []
    for v in tf.global_variables():
        if v.name in trainable_names:
            weights.append(v)
        elif 'batch_normalization' in v.name:
            # Moving mean and variance are not trainable, but are needed for the model
            if 'moving_mean' in v.name or 'moving_variance' in v.name:
                weights.append(v)

    # To match the format of V2
    weights_v2_format = []
    for w in weights:
        if 0:
            print(w.name)
        nparray = w.eval(session=sess)
        weights_v2_format.append((w.name, nparray))
    return weights_v2_format

def getMinigoWeightsV2(model):
    """Load and massage Minigo weights to Leela format.

    Unlike V1, this version also handles newer models (v10 or later)
    where the conv bias was removed.
    See: https://github.com/tensorflow/minigo/pull/292 and
    https://github.com/gcp/leela-zero/issues/2020
    """
    var_names = tf.train.load_checkpoint(model).get_variable_to_dtype_map()

    # count() overcounts by 3 from policy/value head and each layer has two convolutions.
    layers = (max([count for n, count in deduped(var_names)]) - 3) // 2
    print(layers, 'layers')
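    # Worked example (added, not in the original): a hypothetical 9-block net
    # has 2*9 + 1 tower convolutions plus 2 head convolutions, so 'conv2d'
    # appears 21 times after dedup; (21 - 3) // 2 recovers the 9 layers.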

    has_conv_bias = any(matches(name, ('conv2d', 'bias')) for name in var_names.keys())
    if not has_conv_bias:
        print('Did not find conv bias in this model, using all zeros')
        empty_conv_bias = tf.constant([], name='placeholder_for_conv_bias')

    # 2 * layers copies of
    #   6*n + 0: conv2d/kernel:0
    #   6*n + 1: conv2d/bias:0
    #   6*n + 2: batch_normalization/gamma:0
    #   6*n + 3: batch_normalization/beta:0
    #   6*n + 4: batch_normalization/moving_mean:0
    #   6*n + 5: batch_normalization/moving_variance:0
    # at the end 2x
    #   conv2d_39/kernel:0
    #   conv2d_39/bias:0
    #   batch_normalization_39/moving_mean:0
    #   batch_normalization_39/moving_variance:0
    #   dense/kernel:0
    #   dense/bias:0
    # final value dense
    #   dense_2/kernel:0
    #   dense_2/bias:0

    weight_names = []

    def tensor_number(number):
        return '' if number == 0 else '_' + str(number)

    def add_conv(number, with_gamma=True):
        number = tensor_number(number)
        weight_names.append('conv2d{}/kernel:0'.format(number))
        weight_names.append('conv2d{}/bias:0'.format(number))
        if with_gamma:
            weight_names.append('batch_normalization{}/gamma:0'.format(number))
            weight_names.append('batch_normalization{}/beta:0'.format(number))
        weight_names.append('batch_normalization{}/moving_mean:0'.format(number))
        weight_names.append('batch_normalization{}/moving_variance:0'.format(number))

    def add_dense(number):
        number = tensor_number(number)
        weight_names.append('dense{}/kernel:0'.format(number))
        weight_names.append('dense{}/bias:0'.format(number))

    # This blindly builds the correct names for the tensors.
    for l in range(2 * layers + 1):
        add_conv(l)

    add_conv(2 * layers + 1, with_gamma=False)
    add_dense(0)
    add_conv(2 * layers + 2, with_gamma=False)
    add_dense(1)
    add_dense(2)
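    # For a hypothetical 1-layer model this emits, in order: conv2d, conv2d_1,
    # conv2d_2 (each with gamma/beta/mean/variance), conv2d_3 + dense (policy
    # head, no gamma), conv2d_4 + dense_1 + dense_2 (value head), matching the
    # layout sketched above.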

    # This tries to load the data for each tensor.
    weights = []
    for i, name in enumerate(weight_names):
        if matches(name, ('conv2d', 'bias')) and not has_conv_bias:
            w = np.zeros(weights[-1][1].shape[-1:])
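            # (Added note) The zero bias is sized to the out_channels of the
            # conv kernel appended just before it, since v10+ checkpoints
            # store no conv2d/bias tensors at all.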
        else:
            w = tf.train.load_variable(model, name)

        # print("{:45} {} {}".format(name, type(w), w.shape))
        weights.append((name, w))
    return weights

def merge_gammas(weights):
    out_weights = []
    skip = 0
    for e, (name, w) in enumerate(weights):
        if skip > 0:
            skip -= 1
            continue

        if matches(name, ('conv2d', 'kernel')) and 'gamma' in weights[e+2][0]:
            kernel = w
            bias = weights[e+1][1]
            gamma = weights[e+2][1]
            beta = weights[e+3][1]
            mean = weights[e+4][1]
            var = weights[e+5][1]

            new_kernel = kernel * np.reshape(gamma, (1, 1, 1, -1))
            new_bias = gamma * bias + beta * np.sqrt(var + 1e-5)
            new_mean = mean * gamma
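            # Sanity check (added comment, assuming Leela evaluates
            # (conv(x, k') + b' - mean') / sqrt(var + eps)): with
            # k' = k*gamma, b' = gamma*b + beta*sqrt(var + eps) and
            # mean' = gamma*mean, this equals TF's batch norm
            # gamma * (conv(x, k) + b - mean) / sqrt(var + eps) + beta.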

            out_weights.append(new_kernel)
            out_weights.append(new_bias)
            out_weights.append(new_mean)
            out_weights.append(var)

            skip = 5
        elif matches(name, ('dense', 'kernel')):
            # Minigo uses channels-last order while LZ uses channels-first;
            # do some surgery on the dense layers to make the output match.
            planes = w.shape[0] // 361
            if planes > 0:
                w1 = np.reshape(w, [19, 19, planes, -1])
                w2 = np.transpose(w1, [2, 0, 1, 3])
                new_kernel = np.reshape(w2, [361*planes, -1])
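                # (Added note) For a hypothetical head with planes=2 this
                # turns the flattened TF row order [y, x, plane] into Leela's
                # [plane, y, x], so each of the 361*planes kernel rows lands
                # where channels-first evaluation expects it.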
                out_weights.append(new_kernel)
            else:
                out_weights.append(w)
        else:
            out_weights.append(w)

    return out_weights

def save_leelaz_weights(filename, weights):
    with open(filename, 'w') as file:
        # Version tag
        # Minigo outputs winrate from black's point of view (same as ELF)
        file.write('2')
        for e, w in enumerate(weights):
            # Newline unless last line (single bias)
            file.write('\n')
            work_weights = None
            if len(w.shape) == 4:
                # Convolution weights need a transpose
                #
                # TF (kYXInputOutput)
                # [filter_height, filter_width, in_channels, out_channels]
                #
                # Leela/cuDNN/Caffe (kOutputInputYX)
                # [output, input, filter_size, filter_size]
                work_weights = np.transpose(w, [3, 2, 0, 1])
            elif len(w.shape) == 2:
                # Fully connected layers are [in, out] in TF,
                # [out, in] in Leela
                work_weights = np.transpose(w, [1, 0])
            else:
                # Biases, batchnorm etc
                work_weights = w
            if e == 0:
                # Fix input planes
                #
                # Add zero weights for white to play input plane
                work_weights = np.pad(work_weights, ((0, 0), (0, 1), (0, 0), (0, 0)), 'constant', constant_values=0)

                # Permute weights
                p = [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 16, 17]
                work_weights = work_weights[:, p, :, :]
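                # (Added note) Assuming Minigo's 17 input planes interleave
                # [own_t0, opp_t0, ..., own_t7, opp_t7, to_play], the even/odd
                # split in p regroups them to Leela's [own_t0..t7, opp_t0..t7,
                # to_play, white_to_play], with the padded zero plane providing
                # the white-to-play weights.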
            wt_str = ["{:0.8g}".format(wt) for wt in np.ravel(work_weights)]
            file.write(' '.join(wt_str))


if len(sys.argv) < 2:
    print('Model filename without extension needed as an argument.')
    exit()

model = sys.argv[1]

print('loading', model)
print()

# Can be used for v9 or before models.
# weights = getMinigoWeightsV1(model)
weights = getMinigoWeightsV2(model)

if 0:
    # Debugging: dump variable-name summaries (note: var_names and
    # trainable_names only exist inside the loaders above).
    for name, variables in [
        ('load_checkpoint', var_names.keys()),
        # ('trainable_names', trainable_names),
        # ('global_variable', [v.name for v in tf.global_variables()])
    ]:
        print(name, len(variables))
        print(deduped(variables))
        print()

save_leelaz_weights(model + '_converted.txt', merge_gammas(weights))
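# Example usage (hypothetical checkpoint name):
#   python3 convert_minigo.py 000360-grown-teal
# This reads the TF checkpoint files next to the script and writes
# 000360-grown-teal_converted.txt in Leela Zero's text weights format.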
