In [2]:
import tensorflow as tf
import pickle
from tensorflow.keras import layers, models, optimizers, losses, utils
from tensorflow.keras.datasets import mnist

# 1) Data prep: load MNIST, append a channel axis and divide pixel values by 255.
# NOTE(review): no random seed is set anywhere in this cell, so the accuracy
# figures differ slightly from run to run.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train[..., None].astype("float32")/255.0
x_test  = x_test[...,  None].astype("float32")/255.0
# Samples from index 50000 onwards become the validation split, the first 50000
# stay as training data. Both right-hand slices are taken from the original
# arrays before x_train / y_train are rebound, so the order on the left matters.
x_val, x_train = x_train[50000:], x_train[:50000]
y_val, y_train = y_train[50000:], y_train[:50000]
# One-hot encode all label vectors into 10 classes (required by the
# CategoricalCrossentropy loss used below). After this, y_* are 2-D arrays.
y_train = utils.to_categorical(y_train, 10)
y_val   = utils.to_categorical(y_val,   10)
y_test  = utils.to_categorical(y_test,  10)

# 2) Model with explicit layer names, so the export step can find each
#    parameterised layer through model.get_layer(name).
model = models.Sequential([
    layers.Input(shape=(28,28,1)),

    layers.Conv2D(16, (5,5), activation="relu", name="conv1"), # output 24 x 24 (x 16 filters)
    layers.MaxPooling2D(2,2), # output 12 x 12

    layers.Conv2D(16, (5,5), activation="relu", name="conv2"), # output 8 x 8
    layers.MaxPooling2D(2,2), # output 4 x 4 x 16

    layers.Flatten(),
    layers.Dense(32, activation="relu", name="fc1"),
    layers.Dense(10, name="fc2"),  # no activation: raw logits, see from_logits=True below
])

model.compile(
    optimizer=optimizers.Adam(1e-3),
    loss=losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

# 3) Train for 5 epochs with mini-batches of 50, validating on the held-out
#    split after every epoch, then report accuracy on the test set.
model.fit(
    x_train, y_train,
    epochs=5, batch_size=50,
    validation_data=(x_val, y_val),
    verbose=2
)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")

# 4) Save weights & biases only for layers that have them --------------------
def save_layer(name, out_dir="/content/params"):
    """Pickle the kernel and bias of one layer of the global ``model``.

    Two files are written: ``<out_dir>/<name>_w.param`` (first weight array)
    and ``<out_dir>/<name>_b.param`` (second weight array).

    Args:
        name: Layer name as passed to ``model.get_layer``.
        out_dir: Destination directory. It is created when missing, so the
            export also works on a fresh runtime.

    Layers whose ``get_weights()`` does not return exactly two arrays are
    skipped with a console message and nothing is written for them.
    """
    import os  # local import: this cell's import block does not bring in os

    layer = model.get_layer(name)
    weights = layer.get_weights()
    if len(weights) != 2:
        # Message text kept as-is; it is printed for any weight count other than 2.
        print(f"→ layer '{name}' has no trainable weights.")
        return

    os.makedirs(out_dir, exist_ok=True)
    for suffix, values in zip(("w", "b"), weights):
        target = os.path.join(out_dir, f"{name}_{suffix}.param")
        with open(target, "wb") as handle:
            pickle.dump(values, handle)

# Export the parameters of the four named layers defined in the model above
# (the pooling and flatten layers are unnamed and are not exported).
for name in ["conv1","conv2","fc1","fc2"]:
    save_layer(name)




Epoch 1/5
1000/1000 - 28s - 28ms/step - accuracy: 0.9174 - loss: 0.2618 - val_accuracy: 0.9743 - val_loss: 0.0883
Epoch 2/5
1000/1000 - 44s - 44ms/step - accuracy: 0.9762 - loss: 0.0760 - val_accuracy: 0.9811 - val_loss: 0.0641
Epoch 3/5
1000/1000 - 39s - 39ms/step - accuracy: 0.9825 - loss: 0.0561 - val_accuracy: 0.9831 - val_loss: 0.0583
Epoch 4/5
1000/1000 - 41s - 41ms/step - accuracy: 0.9851 - loss: 0.0461 - val_accuracy: 0.9846 - val_loss: 0.0501
Epoch 5/5
1000/1000 - 27s - 27ms/step - accuracy: 0.9881 - loss: 0.0382 - val_accuracy: 0.9865 - val_loss: 0.0480
Test Accuracy: 0.9883


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the visible cells read and write under /content/params and
# /content/headers only; confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# def save_first_data(df, save_path='./input.param'):
#     first_data = df.iloc[0]
#     with open(save_path, 'wb') as f:
#         pickle.dump(first_data, f)

# temp = x_train[0].reshape(28, 28)
# with open('./input.param', 'wb') as f:
#     pickle.dump(temp, f)

# print(y_train[0])

# model.predict(x_train[0].reshape(1,28,28,1))

[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


array([[ -4.49 ,  -4.723,  -5.584,  10.667, -13.109,  14.655,  -9.066,  -6.806,   1.337,  -1.913]], dtype=float32)

In [3]:
# Example: save the test image x_test[idx] as input.param
import pickle
idx = 3

# Assumes x_test has already been loaded with shape (num, 28, 28, 1)
sample_img = x_test[idx]  # shape=(28,28,1); use x_test[idx:idx+1] to keep a batch dimension

with open("/content/params/input.param", "wb") as f:
    pickle.dump(sample_img, f)

print("→ input.param에 이미지 저장 완료")


→ input.param에 이미지 저장 완료


In [5]:
# @title
import os
import argparse
import pickle
import numpy as np
from scipy import signal

file_path = None

class Graph:
   """Named expression graph whose nodes are evaluated recursively on demand.

   Every node stores a callable ``op`` and up to two operand nodes. Evaluating
   a node evaluates its operands first (left, then right) and calls
   ``op(node_name, *operand_values)``. Results are not cached: a node shared
   by several consumers is recomputed for each of them.
   """

   class Node:
      """One graph vertex: its name, its operation and its operand nodes (or None)."""

      def __init__(self, name, op, a, b):
         self.name, self.op = name, op
         self.a, self.b = a, b

   def __init__(self):
      # Maps node name -> Node; appending an existing name replaces the entry.
      self.nodes = {}

   def append(self, name, op, a=None, b=None):
      """Register node ``name``; string operands are resolved to existing nodes.

      Raises KeyError when a string operand names a node that has not been
      appended yet.
      """
      operands = [
         self.nodes[ref] if isinstance(ref, str) else ref
         for ref in (a, b)
      ]
      self.nodes[name] = Graph.Node(name, op, *operands)

   def eval(self, ref):
      """Evaluate a node given by name or by Node object and return the op's result."""
      node = self.nodes[ref] if isinstance(ref, str) else ref

      if node.b is not None:
         operands = (node.a, node.b)
      elif node.a is not None:
         operands = (node.a,)
      else:
         operands = ()
      return node.op(node.name, *[self.eval(operand) for operand in operands])

# Fixed-point emulation settings.
FIXED_ENABLE = 1          # set to 0 to run the graph on unmodified values
F_N = 5                   # number of fractional bits
F_ONE = 1 << F_N          # integer that encodes 1.0
F_K = 1 << (F_N - 1)      # half of F_ONE, added before the shift in fixed_round


def to_fixed(x):
   """Scale x by F_ONE and truncate toward zero; the integer is returned as a float."""
   return float(int(x * F_ONE))


def fixed_round(x):
   """Rescale a product of two fixed-point values: add F_K, truncate, shift right by F_N."""
   return float(int(x + F_K) >> F_N)


def to_float(x):
   """Convert a fixed-point value back to its real value by dividing by F_ONE."""
   return float(x) / F_ONE


if FIXED_ENABLE:
   to_fixed_vectorized = np.vectorize(to_fixed)
   fixed_round_vectorized = np.vectorize(fixed_round)
   to_float_vectorized = np.vectorize(to_float)
else:
   # Fixed-point emulation disabled: all three element-wise conversions pass values through.
   def identity(x):
      return x

   to_fixed_vectorized = np.vectorize(identity)
   fixed_round_vectorized = np.vectorize(identity)
   to_float_vectorized = np.vectorize(identity)

def weight(name):
   """Graph op: load ``<file_path>/<name>.param`` and convert it to fixed point.

   The pickled array is read from the directory held in the module-level
   ``file_path``, passed through ``to_fixed_vectorized`` and returned. The
   node name and the resulting shape are printed as a progress trace.

   NOTE(review): ``pickle.load`` executes arbitrary code from the file; only
   point ``file_path`` at parameter files produced by this notebook.
   """
   param_path = os.path.join(file_path, name + '.param')
   with open(param_path, 'rb') as handle:
      raw = pickle.load(handle)
      fixed = to_fixed_vectorized(raw)
      print(name + ' ' + str(fixed.shape))
   return fixed

def relu(name, a):
   """Graph op: element-wise ReLU.

   Entries that are not strictly positive are multiplied by False (0); the
   absolute value afterwards turns any resulting -0.0 into +0.0.
   """
   keep = a > 0
   return np.fabs(np.multiply(a, keep))

def flatten(name, a):
   """Graph op: copy ``a`` in C order into a column vector of shape (a.size, 1)."""
   return a.flatten()[:, np.newaxis]

def shuffle_indices(layers, rows, cols):
   """Return the index list that interleaves ``layers`` planes of ``rows * cols`` entries.

   The result has ``layers * rows * cols`` entries. Position ``p`` holds
   ``(p % layers) * (rows * cols) + p // layers``: walking the result in
   strides of ``layers`` starting at offset ``o`` yields the consecutive
   numbers ``o * rows * cols, o * rows * cols + 1, ...``.

   Args:
      layers: Number of planes (the stride between consecutive counters).
      rows, cols: Plane dimensions; only their product is used.

   Returns:
      A list of ints; empty when the total size is not positive.
   """
   layer_size = rows * cols
   size = layers * layer_size
   # Closed form of the former counter/offset loop: slot p belongs to plane
   # p % layers and is the (p // layers)-th slot visited for that plane.
   return [(p % layers) * layer_size + p // layers for p in range(size)]

def shuffle(name, a):
   """Graph op: reorder the columns of ``a`` by the inverse of ``shuffle_indices(100, 4, 4)``.

   The geometry is fixed to 100 planes of 4 x 4, so ``a`` is indexed with
   1600 column positions.
   """
   forward = shuffle_indices(100, 4, 4)
   inverse = np.argsort(forward)
   return a[:, inverse]

def mul(name, a, b):
   """Graph op: element-wise product, rescaled with ``fixed_round_vectorized``."""
   product = np.multiply(a, b)
   return fixed_round_vectorized(product)

def add(name, a, b):
   """Graph op: ``a + b``, where a 1-D ``b`` is first turned into a column vector.

   A bias of shape (n,) therefore lines up with an (n, batch) activation
   instead of being broadcast along the last axis.
   """
   bias = b[:, np.newaxis] if b.ndim == 1 else b
   return np.add(a, bias)

def conv_add(name, a, b):
   """Graph op: add ``b[l]`` to the l-th slice of ``a`` for every leading index l.

   The biased slices are stacked again along axis 0, so the result keeps the
   leading dimension of ``a``.
   """
   biased = [np.add(a[l], b[l]) for l in range(a.shape[0])]
   return np.stack(biased, axis=0)

def mmul(name, a, b):
    """Graph op: matrix product ``a . b`` rescaled with ``fixed_round_vectorized``.

    ``a`` is the left operand of ``np.dot``: with a of shape (units, in_dim)
    and b of shape (in_dim, batch) the result has shape (units, batch).
    """
    product = np.dot(a, b)
    return fixed_round_vectorized(product)


from scipy import signal

def conv(name, a, b):
   """Graph op: 'valid' cross-correlation of input ``b`` with every kernel in ``a``.

   ``a`` holds one kernel per leading index. When ``a`` and ``b`` have the
   same number of dimensions, kernel l is applied to slice ``b[l]`` only;
   otherwise every kernel is applied to the whole of ``b``. The per-kernel
   results are stacked along axis 0 and rescaled with
   ``fixed_round_vectorized``.
   """
   kernels = a
   per_slice = kernels.ndim == b.ndim
   responses = [
      signal.correlate(b[l] if per_slice else b, kernels[l], mode='valid')
      for l in range(kernels.shape[0])
   ]
   ret = fixed_round_vectorized(np.stack(responses, axis=0))
   # NOTE(review): debug dump of the first slice of the first output map,
   # printed on every call; kept because it is part of the recorded output.
   print(ret[0][0])
   return ret

def pooling(mat, ksize, method='max', pad=False):
   """Non-overlapping pooling over the first two axes of ``mat``.

   Args:
      mat: Array whose first two axes are pooled; trailing axes are kept.
      ksize: ``(ky, kx)`` window size along axis 0 and axis 1.
      method: ``'max'`` for max pooling; any other value selects mean pooling.
      pad: When True the input is extended with NaN up to the next multiple
         of the window size (NaNs are ignored by the reduction); when False
         the rows/columns that do not fill a complete window are dropped.

   Returns:
      Array of shape ``(ny, nx) + mat.shape[2:]``.
   """
   height, width = mat.shape[:2]
   win_h, win_w = ksize
   trailing = mat.shape[2:]

   if pad:
      out_h = int(np.ceil(height / float(win_h)))
      out_w = int(np.ceil(width / float(win_w)))
      work = np.full((out_h * win_h, out_w * win_w) + trailing, np.nan)
      work[:height, :width, ...] = mat
   else:
      out_h, out_w = height // win_h, width // win_w
      work = mat[:out_h * win_h, :out_w * win_w, ...]

   # Split both pooled axes into (window index, position inside window) and
   # reduce over the two "inside window" axes.
   windows = work.reshape((out_h, win_h, out_w, win_w) + trailing)
   reducer = np.nanmax if method == 'max' else np.nanmean
   return reducer(windows, axis=(1, 3))

def maxpool2x2(name, a):
   """Graph op: 2 x 2 max pooling applied to each slice ``a[l]`` separately.

   Singleton axes of every slice are squeezed away before pooling; the pooled
   slices are stacked along axis 0.
   """
   pooled = [pooling(np.squeeze(a[l]), (2, 2)) for l in range(a.shape[0])]
   return np.stack(pooled, axis=0)

def transpose(name, a):
   """Graph op: return ``a`` with its axes reversed (the matrix transpose for 2-D input)."""
   return a.transpose()

def permute(name, a):
   """Graph op: reorder a 4-D array so that source axes (3, 2, 0, 1) become axes (0, 1, 2, 3).

   Used on the conv kernels: a (5, 5, in, out) array becomes (out, in, 5, 5).
   """
   axis_order = (3, 2, 0, 1)
   return np.transpose(a, axis_order)

def permute_vh(name, a):
   """Graph op: reorder a 4-D array so that source axes (2, 3, 0, 1) become axes (0, 1, 2, 3)."""
   axis_order = (2, 3, 0, 1)
   return np.transpose(a, axis_order)

def squeeze(name, a):
   """Graph op: drop every length-1 axis of ``a``."""
   squeezed = np.squeeze(a)
   return squeezed

def input_reshape(name, a):
   """Graph op: view the 784-element input image as one 28 x 28 plane, shape (1, 28, 28)."""
   target_shape = (1, 28, 28)
   return np.reshape(a, target_shape)

def arg_max(name, a):
   """Graph op: index of the largest entry of ``a`` in flattened (C) order."""
   flat = a.flatten()
   return flat.argmax()

def change_c_wh(name, a, height=4, width=4):
   """Graph op: reorder FC weight columns from (H, W, C) to (C, H, W) flatten order.

   Each row of ``a`` holds the weights of one output unit. The columns are
   assumed to follow the flatten order of an (H, W, C) feature map, i.e.
   column ``h * W * C + w * C + c`` (the layout a channels-last Flatten in
   front of the dense layer produces - TODO confirm against the training
   framework). The inference graph in this file flattens a (C, H, W) stack
   instead, so the columns are rearranged to ``c * H * W + h * W + w``.

   Args:
      a: 2-D array of shape (units, H * W * C).
      height, width: Spatial size of the feature map in front of the layer.
         The channel count is derived from the number of columns.

   Returns:
      Array of shape (units, C * H * W).

   Raises:
      ValueError: if ``a`` is not 2-D or its column count is not a multiple
         of ``height * width``.

   The earlier hard-coded form (reshape to [32, 16, 4, 4], transpose
   [0, 2, 3, 1]) gives the same result only when the channel count equals
   ``height * width`` (16 == 4 * 4), which is the case for the current
   model; this version stays correct when the two differ.
   """
   a = np.asarray(a)
   plane = height * width
   if a.ndim != 2 or plane <= 0 or a.shape[1] % plane != 0:
      raise ValueError(
         'change_c_wh expects a (units, H*W*C) matrix with H*W = %d, got shape %s'
         % (plane, a.shape))
   units = a.shape[0]
   channels = a.shape[1] // plane
   # (units, H, W, C) -> (units, C, H, W) -> (units, C*H*W)
   hwc = a.reshape(units, height, width, channels)
   return hwc.transpose(0, 3, 1, 2).reshape(units, channels * plane)

def main():
    """Rebuild the trained CNN as a fixed-point op graph and classify the saved sample.

    Parameters and the input image are read from ``/content/params/`` (the
    files ``<node>.param``). The graph mirrors the Sequential model layer by
    layer: conv -> bias -> relu -> pool (twice), flatten, fc1 -> bias -> relu,
    fc2 -> bias, argmax.

    Prints the logits converted back from fixed point, the predicted class
    and, when available, the true label. The true label is looked up in the
    notebook globals ``y_test`` and ``idx`` (the index the export cell used
    when it wrote ``input.param``); if either is missing, for example on a
    fresh kernel, ``unknown`` is printed instead of failing with NameError.
    """
    global file_path
    file_path = '/content/params/'

    # --- Build the graph: one-to-one with the Sequential model ---
    graph = Graph()
    # input
    graph.append('input', weight)
    graph.append('input_reshape', input_reshape, 'input')

    # conv1 -> bias -> relu -> pool
    graph.append('conv1_w', weight)
    graph.append('conv1_w_', permute, 'conv1_w')
    graph.append('conv1_b', weight)
    graph.append('conv1',    conv,       'conv1_w_',   'input_reshape')
    graph.append('conv1b',   conv_add,   'conv1',     'conv1_b')
    graph.append('conv1r',   relu,       'conv1b')
    graph.append('conv1p',   maxpool2x2, 'conv1r')

    # conv2 -> bias -> relu -> pool
    graph.append('conv2_w', weight)
    graph.append('conv2_w_', permute, 'conv2_w')
    graph.append('conv2_b', weight)
    graph.append('conv2',    conv,       'conv2_w_',   'conv1p')
    graph.append('conv2b',   conv_add,   'conv2',     'conv2_b')
    graph.append('conv2r',   relu,       'conv2b')
    graph.append('conv2p',   maxpool2x2, 'conv2r')

    # flatten
    graph.append('flat',     flatten,    'conv2p')

    # fc1 -> bias -> relu
    graph.append('fc1_w',    weight)
    graph.append('fc1_b',    weight)
    graph.append('fc1_wt', transpose, 'fc1_w')
    graph.append('fc1_whc', change_c_wh, 'fc1_wt')
    graph.append('fc1',      mmul,       'fc1_whc',     'flat')
    graph.append('fc1b',     add,        'fc1',       'fc1_b')
    graph.append('fc1r',     relu,       'fc1b')

    # fc2 -> bias -> logits -> prediction
    graph.append('fc2_w',    weight)
    graph.append('fc2_wt', transpose, 'fc2_w')
    graph.append('fc2_b',    weight)
    graph.append('fc2',      mmul,       'fc2_wt',     'fc1r')
    graph.append('fc2b',     add,        'fc2',       'fc2_b')
    graph.append('predict',  arg_max,    'fc2b')

    # --- Run and report ---
    np.set_printoptions(precision=3, linewidth=200, suppress=True)
    logits = graph.eval('fc2b').flatten()
    print('Logits:', to_float_vectorized(logits).tolist())
    # Graph.eval keeps no cache, so evaluating the 'predict' node would reload
    # every parameter and rerun the whole network. Apply its op to the logits
    # that are already computed instead.
    pred = arg_max('predict', logits)

    # The true label lives in notebook state created by other cells; y_test is
    # one-hot encoded there, so reduce the entry to a class index for display.
    labels = globals().get('y_test')
    sample_idx = globals().get('idx')
    if labels is None or sample_idx is None:
        true_label = 'unknown'
    else:
        entry = np.asarray(labels[sample_idx])
        true_label = int(entry.argmax()) if entry.ndim else int(entry)
    print(f'Prediction: {pred}, True label: {true_label}')


if __name__ == '__main__':
   # Command-line parsing is disabled (kept below for reference); main() uses
   # its own hard-coded parameter directory.
   ##parser = argparse.ArgumentParser()
   #parser.add_argument('--src_dir', type=str, help='Source directory')
   #args = parser.parse_args()
   main()


fc2_w (32, 10)
fc1_w (256, 32)
conv2_w (5, 5, 16, 16)
conv1_w (5, 5, 1, 16)
input (28, 28, 1)
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   1.   3.   6.   5.  -1.  -5.  -3.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   4.  10.  15.  10.  -2. -12. -11.  -2.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   2.  11.  22.  27.  12. -12. -23. -21.  -6.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   2.   9.  23.  36.  35.   8. -23. -33. -29. -11.  -2.  -1.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   3.   8.  21.  37.  41.  33.   3. -28. -37. -32. -14.  -8.  -5.  -1.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   1.   9.  20.  34.  42.  30.  21.  -1. -24. -29. -27. -17. -14. -15.  -5.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   4.  19.  36.  44.  34.  16.   8.  -8. -18. -19. -18. -18. -20. -23. -14.  -2.   0.   0.   0.]
 [  0.   0.   0.   0.   2.  12.  34.  46.  40

In [7]:
import os
import pickle
import argparse
import numpy as np
import scipy

# Input and output directories; both are assigned by main() before use.
header_dir = None
param_dir = None


def f_lit(x):
   """Return the text ``F_LIT(<x>)``, the form in which values are written to the headers."""
   return 'F_LIT(%s)' % (x,)

def write_header(name, mats):
   """Write matrices to ``<header_dir>/<name>.h`` as C array definitions.

   Args:
      name: Header base name; its upper-cased form is used for the include guard.
      mats: Iterable of ``(mat_name, mat, layer, sparse)`` tuples, where
         ``mat`` is a NumPy array, ``layer`` is ``'CONV'`` or ``'FC'`` and
         ``sparse`` selects a compressed encoding.

   Encodings:
      * ``'CONV'`` + sparse: ``mat`` is flattened per leading index. The
        non-zero values of all rows go to ``<mat_name>``, the distance of
        each non-zero to the previous one in its row (the absolute position
        for the first) to ``<mat_name>_offsets``, and the number of
        non-zeros per row to ``<mat_name>_sizes``.
      * ``'FC'`` + sparse: CSR form; ``data`` goes to ``<mat_name>``,
        ``indices`` to ``<mat_name>_offsets`` and ``indptr`` to
        ``<mat_name>_sizes``.
      * anything else: dense array with the full shape of ``mat``; exact
        zeros are written as ``F_LIT(0)``.

   The module-level ``header_dir`` is used as output directory and is
   created when it does not exist. An existing header is overwritten.
   """
   guard = name.upper() + '_H'
   contents = '#ifndef ' + guard + '\n'
   contents += '#define ' + guard + '\n'
   contents += '#include <libfixed/fixed.h>\n'
   contents += '#include <libdnn/mem.h>\n\n'
   for mat_name, mat, layer, sparse in mats:
      if layer == 'CONV' and sparse:
         rows = mat.reshape(mat.shape[0], -1)
         literals, offsets, sizes = [], [], []
         for row in rows:
            nonzero = np.flatnonzero(row != 0.0)
            sizes.append(len(nonzero))
            if len(nonzero) == 0:
               continue
            # .tolist() yields plain Python numbers, so the rendered text does
            # not depend on NumPy's scalar repr.
            literals.extend(f_lit(value) for value in row[nonzero].tolist())
            # prepend=0 makes the first entry the absolute position of the
            # first non-zero; the rest are gaps between neighbours.
            offsets.extend(np.diff(nonzero, prepend=0).tolist())

         size = len(literals)
         contents += '#define ' + mat_name.upper() + '_LEN ' + str(size) + '\n\n'

         contents += '__ro_hifram fixed ' + mat_name + \
            '[' + str(size) + '] = {' + ', '.join(literals) + '};\n\n'

         contents += '__ro_hifram fixed ' + mat_name + '_offsets[' + \
            str(size) + '] = {' + ', '.join(map(str, offsets)) + '};\n\n'

         contents += '__ro_hifram fixed ' + mat_name + '_sizes[' + \
            str(rows.shape[0]) + '] = {' + ', '.join(map(str, sizes)) + '};\n\n'

      elif layer == 'FC' and sparse:
         # Import the submodule explicitly: a bare `import scipy` does not
         # guarantee that scipy.sparse is loaded.
         from scipy import sparse as sp_sparse

         csr = sp_sparse.csr_matrix(mat)
         data = csr.data.tolist()
         indices = csr.indices.tolist()
         indptr = csr.indptr.tolist()

         contents += '#define ' + mat_name.upper() + '_LEN ' + \
            str(len(data)) + '\n\n'

         contents += '__ro_hifram fixed ' + mat_name + '[' + \
            str(len(data)) + '] = {' + ', '.join(map(f_lit, data)) + '};\n\n'

         contents += '__ro_hifram uint16_t ' + mat_name + '_offsets[' + \
            str(len(indices)) + '] = {' + ', '.join(map(str, indices)) + '};\n\n'

         contents += '__ro_hifram uint16_t ' + mat_name + '_sizes[' + \
            str(len(indptr)) + '] = {' + ', '.join(map(str, indptr)) + '};\n\n'
      else:
         flat = mat.flatten().tolist()
         # Exact zeros are written as the integer literal 0.
         lits = [f'F_LIT({int(x)})' if x == 0.0 else f'F_LIT({x})' for x in flat]
         mat_str = '{ ' + ', '.join(lits) + ' }'
         shape_str = ''.join(f'[{s}]' for s in mat.shape)

         contents += f'__ro_hifram fixed {mat_name}{shape_str} = {mat_str};\n\n'

   # Apostrophes have always been stripped from the rendered text; keep doing
   # so to leave the dense output byte-identical.
   contents = contents.replace("'", '')
   contents += '#endif'
   os.makedirs(header_dir, exist_ok=True)
   path = os.path.join(header_dir, name + '.h')
   with open(path, 'w') as f:
      f.write(contents)

def weight(name):
   """Graph op: unpickle and return ``<param_dir>/<name>.param`` unchanged.

   Unlike the inference variant defined earlier in the notebook, no
   fixed-point conversion is applied; the stored values are returned as they
   were saved.

   NOTE(review): ``pickle.load`` executes arbitrary code from the file; only
   point ``param_dir`` at parameter files produced by this notebook.
   """
   param_path = os.path.join(param_dir, name + '.param')
   with open(param_path, 'rb') as handle:
      return pickle.load(handle)

def main():
   """Export the input image and all layer parameters as C headers.

   Parameters are read from ``/content/params`` and one header per layer is
   written to ``/content/headers``: ``input.h``, ``conv1.h``, ``conv2.h``,
   ``fc1.h`` and ``fc2.h``. Conv kernels are permuted and FC weights are
   transposed (fc1 additionally goes through ``change_c_wh``) before they are
   written. All matrices are emitted densely.
   """
   global header_dir, param_dir
   header_dir = '/content/headers'
   param_dir = '/content/params'

   # Node table: (name, op[, operand]) in the order the nodes are registered.
   node_specs = (
      ('input', weight),
      ('input_reshape', input_reshape, 'input'),

      ('conv1_w', weight),
      ('conv1_wp', permute, 'conv1_w'),
      ('conv1_b', weight),

      ('conv2_w', weight),
      ('conv2_wp', permute, 'conv2_w'),
      ('conv2_b', weight),

      ('fc1_w', weight),
      ('fc1_wt', transpose, 'fc1_w'),
      ('fc1_whc', change_c_wh, 'fc1_wt'),
      ('fc1_b', weight),

      ('fc2_w', weight),
      ('fc2_wt', transpose, 'fc2_w'),
      ('fc2_b', weight),
   )
   graph = Graph()
   for spec in node_specs:
      graph.append(*spec)

   # Header table: header name -> (array name, graph node to evaluate, layer kind).
   header_specs = (
      ('input', (('input', 'input_reshape', 'FC'),)),
      ('conv1', (('conv1_w', 'conv1_wp', 'CONV'), ('conv1_b', 'conv1_b', 'FC'))),
      ('conv2', (('conv2_w', 'conv2_wp', 'CONV'), ('conv2_b', 'conv2_b', 'FC'))),
      ('fc1', (('fc1_w', 'fc1_whc', 'FC'), ('fc1_b', 'fc1_b', 'FC'))),
      ('fc2', (('fc2_w', 'fc2_wt', 'FC'), ('fc2_b', 'fc2_b', 'FC'))),
   )
   for header_name, entries in header_specs:
      # All arrays of a header are evaluated before the header is written.
      write_header(header_name, [
         (mat_name, graph.eval(node), layer, False)
         for mat_name, node, layer in entries])

   # Sparse variants are currently disabled:
   #  write_header('fc1_sparse', [
   #     ('fc1_w', graph.eval('fc1_whc'), 'FC', True),
   #     ('fc1_b', graph.eval('fc1_b'), 'FC', False)])
   #
   #  write_header('fc2_sparse', [
   #     ('fc2_w', graph.eval('fc2_wt'), 'FC', True),
   #     ('fc2_b', graph.eval('fc2_b'), 'FC', False)])


# Run the header export when this cell is executed.
if __name__ == '__main__':
   main()

