In [5]:
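# @title Setup
# NOTE: the installs below are unpinned. The TensorFlow conversion errors
# recorded further down ("object __array__ method not producing an array",
# "numpy.dtype size changed, may indicate binary incompatibility") look like a
# NumPy/TensorFlow binary mismatch; pin mutually compatible versions here and
# restart the kernel after installing.
# !pip install --upgrade pip
# !pip install --upgrade git+https://github.com/google/flax.git
# !pip install --upgrade "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html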

In [13]:
#@title Imports { form-width: "10%" }

from typing import Callable, Any, Optional
import flax
import flax.training.common_utils
import flax.linen as nn
import jax
import jax.numpy as jnp
import numpy as np
import optax
import pdb

from absl import logging
from flax.training import train_state
from jax import lax
from matplotlib import pyplot as plt

import tensorflow as tf
import functools
import os
import time

# Surface INFO-level absl logs (the training loop reports throughput through
# logging.info) and record which JAX devices/backend this kernel sees.
logging.set_verbosity(logging.INFO)
print(jax.devices())
print(jax.default_backend())

[CpuDevice(id=0)]
cpu


In [58]:
# Load the raw sequences from the "arr_0" entry of the .npz archive.
# Relative path: the archive is expected next to the notebook.
DATA_PATH = 'data.npz'

with open(DATA_PATH, 'rb') as f:
  data = np.load(f)
  # Materialise the array while the file handle is still open.
  RAW_DATA = data["arr_0"]

# Disabled variant: first differences along axis 1, left-padded with one zero
# step so the length is unchanged.
# D_RAW_DATA = RAW_DATA[:,1:,:] - RAW_DATA[:,:-1,:]
# D_RAW_DATA = np.pad(D_RAW_DATA, [(0,0), (1,0), (0,0)])

# get_datasets_np treats channel 0 of the last axis as 'x' and channel 1 as 'y'.
print(RAW_DATA.shape)

(200, 1001, 2)


In [57]:
# Sequence-length / split constants shared by the data pipeline and the model.
MAX_LEN = 1024
TRAIN_SIZE = 200 - 64
# Sentinel value recognised by `is_pad`.
PAD_VALUE = -1e10


def is_pad(x):
  """Returns True (elementwise) where `x` is numerically close to PAD_VALUE."""
  return np.isclose(x, PAD_VALUE)

def get_datasets_np(data=RAW_DATA, max_len=MAX_LEN, train_size=TRAIN_SIZE):
  """Splits `data` into train/test dicts of zero-padded numpy arrays.

  Args:
    data: array of shape (num_sequences, length, channels). Channel 0 becomes
      'x' and channel 1 becomes 'y'.
    max_len: length every sequence is zero-padded to (on the right). Must be
      at least the sequence length of `data`.
    train_size: number of leading sequences placed in the train split; the
      remaining sequences form the test split.

  Returns:
    A `(train, test)` tuple of dicts with keys:
      'x', 'y': arrays of shape (n, max_len, 1).
      'pad': float32 array of shape (n, max_len), 1.0 on the padded timesteps
        and 0.0 elsewhere. `train_step` / `eval_step` read this entry.
  """
  num_seqs, seq_len, _ = data.shape
  assert max_len >= seq_len, (
      f"max_len={max_len} is shorter than the sequence length {seq_len}")
  padded = np.pad(data, ([0, 0], [0, max_len - seq_len], [0, 0]))

  # Mark the timesteps appended by np.pad so the loss can mask them out.
  pad_mask = np.zeros((num_seqs, max_len), dtype=np.float32)
  pad_mask[:, seq_len:] = 1.0

  def _split(rows):
    # Use the function arguments (not the module-level constants) so that
    # callers passing a custom max_len / train_size get what they asked for.
    return {
        'x': padded[rows, :max_len, 0][..., None],
        'y': padded[rows, :max_len, 1][..., None],
        'pad': pad_mask[rows],
    }

  return _split(slice(None, train_size)), _split(slice(train_size, None))

def get_datasets_tf(batch_size):
  """Wraps the numpy splits in batched, prefetched `tf.data` pipelines.

  Args:
    batch_size: number of sequences per batch for both pipelines.

  Returns:
    `(train_ds, eval_ds)`: the train dataset repeats indefinitely, the eval
    dataset yields a single pass.
  """
  train_arrays, eval_arrays = get_datasets_np()

  def _pipeline(arrays, num_epochs):
    # cache -> repeat -> batch -> prefetch, identical for both splits.
    return (tf.data.Dataset.from_tensor_slices(arrays)
            .cache()
            .repeat(num_epochs)
            .batch(batch_size)
            .prefetch(4))

  return _pipeline(train_arrays, None), _pipeline(eval_arrays, 1)


def create_learning_rate_scheduler(
    factors='constant * linear_warmup * rsqrt_decay',
    base_learning_rate=0.5,
    warmup_steps=8000,
    decay_factor=0.5,
    steps_per_decay=20000,
    steps_per_cycle=100000):
  """Builds a step -> learning-rate function from a product of named factors.

  Supported factor names (combined left to right, separated by '*'):
  * constant: multiply by `base_learning_rate`.
  * linear_warmup: multiply by min(1, step / warmup_steps).
  * rsqrt_decay: divide by sqrt(max(step, warmup_steps)).
  * rsqrt_normalized_decay: rsqrt_decay rescaled by sqrt(warmup_steps).
  * decay_every: multiply by decay_factor ** (step // steps_per_decay).
  * cosine_decay: cyclic cosine decay with period `steps_per_cycle`, starting
    after `warmup_steps`.

  Args:
    factors: string of factor names separated by '*'.
    base_learning_rate: value used by the 'constant' factor.
    warmup_steps: warmup length used by the warmup / rsqrt / cosine factors.
    decay_factor: multiplier applied by 'decay_every'.
    steps_per_decay: interval (in steps) between 'decay_every' decays.
    steps_per_cycle: cosine cycle length in steps.

  Returns:
    A function `step_fn(step)` returning the learning rate for that step. An
    unknown factor name raises ValueError when `step_fn` is called.
  """
  factor_names = [part.strip() for part in factors.split('*')]

  def _constant(value, step):
    del step  # Unused.
    return value * base_learning_rate

  def _linear_warmup(value, step):
    return value * jnp.minimum(1.0, step / warmup_steps)

  def _rsqrt_decay(value, step):
    return value / jnp.sqrt(jnp.maximum(step, warmup_steps))

  def _rsqrt_normalized_decay(value, step):
    value = value * jnp.sqrt(warmup_steps)
    return value / jnp.sqrt(jnp.maximum(step, warmup_steps))

  def _decay_every(value, step):
    return value * (decay_factor**(step // steps_per_decay))

  def _cosine_decay(value, step):
    progress = jnp.maximum(0.0,
                           (step - warmup_steps) / float(steps_per_cycle))
    return value * jnp.maximum(
        0.0, 0.5 * (1.0 + jnp.cos(jnp.pi * (progress % 1.0))))

  appliers = {
      'constant': _constant,
      'linear_warmup': _linear_warmup,
      'rsqrt_decay': _rsqrt_decay,
      'rsqrt_normalized_decay': _rsqrt_normalized_decay,
      'decay_every': _decay_every,
      'cosine_decay': _cosine_decay,
  }

  def step_fn(step):
    """Maps a step index to its learning rate."""
    ret = 1.0
    for name in factor_names:
      if name not in appliers:
        raise ValueError('Unknown factor %s.' % name)
      ret = appliers[name](ret, step)
    return ret

  return step_fn


In [47]:
@flax.struct.dataclass
class TransformerConfig:
  """Global hyperparameters used to minimize kwarg plumbing."""
  # Size of each per-timestep output head.
  output_size: int = 1
  # Number of positions in the position-embedding table.
  max_len: int = MAX_LEN
  # Number of stacked self-attention blocks.
  num_layers: int = 2
  # Width of the residual stream (and attention qkv features).
  hidden_dim: int = 16
  # Hidden width of the feed-forward block.
  mlp_dim: int = 64
  num_heads: int = 4
  dropout_rate: float = 0.0
  attention_dropout_rate: float = 0.0
  # NOTE(review): `deterministic` and `decode` are not read by the modules in
  # this notebook (determinism is passed per call) -- confirm before relying
  # on them.
  deterministic: bool = False
  decode: bool = False
  # Whether the Transformer applies a causal self-attention mask.
  causal_x: bool = True
  # Selects PhysicsDecoder instead of Decoder inside Transformer.
  physics_decoder: bool = False
  kernel_init: Callable = nn.initializers.xavier_uniform()
  bias_init: Callable = nn.initializers.normal(stddev=1e-6)
  posemb_init: Callable = nn.initializers.normal(stddev=0.02)
  # Weight of the delta (dy) term in the loss. PhysicsDecoder asserts
  # `cfg.alpha == 0.0`; without this field that assert raised AttributeError.
  # Appended last so positional construction of the earlier fields is unchanged.
  alpha: float = 0.0


def shift_right(x, axis=1):
  """Shifts `x` one step to the right along `axis`.

  A zero (in `x`'s dtype) is inserted at the front of `axis` and the last
  element along that axis is dropped, so the shape is unchanged.
  """
  zero = x.dtype.type(0)
  widths = [(0, 0) for _ in x.shape]
  widths[axis] = (1, 0)
  with_leading_zero = jnp.pad(
      x, widths, mode='constant', constant_values=zero)
  kept = with_leading_zero.shape[axis] - 1
  return lax.dynamic_slice_in_dim(with_leading_zero, 0, kept, axis)



class AddPositionEmbs(nn.Module):
  """Adds learned positional embeddings to the inputs.

  Attributes:
    config: TransformerConfig dataclass containing hyperparameters.
  """
  config: TransformerConfig

  @nn.compact
  def __call__(self, inputs):
    """Applies AddPositionEmbs module.

    A learned table of shape `(1, max_len, hidden_dim)` is created with the
    initializer from `config.posemb_init` (falling back to
    `normal(stddev=0.02)` when that is None) and its first `seq_len` rows are
    added to the inputs.

    Args:
      inputs: input data of shape `(batch_size, seq_len, hidden_dim)`.
    Returns:
      output: `(bs, timesteps, in_dim)`
    """
    cfg = self.config
    # inputs.shape is (batch_size, seq_len, hidden_dim)
    assert inputs.ndim == 3, ('Number of dimensions should be 3,'
                              ' but it is: %d' % inputs.ndim)
    length = inputs.shape[1]
    assert length <= cfg.max_len, (
        'Sequence length %d exceeds max_len %d' % (length, cfg.max_len))
    # Honour the configured initializer instead of a hard-coded one; the
    # config default is the same normal(stddev=0.02) used previously.
    posemb_init = cfg.posemb_init
    if posemb_init is None:
      posemb_init = nn.initializers.normal(stddev=0.02)
    pos_emb_shape = (1, cfg.max_len, inputs.shape[-1])
    pos_embedding = self.param('pos_embedding', posemb_init, pos_emb_shape)
    return inputs + pos_embedding[:, :length, :]


class MlpBlock(nn.Module):
  """Position-wise feed-forward block: Dense -> ReLU -> Dropout -> Dense -> Dropout.

  Attributes:
    config: TransformerConfig dataclass containing hyperparameters.
    out_dim: output width; defaults to the width of the inputs when None.
  """
  config: TransformerConfig
  out_dim: Optional[int] = None

  @nn.compact
  def __call__(self, inputs, deterministic=True):
    """Applies the feed-forward block.

    Args:
      inputs: array whose last axis is the feature axis.
      deterministic: when True, both dropout layers are disabled.

    Returns:
      Array with the same leading shape as `inputs` and last dim `out_dim`
      (or the input width when `out_dim` is None).
    """
    cfg = self.config
    make_dense = functools.partial(
        nn.Dense, kernel_init=cfg.kernel_init, bias_init=cfg.bias_init)
    if self.out_dim is None:
      width_out = inputs.shape[-1]
    else:
      width_out = self.out_dim

    hidden = nn.relu(make_dense(cfg.mlp_dim)(inputs))
    hidden = nn.Dropout(rate=cfg.dropout_rate)(
        hidden, deterministic=deterministic)
    projected = make_dense(width_out)(hidden)
    return nn.Dropout(rate=cfg.dropout_rate)(
        projected, deterministic=deterministic)


class EncoderDecoder1DBlock(nn.Module):
  """Pre-LayerNorm self-attention block followed by a feed-forward block.

  Args:
    config: TransformerConfig dataclass containing hyperparameters.
  """
  config: TransformerConfig

  @nn.compact
  def __call__(self,
               inputs,
               deterministic,
               decoder_mask=None):
    """Applies EncoderDecoder1DBlock module.

    Args:
      inputs: input data of shape (batch, length, features).
      deterministic: when True, all dropout (attention, residual and MLP) is
        disabled.
      decoder_mask: decoder self-attention mask.

    Returns:
      output after the self-attention and MLP residual branches.
    """
    cfg = self.config

    # Self-attention block.
    assert inputs.ndim == 3, ('Number of dimensions should be 3,'
                              ' but it is: %d' % inputs.ndim)
    x = nn.LayerNorm()(inputs)
    x = nn.SelfAttention(
        num_heads=cfg.num_heads,
        qkv_features=cfg.hidden_dim,
        kernel_init=cfg.kernel_init,
        bias_init=cfg.bias_init,
        use_bias=False,
        broadcast_dropout=False,
        dropout_rate=cfg.attention_dropout_rate,
        deterministic=deterministic)(x, mask=decoder_mask)
    x = nn.Dropout(rate=cfg.dropout_rate)(
        x, deterministic=deterministic)
    x = x + inputs

    # MLP block.
    z = nn.LayerNorm()(x)
    # Forward `deterministic`: MlpBlock defaults to deterministic=True, so
    # without this its dropout layers stayed disabled during training.
    z = MlpBlock(config=cfg)(z, deterministic=deterministic)

    return x + z


class PhysicsDecoder(nn.Module):
  """Decoder that predicts via derivatives of a small learned function `f`.

  A self-attention stack maps the input sequence to per-timestep `alphas`;
  the outputs are built from the Jacobians of `f(x, alphas)` with respect to
  `x` and to `alphas`.

  Args:
    config: TransformerConfig dataclass containing hyperparameters.
  """
  config: TransformerConfig

  @nn.compact
  def __call__(self,
               inputs,
               deterministic,
               decoder_mask=None):
    """Applies the physics decoder to the inputs.

    Args:
      inputs: dict of input data; only `inputs['x']` is read here.
      deterministic: when True, dropout is disabled.
      decoder_mask: decoder self-attention mask.

    Returns:
      A tuple `(x, zeros_like(x), aux)`: the contracted Jacobian of `f` with
      respect to the input, a zero array of the same shape, and an auxiliary
      per-timestep value.
    """
    x = inputs['x']
    dx = build_deltas(x)

    cfg = self.config
    # NOTE(review): this requires the config object to expose an `alpha`
    # attribute -- confirm the config in use defines it.
    assert cfg.alpha == 0.0

    # Embed the raw values and their first differences separately, each into
    # half of the hidden width, then concatenate along the feature axis.
    x = nn.Dense(
        cfg.hidden_dim // 2,
        kernel_init=cfg.kernel_init,
        bias_init=cfg.bias_init,
        name='embed_x')(x)
    dx = nn.Dense(
        cfg.hidden_dim // 2,
        kernel_init=cfg.kernel_init,
        bias_init=cfg.bias_init,
        name='embed_dx')(dx)
    x = jnp.concatenate([x, dx], axis=-1)
    # Fails when hidden_dim is odd (two halves of hidden_dim // 2).
    assert x.shape[-1] == cfg.hidden_dim, f"{x.shape[-1]} != {cfg.hidden_dim}"

    x = nn.Dropout(rate=cfg.dropout_rate)(x, deterministic=deterministic)
    x = AddPositionEmbs(config=cfg, name='posembed_output')(x)
    x = nn.Dropout(rate=cfg.dropout_rate)(x, deterministic=deterministic)

    # Self-attention stack producing the per-timestep `alphas`.
    for lyr in range(cfg.num_layers):
      x = EncoderDecoder1DBlock(
          config=cfg, name=f'encoderdecoderblock_{lyr}')(
              x,
              deterministic=deterministic,
              decoder_mask=decoder_mask)
    alphas = nn.LayerNorm(name='alphas')(x)

    # Forward-mode Jacobians of f with respect to x (argnums=0) and alphas
    # (argnums=1).
    # NOTE(review): `f` creates Flax submodules and is wrapped in a raw
    # jax.jacfwd; Flax normally expects submodules to be built in `setup` or
    # an @nn.compact method and JAX transforms over modules to go through
    # lifted transforms -- confirm this code path actually runs.
    dfdx = jax.jacfwd(self.f, argnums=0)
    dfdalpha = jax.jacfwd(self.f, argnums=1)

    x = dfdx(inputs['x'], alphas, cfg)
    # The repeated `b` index keeps the diagonal over the two sequence axes;
    # the `c` and `d` axes are summed out.
    x = jnp.einsum('abcdbf->abf', x)
    dfda = dfdalpha(inputs['x'], alphas, cfg)
    dfda = jnp.einsum('abcdbf->abf', dfda)  # presumably [B,S,C] -- TODO confirm

    delta_a = alphas[:,1:,:] - alphas[:,:-1,:]  # [B,S-1,C]
    first_delta_a = jnp.zeros([delta_a.shape[0], 1, delta_a.shape[-1]])
    # Prepend a zero step so the deltas have one entry per timestep: [B,S,C].
    delta_a = jnp.concatenate([first_delta_a, delta_a], axis=1)
    aux = - jnp.sum(dfda * delta_a, axis=-1)  # [B,S]

    # Returns (prediction, zero placeholder for the delta prediction, aux).
    # NOTE(review): the original comment read "prediction (y/sigma), aux value
    # > 0"; nothing here enforces a sign on aux -- confirm the intended
    # constraint against the hinge loss applied to it.
    return x, jnp.zeros_like(x), aux

  def f(self, x, alphas, cfg):
    """Small MLP applied to the concatenation of `alphas` and `x`.

    Args:
      x: presumably [B,S,1] (the original docstring said [X,S,1]) -- TODO
        confirm.
      alphas: [B,S,C].
      cfg: config providing `kernel_init` and `bias_init`.

    Returns:
      Output of the final Dense layer, with last dimension 1.
    """
    x = jnp.concatenate([alphas, x], axis=-1)
    # Two Dense(16) + LayerNorm stages; note there is no nonlinearity between
    # them other than the normalisation.
    for _ in range(2):
      x = nn.Dense(
          16,
          kernel_init=cfg.kernel_init,
          bias_init=cfg.bias_init)(x)
      x = nn.LayerNorm()(x)
    x = nn.Dense(
          1,
          kernel_init=cfg.kernel_init,
          bias_init=cfg.bias_init)(x)
    return x



class Decoder(nn.Module):
  """Self-attention stack with two linear heads (value and delta predictions).

  Args:
    config: TransformerConfig dataclass containing hyperparameters.
  """
  config: TransformerConfig

  @nn.compact
  def __call__(self,
               inputs,
               deterministic,
               decoder_mask=None):
    """Runs the decoder stack on `inputs['x']`.

    Args:
      inputs: dict of input data; only `inputs['x']` is consumed (the targets
        in `inputs['y']` are not fed to the network).
      deterministic: when True, dropout is disabled.
      decoder_mask: decoder self-attention mask.

    Returns:
      `(logits_x, logits_dx, None)`: the two head outputs, plus None in the
      auxiliary slot.
    """
    cfg = self.config
    dense = functools.partial(
        nn.Dense, kernel_init=cfg.kernel_init, bias_init=cfg.bias_init)
    half_dim = cfg.hidden_dim // 2

    # Embed values and first differences into half the hidden width each.
    raw_x = inputs['x']
    raw_dx = build_deltas(raw_x)
    emb_x = dense(half_dim, name='embed_x')(raw_x)
    emb_dx = dense(half_dim, name='embed_dx')(raw_dx)
    h = jnp.concatenate([emb_x, emb_dx], axis=-1)
    assert h.shape[-1] == cfg.hidden_dim, f"{h.shape[-1]} != {cfg.hidden_dim}"

    h = nn.Dropout(rate=cfg.dropout_rate)(h, deterministic=deterministic)
    h = AddPositionEmbs(config=cfg, name='posembed_output')(h)
    h = nn.Dropout(rate=cfg.dropout_rate)(h, deterministic=deterministic)

    # Stack of self-attention blocks.
    for layer_idx in range(cfg.num_layers):
      block = EncoderDecoder1DBlock(
          config=cfg, name=f'encoderdecoderblock_{layer_idx}')
      h = block(h, deterministic=deterministic, decoder_mask=decoder_mask)
    h = nn.LayerNorm(name='encoderdecoder_norm')(h)

    # Two independent linear heads on the shared representation.
    logits_x = dense(cfg.output_size, name='logits_x')(h)
    logits_dx = dense(cfg.output_size, name='logits_dx')(h)
    return logits_x, logits_dx, None


class Transformer(nn.Module):
  """Top-level model: builds the attention mask and dispatches to a decoder.

  Args:
    config: TransformerConfig dataclass containing hyperparameters.
  """
  config: TransformerConfig

  @nn.compact
  def __call__(self,
               inputs,
               train):
    """Applies the configured decoder to the inputs.

    N.b. does not support masking for incomplete sequences.

    Args:
      inputs: dict with 'x' and 'y' arrays of shape (batch, len, channels)
        sharing the same length.
      train: when True, dropout is enabled (deterministic=False).

    Returns:
      The `(logits_x, logits_dx, aux)` tuple produced by the decoder.
    """
    assert inputs['x'].ndim == 3  # (batch, len, channels)
    assert inputs['y'].ndim == 3  # (batch, len, channels)
    assert inputs['x'].shape[1] == inputs['y'].shape[1]

    cfg = self.config
    # Causal mask over the sequence axis, or no mask at all.
    if cfg.causal_x:
      decoder_mask = nn.make_causal_mask(inputs['x'][...,0])
    else:
      decoder_mask = None

    # Both variants are registered under the same submodule name.
    decoder_cls = PhysicsDecoder if cfg.physics_decoder else Decoder
    logits_x, logits_dx, aux = decoder_cls(config=cfg, name='decoder')(
        inputs,
        deterministic=not train,
        decoder_mask=decoder_mask)
    return logits_x, logits_dx, aux


In [48]:
def compute_l2(predictions, targets, padding):
  """Computes the mean squared error with padded positions zeroed out.

  Args:
   predictions: [batch, length, dim] float array.
   targets: [batch, length, dim] float array.
   padding: [batch, length] padding mask (1=padding)
  Returns:
    Scalar loss: squared errors summed over `dim`, then averaged over every
    (batch, length) position. Padded positions contribute zero but are still
    counted in the average.
  Raises:
    ValueError: if predictions and targets differ in number of dimensions.
  """
  if predictions.ndim != targets.ndim :
    shapes = (str(predictions.shape), str(targets.shape))
    raise ValueError(
        'Incorrect shapes. Got shape %s predictions and %s targets' % shapes)

  keep = 1 - padding[...,None]  # 1 on real timesteps, 0 on padding.
  residual = predictions * keep - targets * keep
  per_step = jnp.sum(residual ** 2, axis=-1)  # Sum over channels
  return per_step.mean()


def compute_hinge(values):
  """Computes a one-sided hinge penalty: the mean of max(values, 0).

  Args:
   values: [batch, length] float array.
  Returns:
    Scalar loss, averaged over batch and time.
  """
  assert len(values.shape) == 2, f"{len(values.shape)} != 2"
  # jnp.maximum instead of jnp.clip(values, a_min=0): the `a_min` keyword of
  # jnp.clip is deprecated/removed in newer JAX releases.
  loss = jnp.maximum(values, 0)
  return loss.mean()   # Mean over time and batch.


def compute_loss(p1, p2, aux, labels1, labels2, pad, alpha=0.0, aux_loss_weight=0.0):
  """Combines the two L2 terms and the optional auxiliary hinge term.

  Args:
    p1: predictions compared against `labels1`.
    p2: predictions compared against `labels2`.
    aux: auxiliary values for the hinge penalty, or None.
    labels1: targets for `p1`.
    labels2: targets for `p2`.
    pad: padding mask passed to `compute_l2`.
    alpha: interpolation weight; the L2 loss is (1 - alpha) * l2(p1) +
      alpha * l2(p2).
    aux_loss_weight: weight of the hinge term; must be 0.0 when `aux` is None.

  Returns:
    `(total_loss, l2_loss, aux_loss)`.
  """
  value_term = compute_l2(p1, labels1, pad)
  delta_term = compute_l2(p2, labels2, pad)
  l2_loss = (1 - alpha) * value_term + alpha * delta_term

  if aux is None:
    # No auxiliary output: a non-zero weight would be a configuration error.
    assert aux_loss_weight == 0.0
    aux_loss = 0.0
  else:
    aux_loss = compute_hinge(aux)

  total = l2_loss + aux_loss_weight * aux_loss
  return total, l2_loss, aux_loss


def compute_metrics(p1, p2, aux, labels1, labels2, pad, alpha, aux_loss_weight):
  """Computes the loss terms and sums them across the "batch" device axis.

  Must be called inside a transform that binds the axis name "batch".
  """
  total, l2_term, aux_term = compute_loss(
      p1, p2, aux, labels1, labels2, pad, alpha, aux_loss_weight)
  per_device = {
      'loss': total,
      'l2_loss': l2_term,
      'aux_loss': aux_term,
  }
  return jax.lax.psum(per_device, axis_name="batch")


def build_deltas(x):
  """Returns first differences of `x` along axis 1, with a zero first step.

  Args:
    x: array of shape (batch, length, channels).

  Returns:
    Array of the same shape where entry t is x[:, t] - x[:, t-1] and entry 0
    is zero.
  """
  batch, _, channels = x.shape
  leading_zero = jnp.zeros(shape=(batch, 1, channels), dtype=jnp.float32)
  steps = x[:, 1:, :] - x[:, :-1, :]
  return jnp.concatenate([leading_zero, steps], axis=1)


def train_step(state, inputs, model, learning_rate_fn, alpha, aux_loss_weight,
               dropout_rng=None):
  """Runs one optimisation step and returns the new state plus metrics.

  Intended to run under a transform binding the axis name 'batch' (gradients
  are averaged and metrics summed across that axis).

  Args:
    state: train state providing `step`, `params` and `apply_gradients`.
    inputs: batch dict with at least 'y' and 'pad' (the whole dict is passed
      to the model).
    model: module whose `apply` returns `(py, pdy, aux)`.
    learning_rate_fn: maps the step to a learning rate (reported in metrics).
    alpha: weight of the delta term in the loss.
    aux_loss_weight: weight of the auxiliary hinge term.
    dropout_rng: PRNG key folded with the step to seed dropout.

  Returns:
    `(new_state, metrics)`.
  """
  step_rng = jax.random.fold_in(dropout_rng, state.step)
  targets = inputs['y']
  target_deltas = build_deltas(targets)
  padding = inputs['pad']

  def loss_fn(params):
    """Total loss, with the raw model outputs carried as auxiliary data."""
    py, pdy, aux = model.apply(
        {'params': params},
        inputs,
        train=True,
        rngs={"dropout": step_rng})
    total, _, _ = compute_loss(
        py, pdy, aux, targets, target_deltas, padding, alpha, aux_loss_weight)
    return total, (py, pdy, aux)

  lr = learning_rate_fn(state.step)
  (_, (pred_y, pred_dy, aux_out)), grads = jax.value_and_grad(
      loss_fn, has_aux=True)(state.params)
  grads = jax.lax.pmean(grads, 'batch')
  new_state = state.apply_gradients(grads=grads)

  metrics = compute_metrics(
      pred_y, pred_dy, aux_out, targets, target_deltas, padding, alpha,
      aux_loss_weight)
  metrics['learning_rate'] = lr

  return new_state, metrics


def eval_step(params, inputs, model, alpha, aux_loss_weight):
  """Computes evaluation metrics for one batch (dropout disabled).

  Args:
    params: model parameters.
    inputs: batch dict with at least 'y' and 'pad'.
    model: module whose `apply` returns `(py, pdy, aux)`.
    alpha: weight of the delta term in the loss.
    aux_loss_weight: weight of the auxiliary hinge term.

  Returns:
    Metrics dict from `compute_metrics`.
  """
  targets = inputs['y']
  target_deltas = build_deltas(targets)
  padding = inputs['pad']
  pred_y, pred_dy, aux_out = model.apply(
      {'params': params}, inputs, train=False)
  return compute_metrics(
      pred_y, pred_dy, aux_out, targets, target_deltas, padding, alpha,
      aux_loss_weight)


# Jitted initialisation: `config` and `model` are static (hashable) arguments,
# only the PRNG key is traced.
@functools.partial(jax.jit, static_argnums=[0, 1])
def initialize_variables(config, model, init_rng):
  """Initialises the model variables on an all-ones dummy batch.

  Args:
    config: object providing `max_len`.
    model: module whose `init` accepts `inputs` and `train`.
    init_rng: PRNG key for initialisation.

  Returns:
    The variable collection returned by `model.init`.
  """
  dummy = jnp.ones((1, config.max_len, 1), jnp.float32)
  return model.init(init_rng, inputs={'x': dummy, 'y': dummy}, train=False)

In [49]:
# All tunable settings for this run, collected in a single dict.
hparams = {
    # Setup
    'model_dir': '/tmp/test',
    # Model params
    'physics_decoder': False,
    'max_len': MAX_LEN,
    'num_layers': 4,
    'hidden_dim': 16,
}
# The feed-forward width is tied to the hidden width.
hparams['mlp_dim'] = 4 * hparams['hidden_dim']
hparams.update({
    'num_heads': 2,
    'dropout_rate': 0.0,
    'attention_dropout_rate': 0.0,
    'alpha': 0.0,
    'aux_loss_weight': 0.0,
    'causal_x': True,
    # Training params
    'batch_size': 16,
    'learning_rate': 0.01,
    'num_train_steps': 1001,
    'eval_freq': 100,
    'weight_decay': 0.0,
    'random_seed': 0,
})

# Make sure tf does not allocate gpu memory.
tf.config.experimental.set_visible_devices([], 'GPU')

# Each device receives an equal share of every batch.
if hparams['batch_size'] % jax.device_count() != 0:
  raise ValueError('Batch size must be divisible by the number of devices')
device_batch_size = hparams['batch_size'] // jax.device_count()

In [50]:
# Build the input pipelines, model, optimizer and pmapped step functions, then
# run the training loop with an evaluation pass every `eval_freq` steps.
train_ds, eval_ds = get_datasets_tf(batch_size=hparams['batch_size'])
train_iter = iter(train_ds)

config = TransformerConfig(
    max_len=hparams['max_len'],
    num_layers=hparams['num_layers'],
    hidden_dim=hparams['hidden_dim'],
    mlp_dim=hparams['mlp_dim'],
    num_heads=hparams['num_heads'],
    dropout_rate=hparams['dropout_rate'],
    attention_dropout_rate=hparams['attention_dropout_rate'],
    causal_x=hparams['causal_x'],
    physics_decoder=hparams['physics_decoder'],
)

rng = jax.random.PRNGKey(hparams['random_seed'])
rng, init_rng = jax.random.split(rng)

model = Transformer(config)
init_variables = initialize_variables(config, model, init_rng)

learning_rate_fn = create_learning_rate_scheduler(
    base_learning_rate=hparams['learning_rate'])

# Weight decay comes from hparams; it was previously hard-coded to 1e-1, which
# silently overrode the configured hparams['weight_decay'].
optimizer = optax.adamw(
    learning_rate_fn, b1=0.9, b2=0.98, eps=1e-9,
    weight_decay=hparams['weight_decay'])
state = train_state.TrainState.create(
    apply_fn=model.apply,
    params=init_variables["params"],
    tx=optimizer)
state = flax.jax_utils.replicate(state)

p_train_step = jax.pmap(
    functools.partial(
        train_step,
        model=model,
        learning_rate_fn=learning_rate_fn,
        alpha=hparams['alpha'],
        aux_loss_weight=hparams['aux_loss_weight']),
    axis_name='batch',
    donate_argnums=(0,))  # pytype: disable=wrong-arg-types
p_eval_step = jax.pmap(
    functools.partial(
        eval_step,
        model=model,
        alpha=hparams['alpha'],
        aux_loss_weight=hparams['aux_loss_weight']),
    axis_name='batch')

# We init the first set of dropout PRNG keys, but update it afterwards inside
# the main pmap'd training update for performance.
dropout_rngs = jax.random.split(rng, jax.local_device_count())
del rng

metrics_all = []
tick = time.time()
best_dev_score = 1e6
for step, batch in zip(range(hparams['num_train_steps']), train_iter):
  batch = flax.training.common_utils.shard(
      jax.tree.map(lambda x: x._numpy(), batch))  # pylint: disable=protected-access

  state, metrics = p_train_step(state, batch, dropout_rng=dropout_rngs)
  metrics_all.append(metrics)

  if step % hparams['eval_freq'] == 0:
    # Number of train steps in this reporting window (1 for the first window,
    # eval_freq afterwards); used for the throughput figure.
    steps_in_window = len(metrics_all)
    metrics_all = flax.training.common_utils.get_metrics(metrics_all)
    lr = metrics_all.pop('learning_rate').mean()
    summary = jax.tree.map(jnp.mean, metrics_all)
    summary['learning_rate'] = lr
    metrics_all = []

    # Computed on every process: the summary print below uses it
    # unconditionally, so defining it only on process 0 raised NameError
    # elsewhere.
    tock = time.time()
    steps_per_sec = steps_in_window / max(tock - tick, 1e-9)
    tick = tock
    if jax.process_index() == 0:
      logging.info('Steps per second: %.1f', steps_per_sec)

    eval_metrics = []
    for eval_batch in eval_ds:
      eval_batch = jax.tree.map(lambda x: x._numpy(), eval_batch)  # pylint: disable=protected-access
      # TODO: Handle final odd-sized batch by padding instead of dropping it.
      cur_pred_batch_size = eval_batch['x'].shape[0]
      if cur_pred_batch_size != hparams['batch_size']:
        continue
      eval_batch = flax.training.common_utils.shard(eval_batch)

      metrics_from_eval = p_eval_step(state.params, eval_batch)
      eval_metrics.append(metrics_from_eval)
    if not eval_metrics:
      raise ValueError(
          'No full-sized eval batch available; the eval set is smaller than '
          'the batch size.')
    eval_metrics = flax.training.common_utils.get_metrics(eval_metrics)
    eval_summary = jax.tree.map(jnp.mean, eval_metrics)

    print('Step: %04d,\ttrain loss %0.4f,\ttrain l2 %0.4f,\ttrain aux %0.4f,\teval loss %0.4f,\teval l2 %0.4f,\teval aux %0.4f,\tsteps/s %0.1f' % (
        step,
        summary['loss'],
        summary['l2_loss'],
        summary['aux_loss'],
        eval_summary['loss'],
        eval_summary['l2_loss'],
        eval_summary['aux_loss'],
        steps_per_sec))

    if best_dev_score > eval_summary['loss']:
      best_dev_score = eval_summary['loss']
      # Copy the arrays: `state` is donated to the next p_train_step call
      # (donate_argnums=(0,)), so a plain alias would point at buffers that
      # are invalidated on backends that honour donation.
      best_state = jax.tree.map(jnp.copy, state)

    eval_summary['best_dev_score'] = best_dev_score

ValueError: object __array__ method not producing an array

In [59]:
# Debug probe: rebuild the numpy splits and repeat the first tf.data call of
# get_datasets_tf in isolation. Note that this rebinds the global `train_ds`.
train_np, test_np = get_datasets_np()
train_ds = tf.data.Dataset.from_tensor_slices(train_np)
# train_np

ValueError: object __array__ method not producing an array

In [64]:
import numpy as np

# Debug probe: check that the train inputs survive a plain NumPy conversion.
try:
    x_array = np.array(train_np['x'])
except Exception as e:
    print("Error converting to numpy array:", e)
else:
    print("Successfully converted to numpy array")
    print("Shape:", x_array.shape)
    print("Data type:", x_array.dtype)

Successfully converted to numpy array
Shape: (136, 1024, 1)
Data type: float64


In [75]:
# Debug probe: record the library versions active in this kernel.
print("NumPy version:", np.__version__)
print("TensorFlow version:", tf.__version__)

NumPy version: 2.0.0
TensorFlow version: 2.17.0


In [76]:
# Debug probe: fresh NumPy copy of the train inputs, reused by the cells below.
new_array = np.array(train_np['x'])

In [77]:
# Debug probe: convert the fresh copy directly, bypassing tf.data.
tf_tensor = tf.convert_to_tensor(new_array)

ValueError: object __array__ method not producing an array

In [78]:
# Debug probe: same conversion as above, with the error captured and printed.
new_array = np.array(train_np['x'])
try:
    tf_tensor = tf.convert_to_tensor(new_array)
except Exception as e:
    print("Error converting new array to TensorFlow tensor:", e)
else:
    print("Successfully converted new array to TensorFlow tensor")

Error converting new array to TensorFlow tensor: object __array__ method not producing an array


In [79]:
# Debug probe: rule out the float64 dtype by retrying with a float32 copy.
new_array_float32 = new_array.astype(np.float32)
try:
    tf_tensor = tf.convert_to_tensor(new_array_float32)
except Exception as e:
    print("Error converting float32 array to TensorFlow tensor:", e)
else:
    print("Successfully converted float32 array to TensorFlow tensor")

Error converting float32 array to TensorFlow tensor: object __array__ method not producing an array


In [80]:
# Debug probe: replay the padding/slicing from get_datasets_np step by step and
# try the TensorFlow conversion before and after adding the channel axis.
data = RAW_DATA.copy()
b, l, _ = data.shape
assert MAX_LEN >= l
data = np.pad(data, ([0,0], [0, MAX_LEN-l], [0,0]))

try:
    tf_tensor = tf.convert_to_tensor(data[:TRAIN_SIZE, :MAX_LEN, 0])
except Exception as e:
    print("Error converting slice of data to TensorFlow tensor:", e)
else:
    print("Successfully converted slice of data to TensorFlow tensor")

try:
    tf_tensor = tf.convert_to_tensor(data[:TRAIN_SIZE, :MAX_LEN, 0][...,None])
except Exception as e:
    print("Error converting final reshaped data to TensorFlow tensor:", e)
else:
    print("Successfully converted final reshaped data to TensorFlow tensor")

Error converting slice of data to TensorFlow tensor: object __array__ method not producing an array
Error converting final reshaped data to TensorFlow tensor: object __array__ method not producing an array


In [82]:
# Debug probe: rule out the data itself by converting a random array of the
# same shape and dtype.
try:
    random_array = np.random.rand(*new_array.shape).astype(new_array.dtype)
    tf_tensor = tf.convert_to_tensor(random_array)
except Exception as e:
    print("Error converting random array to TensorFlow tensor:", e)
else:
    print("Successfully converted random array to TensorFlow tensor")

Error converting random array to TensorFlow tensor: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject


In [83]:
# Debug probe: record the interpreter and library versions in use.
# NOTE(review): numpy and tensorflow are already imported at the top of the
# notebook; only `sys` is new here.
import sys
import numpy as np
import tensorflow as tf

print("Python version:", sys.version)
print("NumPy version:", np.__version__)
print("TensorFlow version:", tf.__version__)

Python version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
NumPy version: 2.0.0
TensorFlow version: 2.17.0
