### Tensors flow from operation to operation (hence the name Tensorflow)

Similar to a NumPy ndarray, a tensor can also hold a scalar.

Tensors allow for more customized solutions.

In [1]:
import tensorflow as tf

t = tf.constant([[1.,2.,3.], [4.,5.,6.]])
t

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [2]:
t.shape

TensorShape([2, 3])

In [3]:
t.dtype

tf.float32

In [4]:
t[:, 1:]

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2., 3.],
       [5., 6.]], dtype=float32)>

In [5]:
t[..., 1, tf.newaxis]

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[2.],
       [5.]], dtype=float32)>

Use `tf.reshape()` when you need to rearrange elements without adding or removing data.

Use `tf.newaxis` when you need to expand dimensions (e.g., add batch/channel axes).

In [9]:
t + 10

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[11., 12., 13.],
       [14., 15., 16.]], dtype=float32)>

In [10]:
tf.square(t)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)>

In [11]:
t @ tf.transpose(t)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 32.],
       [32., 77.]], dtype=float32)>

In [14]:
tf.constant(69)

<tf.Tensor: shape=(), dtype=int32, numpy=69>

NumPy's transpose `t.T` is different from `tf.transpose(t)`: NumPy returns a view on the same data (no copy), whereas TensorFlow returns a new tensor holding its own transposed copy of the data.

In [15]:
import numpy as np

a = np.array([2.,4.,5.])
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [16]:
t.numpy()

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

In [17]:
tf.square(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([ 4., 16., 25.])>

In [18]:
np.square(t)

array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)

NumPy uses 64-bit precision by default, while TensorFlow uses 32-bit. 32-bit precision is generally more than enough for neural networks, plus it runs faster and uses less
RAM. When you create a tensor from a NumPy array, make sure to set dtype=tf.float32.

In [21]:
# These throw errors because TensorFlow does not convert data types automatically
# tf.constant(2.) + tf.constant(40)
# tf.constant(2.) + tf.constant(40, dtype=tf.float64)

In [22]:
t2 = tf.constant(40., dtype=tf.float64)
tf.constant(2.0) + tf.cast(t2, tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

In [23]:
v = tf.Variable([[1.,2.,3.], [4.,5.,6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

TensorFlow Variables allow us to modify their values in place

In [24]:
v.assign(2 * v)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [25]:
v[0, 1].assign(69)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 69.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [26]:
v[:, 2].assign([0.,1.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 69.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

In [27]:
v.scatter_nd_update(indices=[[0,0], [1,2]], updates=[100., 200.]) # updating slices

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  69.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

In [28]:
# direct assignment won't work
# v[1] = [7.,8.,9.]

### Custom Loss function

Used when you need something custom, e.g. when your data is noisy and MSE is not enough.

`tf.keras.losses.Huber` is built in but here we can define our own

In [29]:
def huber_fn(y_true, y_pred):
    """Return the element-wise Huber loss with a fixed threshold of 1.

    Where the absolute error is strictly below 1 the penalty is quadratic
    (``error**2 / 2``); elsewhere it is linear (``|error| - 0.5``).
    No reduction is applied, so one loss value is returned per element.
    """
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic = tf.square(residual) / 2
    linear = abs_residual - 0.5
    # Select the quadratic branch only for the small errors.
    return tf.where(abs_residual < 1, quadratic, linear)

It's not recommended to return the mean loss, as it makes it impossible to use class weights or sample weights when you need them.

In [31]:
# model.compile(loss=huber_fn, optimizer="nadam")
# model.fit(X_train, y_train, [...])

In [32]:
# Loading a model containing custom objects
# model = tf.keras.models.load_model("my_model_with_a_custom_loss", custom_objects={"huber_fn": huber_fn})

Any error between -1 and 1 is considered small. If we want a different threshold, we can create a function that builds a custom loss function configured with that threshold.

In [34]:
def create_huber(threshold=1.0):
    """Build a Huber loss function configured with the given threshold.

    The returned function computes an element-wise loss: quadratic
    (``error**2 / 2``) where ``|error| < threshold`` and linear
    (``threshold * |error| - threshold**2 / 2``) elsewhere.
    """
    def huber_fn(y_true, y_pred):
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = threshold * abs_residual - threshold ** 2 / 2
        # Errors strictly below the threshold take the quadratic branch.
        return tf.where(abs_residual < threshold, quadratic, linear)

    return huber_fn

# model.compile(loss=create_huber(2.0), optimizer="nadam")

In [35]:
# loading the model with custom threshold
# model = tf.keras.models.load_model("my_model_loss_threshold_2", custom_objects={"huber_fn": create_huber(2.0)})

When saving a model, Keras calls the loss instance's `get_config()` method.

In [36]:
class HuberLoss(tf.keras.losses.Loss):
    """Huber loss whose threshold is stored in the loss config.

    Args:
        threshold: Absolute error below which the loss is quadratic; at or
            above it the loss is linear.
        **kwargs: Forwarded unchanged to ``tf.keras.losses.Loss``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss between targets and predictions."""
        error = y_true - y_pred
        abs_error = tf.abs(error)
        # The threshold is instance state: it must be read through ``self``
        # (a bare ``threshold`` name is undefined in this scope).
        is_small_error = abs_error < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * abs_error - self.threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        """Return the base loss config extended with ``threshold``."""
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

# model.compile(loss=HuberLoss(2.), optimizer="nadam")
# model = tf.keras.models.load_model("model_custom_loss_class", custom_objects={"HuberLoss": HuberLoss})

### Custom activations, initializers, regularizers, and constraints

- activation
   softplus: equivalent to `tf.keras.activations.softplus()` or `tf.nn.softplus()`
- initializer
  glorot: equivalent to `tf.keras.initializers.glorot_normal()`
- regularizer
  L1: `tf.keras.regularizers.l1(0.01)`
- constraints
  weights: `tf.keras.constraints.NonNeg()` or `tf.nn.relu()`

In [37]:
def my_softplus(z):
    """Return softplus(z) = log(1 + exp(z)), computed element-wise.

    Uses the algebraically equivalent form
    ``relu(z) + log1p(exp(-|z|))`` so that ``exp`` is only ever applied to
    non-positive values; the naive ``log(1 + exp(z))`` overflows to ``inf``
    once ``exp(z)`` exceeds the floating-point range.
    """
    return tf.nn.relu(z) + tf.math.log1p(tf.exp(-tf.abs(z)))

def my_glorot_initializer(shape, dtype=tf.float32):
    """Return normally distributed initial weights of the given shape.

    The standard deviation is ``sqrt(2 / (shape[0] + shape[1]))``, so
    ``shape`` must have at least two dimensions.
    """
    dim_sum = shape[0] + shape[1]
    stddev = tf.sqrt(2. / dim_sum)
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)

def my_l1_regularizer(weights):
    """Return the sum of absolute values of the weights scaled by 0.01."""
    scaled = 0.01 * weights
    return tf.reduce_sum(tf.abs(scaled))

def my_positive_weights(weights):
    """Return the weights with every negative entry replaced by zero."""
    is_negative = weights < 0.
    zeros = tf.zeros_like(weights)
    return tf.where(is_negative, zeros, weights)

In [39]:
# Single-unit Dense layer wired to the custom activation, initializer,
# regularizer, and constraint functions defined in the previous cell.
layer = tf.keras.layers.Dense(1, activation=my_softplus,
                              kernel_initializer=my_glorot_initializer,
                              kernel_regularizer=my_l1_regularizer,
                              kernel_constraint=my_positive_weights)

### hyperparams saved with model

Subclass the appropriate base class: `Initializer`, `Layer`, `Constraint`, or `Regularizer`.

In [40]:
class MyL1Regularizer(tf.keras.regularizers.Regularizer):
    """L1 regularizer with a configurable scaling factor.

    The factor is exposed through ``get_config`` so it can be stored
    alongside the object's configuration.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, weights):
        """Return the sum of absolute values of ``factor * weights``."""
        scaled = self.factor * weights
        return tf.reduce_sum(tf.abs(scaled))

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(factor=self.factor)

In [41]:
# custom metric:
# model.compile(loss="mse", optimizer="nadam", metrics=[create_huber(2.0)])

### Binary classifier precision

1. **First batch:**
   - **True Positives (TP)** = 4  
   - **False Positives (FP)** = 1  
   - **Precision** =  
     $$ \frac{TP}{TP + FP} = \frac{4}{5} = 80\% $$

2. **Second batch:**
   - **True Positives (TP)** = 0  
   - **False Positives (FP)** = 3  
   - **Precision** =  
     $$ \frac{0}{3} = 0\% $$

### Why the Mean is Wrong (Macro Averaging)
If you simply average the two batch precisions:

$$
\frac{80\% + 0\%}{2} = 40\%
$$

This is **macro averaging**, which treats both batches equally regardless of the number of predictions in each.

### Why 50% is Correct (Micro Averaging)
The correct way to calculate **overall precision** is to sum up all **true positives** and divide by all **positive predictions** across both batches:

$$
\frac{\text{Total True Positives}}{\text{Total Positive Predictions}} = \frac{4 + 0}{5 + 3} = \frac{4}{8} = 50\%
$$

This is **micro averaging**, which takes into account the actual number of predictions.

### Key Takeaway
- **Macro averaging** (simple mean) can be misleading because it gives equal weight to each batch, regardless of size.
- **Micro averaging** (overall TP / overall predictions) is usually the right way to calculate precision when looking at the model’s actual performance.

That's why the correct answer is **50%**, not **40%**!

In [42]:
# Micro averaging
precision = tf.keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])

<tf.Tensor: shape=(), dtype=float32, numpy=0.800000011920929>

In [43]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

^ This is the overall precision, not just the second batch's! It is a streaming (stateful) metric, updated batch after batch.

In [44]:
precision.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [45]:
precision.variables

[<Variable path=precision/true_positives, shape=(1,), dtype=float32, value=[4.]>,
 <Variable path=precision/false_positives, shape=(1,), dtype=float32, value=[4.]>]

In [47]:
precision.reset_state()

In [48]:
# custom streaming metric
class HuberMetric(tf.keras.metrics.Metric):
    """Streaming metric that tracks the mean Huber loss across batches.

    Args:
        threshold: Threshold passed to ``create_huber`` to build the
            per-element Huber function.
        **kwargs: Forwarded unchanged to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        # Pass the weight name by keyword: the position of ``name`` in
        # ``add_weight`` differs between Keras versions (in Keras 3 the
        # first positional parameter is ``shape``).
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the summed Huber loss and element count of one batch.

        NOTE: ``sample_weight`` is accepted for API compatibility but is
        not used; every element contributes equally.
        """
        sample_metrics = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(sample_metrics))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        """Return the accumulated total divided by the accumulated count."""
        return self.total / self.count

    def get_config(self):
        """Return the base metric config extended with ``threshold``."""
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

^ Keras takes care of variable persistence; no action is required.

#### layers with no weights `tf.keras.layers.Flatten`
#### layers with no weights `tf.keras.layers.ReLU`

custom layer without weights:

In [50]:
# Weightless custom layer: a Lambda layer that applies tf.exp to its input.
# Presumably interchangeable with activation="exponential" on the preceding
# layer -- confirm before relying on it.
exponential_layer = tf.keras.layers.Lambda(lambda x: tf.exp(x))

In [51]:
# custom stateful layer (simplified Dense layer)
class MyDense(tf.keras.layers.Layer):
    """Simplified fully connected layer with a kernel, a bias, and an activation.

    Args:
        units: Number of output units.
        activation: Anything accepted by ``tf.keras.activations.get``.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input's last dimension is known."""
        n_inputs = batch_input_shape[-1]
        self.kernel = self.add_weight(
            name="kernel",
            shape=[n_inputs, self.units],
            initializer="glorot_normal",
        )
        self.bias = self.add_weight(
            name="bias",
            shape=[self.units],
            initializer="zeros",
        )

    def call(self, X):
        """Return ``activation(X @ kernel + bias)``."""
        pre_activation = X @ self.kernel + self.bias
        return self.activation(pre_activation)

    def get_config(self):
        """Return the base layer config extended with units and activation."""
        config = dict(super().get_config())
        config["units"] = self.units
        config["activation"] = tf.keras.activations.serialize(self.activation)
        return config

In [52]:
# A layer that adds Gaussian noise during training but does nothing during testing
# much like tf.keras.layers.GaussianNoise
class MyGaussianNoise(tf.keras.layers.Layer):
    """Layer that adds random normal noise to its input in training mode only.

    Args:
        stddev: Standard deviation passed to ``tf.random.normal``.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, stddev, **kwargs):
        super().__init__(**kwargs)
        self.stddev = stddev

    def call(self, X, training=False):
        """Return ``X`` plus noise when ``training`` is truthy, else ``X``."""
        if not training:
            return X
        noise = tf.random.normal(tf.shape(X), stddev=self.stddev)
        return X + noise

    def get_config(self):
        """Return the base layer config extended with ``stddev``.

        ``stddev`` is a required constructor argument, so it is included
        explicitly to let the layer be re-created from its config.
        """
        base_config = super().get_config()
        return {**base_config, "stddev": self.stddev}

Custom Model:

When creating identical blocks, it's more efficient to define the layers needed for the model separately. Now we have the tools to create any model we need with Sequential.

In [54]:
class ResidualBlock(tf.keras.layers.Layer):
    """Stack of Dense layers whose output is added back to the block input.

    Args:
        n_layers: Number of Dense layers in the block.
        n_neurons: Number of units in each Dense layer.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        dense_layers = []
        for _ in range(n_layers):
            dense_layers.append(
                tf.keras.layers.Dense(
                    n_neurons,
                    activation="relu",
                    kernel_initializer="he_normal",
                )
            )
        self.hidden = dense_layers

    def call(self, inputs):
        """Run the inputs through every Dense layer and add the skip connection."""
        outputs = inputs
        for dense in self.hidden:
            outputs = dense(outputs)
        return inputs + outputs

In [55]:
class ResidualRegressor(tf.keras.Model):
    """Model made of a Dense layer, two residual blocks, and a Dense output.

    Args:
        output_dim: Number of units in the output Dense layer.
        **kwargs: Forwarded unchanged to ``tf.keras.Model``.
    """

    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = tf.keras.layers.Dense(
            30, activation="relu", kernel_initializer="he_normal"
        )
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = tf.keras.layers.Dense(output_dim)

    def call(self, inputs):
        """Apply hidden1, block1 four times, block2 once, then the output layer."""
        features = self.hidden1(inputs)
        # The same block instance is applied four times in a row.
        for _ in range(4):
            features = self.block1(features)
        features = self.block2(features)
        return self.out(features)

Custom reconstruction loss (the mean squared difference between the reconstruction and the inputs), which is added to the main loss.

In [56]:
class ReconstructingRegressor(tf.keras.Model):
    """Regressor with an auxiliary loss for reconstructing its own inputs.

    The model has five hidden Dense layers and a Dense output layer. A
    further Dense layer, created in ``build``, maps the last hidden
    activations back to the input dimension; its mean squared error against
    the inputs is added to the model's losses, scaled by 0.05.

    Args:
        output_dim: Number of units in the output Dense layer.
        **kwargs: Forwarded unchanged to ``tf.keras.Model``.
    """

    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")
                       for _ in range(5)]
        self.out = tf.keras.layers.Dense(output_dim)
        self.reconstruction_mean = tf.keras.metrics.Mean(name="reconstruction_error")

    def build(self, batch_input_shape):
        """Create the reconstruction layer once the input width is known."""
        n_inputs = batch_input_shape[-1]
        self.reconstruct = tf.keras.layers.Dense(n_inputs)

    def call(self, inputs, training=False):
        """Return the predictions and register the reconstruction loss."""
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        if training:
            # Calling the metric updates its running mean and returns it.
            result = self.reconstruction_mean(recon_loss)
            # ``add_metric`` is not available in every Keras version, so
            # only call it where it exists; the metric object itself is
            # updated either way.
            if hasattr(self, "add_metric"):
                self.add_metric(result)
        return self.out(Z)