<a href="https://colab.research.google.com/github/rawatinder1/learning_tensorFlow/blob/main/00_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## In this notebook, we are going to cover some of the most fundamental concepts of tensors using TensorFlow.

## Introduction to Tensors

In [None]:
# Load TensorFlow and report which release this notebook is running against.
import tensorflow as tf

print(tf.__version__)

In [None]:
# tf.constant() builds an immutable tensor; a bare Python int gives rank 0.
scalar = tf.constant(value=7)
scalar

In [None]:
import tensorflow as tf


def _describe(label, t):
    """Print a heading, the tensor itself, its shape and rank, then a divider."""
    print(label)
    print(t)
    print("Shape:", t.shape, "Rank:", tf.rank(t).numpy())
    print("-" * 40)


# 1. Scalar: a single value, no axes (rank 0).
scalar = tf.constant(7)
_describe("Scalar:", scalar)

# 2. Vector: one axis (rank 1).
vector = tf.constant([1, 2, 3])
_describe("Vector:", vector)

# 3. Matrix: two axes, here 2 rows by 3 columns (rank 2).
matrix = tf.constant([[1, 2, 3],
                      [4, 5, 6]])
_describe("Matrix:", matrix)

# 4. Rank-3 tensor: two stacked 2x2 matrices.
tensor3d = tf.constant([[[1, 2], [3, 4]],
                        [[5, 6], [7, 8]]])
_describe("3D Tensor:", tensor3d)

# 5. Image-like rank-4 tensor laid out as (batch, height, width, channels):
#    four 28x28 images with three channels each, filled with uniform noise.
image_tensor = tf.random.uniform(shape=(4, 28, 28, 3))
_describe("4D Tensor (like images):", image_tensor)

# 6. Other properties every tensor exposes.
print("Data type:", image_tensor.dtype)
print("Number of elements:", tf.size(image_tensor).numpy())

# Recap of the section, printed as part of the cell output.
print("""
 Tensors are:
- Multi-dimensional arrays (scalars, vectors, matrices, higher-D)
- Defined by rank (number of dimensions) and shape
- Backbone of deep learning data & model weights
- GPU/TPU-compatible and support automatic differentiation
""")


In [None]:
# A scalar value combined with `shape` is expanded to fill the whole tensor,
# so this is a rank-5 float32 tensor in which every element equals 7.
tensor = tf.constant(7, dtype=tf.float32, shape=(3, 2, 3, 3, 2))

print(tensor.ndim)
tensor

In [None]:
# `*` on tensors is element-wise multiplication, not a dot product.
tensor1 = tf.constant([6, 7])
tensor2 = tf.constant([9, 8])
result = tf.multiply(tensor1, tensor2)
print(result)

## Creating tensors with `tf.Variable()`

In [None]:
# tf.Variable is the mutable counterpart of tf.constant: its contents can be
# overwritten after creation through the assign* methods.
#
# Commonly used constructor arguments:
#   initial_value - the starting value (a number, list, NumPy array, or
#                   another tensor).
#   trainable     - defaults to True; when True, TensorFlow will update the
#                   variable during training (e.g., by gradient descent).
#   dtype         - data type (inferred from initial_value if not given).
#   shape         - shape of the variable (inferred if not given).
#   name          - optional name.

# A 2x3 float32 matrix filled with sevens.
mat = tf.Variable(initial_value=7 * tf.ones(shape=(2, 3)), dtype=tf.float32)

# Overwrite the single element at row 1, column 2 in place.
mat[1, 2].assign(52)
mat


In [None]:
import tensorflow as tf

# The same 2x3 matrix of sevens, with every constructor argument spelled out
# by keyword.
changeable_tensor = tf.Variable(
    initial_value=tf.ones((2, 3)) * 7,
    trainable=True,
    dtype=tf.float32,
)
changeable_tensor

In [None]:
# A Generator created from a fixed seed produces the same stream of random
# numbers every time this cell is run from scratch.
g = tf.random.Generator.from_seed(42)

# Draw a 3x2 sample from a truncated normal centred on 5 and wrap it in a
# trainable Variable.
random_variable = g.truncated_normal(mean=5.0, stddev=1.0, shape=(3, 2))
trainable_variable = tf.Variable(initial_value=random_variable, trainable=True)
print(trainable_variable)

In [None]:
# There are several ways to create random tensors; three of them are shown
# here, all drawn from one seeded generator for reproducibility.
# NOTE: despite its name, `random_tensor` holds the tf.random.Generator object,
# not a tensor, and the `trainable_tensor*` results are plain tensors.
random_tensor = tf.random.Generator.from_seed(42)

# Uniform (rectangular) distribution: every value in the requested range is
# equally likely. With an integer dtype only whole numbers are generated.
trainable_tensor = random_tensor.uniform(
    shape=(3, 2), minval=0, maxval=100, dtype=tf.int32
)
print(trainable_tensor)

# Normal (Gaussian) distribution: probability is highest around the mean and
# decays exponentially as values move away from it.
trainable_tensor2 = random_tensor.normal(shape=(3, 2), mean=5.0, stddev=1.0)
print(trainable_tensor2)

# Truncated normal: like the Gaussian above, but values further than
# 2 * stddev from the mean are discarded and drawn again.
trainable_tensor3 = random_tensor.truncated_normal(
    shape=(3, 2), mean=5.0, stddev=1.0
)
print(trainable_tensor3)


## Shuffle the order of elements in a tensor
> If you want the same shuffled order every time the cell is re-run, set the global seed with `tf.random.set_seed()` — the operation-level `seed` argument alone is not enough.


In [None]:
# Shuffling a tensor is useful when the inherent order of the data should not
# affect learning. Reasons to shuffle training data:
#
# - Preventing bias: the order of the data can influence the learning process;
#   shuffling breaks any inherent order or pattern so the model cannot learn
#   from the order itself.
# - Improving generalization: presenting data in random order stops the model
#   from memorizing the training order and pushes it toward the underlying
#   patterns.
# - Approximating IID data: many algorithms assume independent and identically
#   distributed samples; shuffling makes samples less dependent on their
#   position in the dataset.
# - Better convergence: for some optimizers, shuffled data leads to faster and
#   more stable convergence.
#
# In short, shuffling makes training more robust.

import tensorflow as tf

not_shuffled = tf.constant([[10, 7], [3, 4], [2, 5]])

# Global seed first, then an operation-level seed on the shuffle itself.
tf.random.set_seed(42)
# tf.random.shuffle permutes along the first axis only: whole rows move,
# the contents of each row stay together.
shuffled = tf.random.shuffle(not_shuffled, seed=42)
print(shuffled)



 ## Turning numpy arrays into tensors
 > NumPy arrays and TensorFlow tensors are fundamental data structures. While they can look similar and you can convert between them (as shown in this notebook), there are some key differences:

 > GPU/TPU Acceleration: TensorFlow tensors can be seamlessly moved to and run on GPUs (Graphics Processing Units) or TPUs (Tensor Processing Units) for significant speedups in numerical computations, especially for large-scale deep learning tasks. NumPy operations primarily run on the CPU.


 > Automatic Differentiation: TensorFlow tensors are designed to work with TensorFlow's automatic differentiation system. This is crucial for training neural networks, where gradients need to be calculated efficiently. NumPy does not have built-in automatic differentiation capabilities.


 > Immutability vs. Mutability: By default, tf.constant tensors are immutable (their values cannot be changed after creation), whereas tf.Variable tensors are mutable. NumPy arrays are generally mutable.


 > Distributed Computing: TensorFlow is built to handle distributed computing, allowing you to run computations across multiple machines or devices. NumPy is primarily for single-machine processing.


 > Use Case: While NumPy is a general-purpose library for numerical computing and is excellent for tasks like data analysis and scientific computing, TensorFlow tensors are specifically designed and optimized for building and running deep learning models.


 > In summary, while you can perform similar operations on both, TensorFlow tensors are specifically engineered for the demands of deep learning, offering acceleration, automatic differentiation, and scalability that are not inherent in NumPy arrays.



In [None]:
import numpy as np
import tensorflow as tf

# 24 consecutive int32 values: 1, 2, ..., 24.
array = np.arange(1, 25, dtype=np.int32)
print(array)

# Try experimenting with other shapes: the product of the dimensions must
# equal the number of elements in the NumPy array (here 2 * 3 * 2 * 2 = 24).
A = tf.constant(array, shape=(2, 3, 2, 2))
print("Dimensions of tensor A : " , A.ndim)
print(A)


## Getting information from our Tensor

In [None]:
# * Shape -> the length (number of elements) of each dimension of a tensor.
# * Rank  -> the number of tensor dimensions (tensor.ndim).
# * Axis or Dimension -> a particular dimension of a tensor.
# * Size -> the total number of items in the tensor.
import tensorflow as tf


def getInfo(tensor):
    """Print a tensor together with its size, rank, shape and axis lengths.

    Args:
        tensor: the tensor to describe. It is printed as-is, passed to
            tf.size(), and its `ndim` and `shape` attributes are read.

    For tensors of rank 1 or higher the first slice along axis 0 and the
    lengths of the first and last axes are printed as well. A rank-0 tensor
    has no axes to index, so those three lines are skipped instead of raising.
    """
    print(tensor)
    print("size : ", tf.size(tensor).numpy())
    print("Rank : ", tensor.ndim)
    print("Shape : ", tensor.shape)

    if tensor.ndim == 0:
        # A scalar cannot be indexed and its shape is empty.
        return

    # tensor[0] is the first slice along axis 0 (the first row of a matrix).
    print("axis 0 : ", tensor[0])
    print("number of elements on 0th axis", tensor.shape[0])
    print("number of elements on last axis", tensor.shape[-1])

tensor = tf.constant([[1, 2], [5, 7], [5, 9]])
getInfo(tensor)

# Reading the output for this tensor:
#
# The shape (3, 2) tells us that the tensor has 2 dimensions.
#   - The first number, 3, is the number of elements along the 0th axis
#     (often thought of as rows): [1, 2], [5, 7] and [5, 9].
#   - The second number, 2, is the number of elements along the 1st axis
#     (often thought of as columns): each row holds two values, 1 and 2,
#     5 and 7, 5 and 9.
#
# So "number of elements on 0th axis 3" is exactly what the shape (3, 2)
# predicts.




## Indexing in Python Lists and TensorFlow Tensors

Both Python lists and TensorFlow tensors support indexing, which allows you to access individual elements or subsets of elements. While the syntax can be similar, it's important to understand the nuances of each.

**Python Lists:**

*   Lists are ordered collections of items.
*   Indexing is used to access elements based on their position (starting from 0).
*   You can use single indices, slicing (`[start:stop:step]`), and negative indices.

**TensorFlow Tensors:**

*   Tensors are multi-dimensional arrays.
*   Indexing is used to access elements or slices along each dimension.
*   Similar to NumPy arrays, you can use single indices, slicing, and advanced indexing techniques.
*   Key point: indexing with a single integer removes that dimension (the rank drops by one), whereas slicing with a range such as `0:1` keeps the dimension — even when its size is 1 — until you explicitly squeeze it.

Let's look at some examples.

In [None]:
import tensorflow as tf

# ---- Python list ----
my_list = [10, 20, 30, 40, 50]

print("Python List Indexing:")
for label, value in [
    ("First element:", my_list[0]),
    ("Third element:", my_list[2]),
    ("Last element:", my_list[-1]),           # negative index counts from the end
    ("Slice from index 1 to 3:", my_list[1:4]),  # the stop index is exclusive
    ("Every other element:", my_list[::2]),
]:
    print(label, value)

print("-" * 45)

# ---- TensorFlow tensor ----
my_tensor = tf.constant([[1, 2, 3],
                         [4, 5, 6],
                         [7, 8, 9]])

print("TensorFlow Tensor Indexing:")
for label, value in [
    ("First row:", my_tensor[0]),
    ("Third row:", my_tensor[2]),
    ("Last row:", my_tensor[-1]),
    ("Element at row 1, column 2:", my_tensor[1, 2]),
    ("Slice of rows from 0 to 1:", my_tensor[0:2, :]),
    ("Slice of columns from 1 onwards:", my_tensor[:, 1:]),
    # .numpy() unwraps the rank-0 tensor into a plain scalar value.
    ("Element at row 0, column 0:", my_tensor[0, 0].numpy()),
]:
    print(label, value)

# What lists and tensors have in common:
# - square brackets [] for indexing,
# - slicing with start:stop:step,
# - negative indices to count from the end.
#
# Where they differ:
# - tensors accept one index per dimension, e.g. [row, column];
# - slicing a tensor with a range generally keeps the original number of
#   dimensions (rank).

## Indexing in tensors

In [None]:
# Slicing with an omitted start begins at index 0; the stop index (4) is
# exclusive, so this prints the first four items.
my_list = [2, 2, 5, 7, 2, 10, 4, 2, 4]
print(my_list[0:4])

## Expanding tensor and operations on tensors
> `+`, `-`, `*`, `/`





In [None]:
# NOTE(review): `tensor` is whatever an earlier cell last assigned. Under
# Restart & Run All that is the (3, 2) matrix built for getInfo, so despite
# the variable names this goes from rank 2 to rank 3 - confirm the intended
# input.
tensor_rank_4 = tensor

# Append a new axis of length 1 at the end; `tensor[..., tf.newaxis]` is the
# indexing-style equivalent.
tensor_rank_5 = tf.expand_dims(input=tensor_rank_4, axis=-1)
print(tensor_rank_5)

# Arithmetic with a Python scalar is applied to every element.
print(tensor * 10)
print(tensor + 10000)


## Broadcasting in TensorFlow

TensorFlow uses broadcasting to perform element-wise operations on tensors with different, but compatible, shapes. This allows operations like addition and division without requiring the tensors to have the exact same shape and size.

When shapes are compatible, TensorFlow "stretches" or "copies" the smaller tensor along dimensions to match the shape of the larger tensor, without actually duplicating the data.

**Broadcasting Rules (simplified):**

1.  Compare shapes from right to left.
2.  Dimensions are compatible if they are equal or one of them is 1.
3.  If a dimension is missing in one tensor, it's padded with a dimension of size 1 on the left.
4.  If dimensions are incompatible, an error occurs.

**Examples:**

*   Adding a column tensor of shape `(3, 1)` to a row tensor of shape `(1, 3)`:

In [None]:
import tensorflow as tf

# A column of shape (3, 1) plus a row of shape (1, 3): each size-1 axis is
# stretched to match the other operand, giving a (3, 3) result.
tensor_A = tf.constant([[1], [2], [3]])
tensor_B = tf.constant([[10, 20, 30]])
result = tf.add(tensor_A, tensor_B)
print(result)

In [None]:
import numpy as np
import tensorflow as tf

# Left operand: 12 values arranged as batch dims (2, 1) of 2x3 matrices.
tensor_C = tf.constant(np.arange(1, 13, dtype=np.int32), shape=(2, 1, 2, 3))
# Right operand: 144 values arranged as batch dims (3, 2, 4) of 3x2 matrices.
tensor_D = tf.constant(np.arange(1, 145, dtype=np.int32), shape=(3, 2, 4, 3, 2))

# The trailing two axes are multiplied as matrices, (2x3) @ (3x2) -> (2x2);
# the leading batch axes are broadcast against each other.
tensor_result = tf.matmul(a=tensor_C, b=tensor_D)
tensor_result

## Broadcasting with `tf.matmul`

When using `tf.matmul` to perform matrix multiplication on tensors, TensorFlow supports broadcasting not only on the last two dimensions (the matrices being multiplied) but also on the dimensions *before* the last two. These preceding dimensions are often referred to as "batch" dimensions.

This means you can perform matrix multiplication between tensors of different ranks, provided their shapes are compatible according to broadcasting rules for these batch dimensions, and the inner dimensions of the last two dimensions match for the matrix multiplication itself.

**How `tf.matmul` Broadcasting Works:**

1.  **Matrix Dimensions:** The last two dimensions of the tensors are treated as the matrices. For the operation to be valid, the inner dimensions of these matrices must match (the number of columns in the first matrix must equal the number of rows in the second matrix).
2.  **Batch Dimensions:** The dimensions before the last two are considered batch dimensions. TensorFlow applies standard broadcasting rules to these batch dimensions, aligning them from the right (trailing batch dimension first) and treating any missing leading dimension as 1.
3.  **Result Shape:** The shape of the resulting tensor will have the broadcasted batch dimensions followed by the outer dimensions of the matrix multiplication result.

**Example:**

Consider `tensor_C` with shape `(2, 2, 2, 3)` (Rank 4) and `tensor_D` with shape `(1, 2, 1, 3, 2)` (Rank 5).

*   **Matrix Dimensions:** `(2, 3)` from `tensor_C` and `(3, 2)` from `tensor_D`. The inner dimensions (3 and 3) match. The result of the matrix multiplication on these dimensions will have shape `(2, 2)`.
*   **Batch Dimensions:** `(2, 2)` from `tensor_C` and `(1, 2, 1)` from `tensor_D`. Batch shapes are aligned from the right, so `tensor_C`'s batch shape is treated as `(1, 2, 2)`:
    *   Last batch dimension: `2` vs `1` -> Broadcasts to `2`
    *   Middle batch dimension: `2` vs `2` -> Compatible, result is `2`
    *   First batch dimension: missing (treated as `1`) vs `1` -> result is `1`
    *   The broadcasted batch shape is `(1, 2, 2)`. This, combined with the matrix multiplication result shape `(2, 2)`, gives a final shape of `(1, 2, 2, 2, 2)`, which matches the output shape seen previously.

The final result shape combines the broadcasted batch dimensions and the matrix multiplication result shape.

This broadcasting capability in `tf.matmul` is powerful as it allows for efficient operations across batches of matrices without explicit loops.
👇

🧠 Note on tf.matmul broadcasting rules:

✅ Inner dimensions must match exactly:
The last dimension of A and the second-to-last of B must be equal.

If A is (..., m, n) and B is (..., n, p) → result is (..., m, p)

✅ Leading (batch) dimensions can broadcast, but only if they are either equal or 1.

Example: (1, m, n) × (b, n, p) → (b, m, p) ✅

Example: (b, m, n) × (b, n, p) → (b, m, p) ✅

❌ If any leading dimension is different and neither is 1, broadcasting fails.

Example: (b, m, n) × (c, n, p) → ❌ fails if b ≠ c and neither is 1.

✅ If B has no batch dimension (e.g., shape (n, p)), it is treated as shared across all batches.

Example: (b, m, n) × (n, p) → (b, m, p)



🧠 Note – Why tf.matmul Worked With (2,2,2,3) × (3,2,1,3,2)

Even though the shapes of the two tensors look incompatible, tf.matmul did not break its rules. Here’s why the operation succeeds:

Inner dimensions must match

Last two dims of tensor_C: (2, 3)

Last two dims of tensor_D: (3, 2)
✅ 3 == 3 → inner matmul is valid → (2, 3) × (3, 2) → (2, 2)

Batch dimensions can broadcast

tensor_C batch dims: (2, 2)

tensor_D batch dims: (3, 2, 1)

Aligning from right:

2 vs 1 → ✅ 1 broadcasts to 2

2 vs 2 → ✅ equal

(missing) vs 3 → ✅ missing dims treated as 1, then broadcast to 3

✅ Resulting batch shape: (3, 2, 2)

Final result shape
Combine broadcasted batch dims (3, 2, 2) with the matmul result (2, 2)
→ Final tensor shape: (3, 2, 2, 2, 2)

💡 Key Takeaways:

TensorFlow treats missing batch dimensions as 1, allowing broadcasting.

A 1 in any batch dimension makes a tensor more flexible — it can “expand” to match larger shapes.

The matmul rule ((..., m, n) × (..., n, p) → (..., m, p)) is always respected — this example just shows broadcasting at work.

In [None]:
import numpy as np
import tensorflow as tf

# 48 consecutive int32 values laid out as a rank-5 tensor.
tensor_A = tf.constant(np.arange(1, 49, dtype=np.int32), shape=(2, 2, 2, 3, 2))

# `perm` names the source axis for every output axis, so you can specify
# exactly which axes to reorder: here only the last two are swapped.
tensor_B = tf.transpose(tensor_A, perm=(0, 1, 2, 4, 3))

print(tensor_B.shape)


#print(tf.transpose(tensor_A))


## Tensor Transpose and Reshape in TensorFlow

Understanding `tf.transpose()` and `tf.reshape()` is crucial for manipulating tensor dimensions effectively. While both change the arrangement of a tensor's elements, they do so in fundamentally different ways.

### 1️⃣ `tf.transpose()` – Reordering Axes

*   **Purpose:** Reorders the axes (dimensions) of a tensor according to a specified permutation.
*   **Syntax:**

In [None]:
# Call signature illustration; `tensor` is whatever an earlier cell assigned.
tf.transpose(tensor, perm=None, conjugate=False)

*   `tensor`: the input tensor
*   `perm`: list specifying the desired order of axes
*   **Default behavior:** If `perm=None`, it reverses all axes.

### 2️⃣ Example – High-rank tensor

Let's use a tensor with shape `(2, 2, 2, 3, 2)` to illustrate:

In [None]:
import numpy as np
import tensorflow as tf

# Values 1..48 reshaped by NumPy into a rank-5 array, then wrapped as a tensor.
values = np.arange(1, 49).reshape(2, 2, 2, 3, 2)
tensor_A = tf.constant(values)
print("Original tensor shape:", tensor_A.shape)

Axes positions:

| Axis | Meaning                   |
| :--- | :------------------------ |
| 0    | batch dim 1               |
| 1    | batch dim 2               |
| 2    | batch dim 3               |
| 3    | rows of inner 2D matrix   |
| 4    | columns of inner 2D matrix|

Applying `tf.transpose()` with default `perm=None`:

In [None]:
# With no `perm`, tf.transpose reverses all axes: (0,1,2,3,4) -> (4,3,2,1,0).
tensor_B = tf.transpose(tensor_A, perm=None)
print("Shape after default transpose:", tensor_B.shape)

This explains why the shape changed: the original axes `(0, 1, 2, 3, 4)` were reordered to `(4, 3, 2, 1, 0)`.

### 3️⃣ Flipping only the last two dimensions

Often, we want to transpose the inner 2D matrices without touching batch dimensions. Use the `perm` argument:

In [None]:
# Keep axes 0-2 where they are and swap only the final two.
tensor_C = tf.transpose(tensor_A, perm=(0, 1, 2, 4, 3))
print("Shape after transposing last two axes:", tensor_C.shape)

Here, `perm=[0, 1, 2, 4, 3]` means:

*   `0, 1, 2`: The first three batch dimensions remain in their original positions.
*   `4, 3`: The last two dimensions (rows and columns of the inner matrix) are swapped.

✅ Only the 2D matrices (originally 3×2) are flipped to 2×3.

---

### 4️⃣ `tf.reshape()` – Changing shape without changing data

*   **Purpose:** Rearrange tensor into a new shape without altering the elements or their order in the flattened view.
*   **Syntax:** `tf.reshape(tensor, new_shape)`

*   `tensor`: the input tensor
*   `new_shape`: a list or tuple specifying the desired new shape. One dimension can be `-1`, which TensorFlow will calculate based on the total number of elements.
*   **Key rules:**
    *   The total number of elements must remain the same.
    *   `product of original shape` = `product of new shape`

*   Works independently of batch or matrix dimensions; it just repackages the existing elements into the new shape.

### 5️⃣ When to use `transpose` vs `reshape`

| Operation     | Purpose                         | Typical use                                     |
| :------------ | :------------------------------ | :---------------------------------------------- |
| `tf.transpose`| Reorder axes                    | Flip rows/columns of inner matrices, move batch dims |
| `tf.reshape`  | Change tensor shape             | Flatten, merge, or split dimensions while keeping data order |

### 6️⃣ Key Takeaways

*   Default `tf.transpose()` reverses all axes → may be confusing for high-rank tensors.
*   Use the `perm` argument with `tf.transpose()` for precise axis reordering.
*   `tf.reshape()` changes the tensor's view but not the underlying data order; the total number of elements must match the new shape.

## Tensor Transpose and Reshape in TensorFlow

Understanding `tf.transpose()` and `tf.reshape()` is crucial for manipulating tensor dimensions effectively. While both change the arrangement of a tensor's elements, they do so in fundamentally different ways.

### 1️⃣ `tf.transpose()` – Reordering Axes

*   **Purpose:** Reorders the axes (dimensions) of a tensor according to a specified permutation.
*   **Syntax:**

In [None]:
# Rank-5 tensor holding the values 1..48.
tensor_A = tf.constant(np.arange(1, 49).reshape((2, 2, 2, 3, 2)))
tensor_A.shape  # Output: (2, 2, 2, 3, 2)

In [None]:
# Default transpose: every axis is reversed.
tensor_B = tf.transpose(tensor_A, perm=None)
tensor_B.shape  # Output: (2, 3, 2, 2, 2)

In [None]:
# Explicit permutation: only the last two axes trade places.
tensor_C = tf.transpose(tensor_A, perm=(0, 1, 2, 4, 3))
tensor_C.shape  # Output: (2, 2, 2, 2, 3)

## Changing the datatype of tensors


In [None]:
import tensorflow as tf

# tf.cast converts between dtypes, e.g. float32 -> float16, float16 -> int32,
# or (as here) int32 -> int16 (16 bit).
tensor_A = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int32, shape=(2, 3))
tensor_B = tf.cast(tensor_A, dtype=tf.int16)
tensor_B

## Aggregating Tensors
**What “aggregating tensors” means**

> Aggregating a tensor means reducing multiple elements into one (or fewer) elements, usually by applying a mathematical operation across one or more axes.

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp

tensor = tf.constant([[1, 2, 3],
                      [4, 5, 6]])

# Means, variances and standard deviations need a floating dtype: on an
# integer tensor tf.reduce_mean uses integer division (3 instead of 3.5).
tensor_float = tf.cast(tensor, dtype=tf.float32)

# Sum of all elements
sumA = tf.reduce_sum(tensor)  # 21

# Mean of all elements
sumB = tf.reduce_mean(tensor_float)  # 3.5

# Sum along rows (axis=1)
sumC = tf.reduce_sum(tensor, axis=1)  # [6, 15]

# Sum along columns (axis=0)
sumD = tf.reduce_sum(tensor, axis=0)  # [5, 7, 9]

# Largest and smallest element. Named *_value so the Python builtins
# max() and min() are not shadowed for the rest of the kernel session.
max_value = tf.reduce_max(tensor)  # 6
min_value = tf.reduce_min(tensor)  # 1

# Variance of each column (samples are taken along axis 0).
variance = tfp.stats.variance(tensor_float, sample_axis=0)  # [2.25, 2.25, 2.25]

# Overall mean and the mean of each column.
mean = tf.reduce_mean(tensor_float)  # 3.5
mean_axis_0 = tf.reduce_mean(tensor_float, axis=0)  # [2.5, 3.5, 4.5]

# Standard deviation of each row.
stddev = tf.math.reduce_std(tensor_float, axis=1)  # approx. [0.816, 0.816]

sumA, sumB, sumC, sumD, max_value, min_value, variance, stddev, mean, mean_axis_0



 ## **Positional Maximum & Minimum — The Core Idea**

> A maximum/minimum tells you what the largest or smallest value is.

> A positional maximum/minimum tells you where it is — i.e. its index or position in the tensor.

> In TensorFlow, you do this with:

 `tf.argmax() → position of maximum value`

 `tf.argmin() → position of minimum value`

In [None]:
# Positional maximum & minimum in TensorFlow:
# tf.argmax / tf.argmin report WHERE the extreme value sits, not what it is.

import tensorflow as tf

# ---- 1. One-dimensional case ----
tensor_1d = tf.constant([3, 7, 2, 9, 5])
max_pos_1d = tf.math.argmax(tensor_1d, axis=0)  # index of the largest value
min_pos_1d = tf.math.argmin(tensor_1d, axis=0)  # index of the smallest value

print("1D Tensor:", tensor_1d.numpy())
# Indexing the tensor with the returned position recovers the value itself.
print("Max position:", max_pos_1d.numpy(), "Value:", tensor_1d[max_pos_1d].numpy())
print("Min position:", min_pos_1d.numpy(), "Value:", tensor_1d[min_pos_1d].numpy())

# ---- 2. Two-dimensional case ----
tensor_2d = tf.constant([[1, 8, 3],
                         [7, 2, 5]])

# axis=0 (also the default): compare down the rows, one answer per column.
max_pos_2d = tf.math.argmax(tensor_2d, axis=0)
min_pos_2d = tf.math.argmin(tensor_2d, axis=0)

print("\n2D Tensor:\n", tensor_2d.numpy())
print("Max positions along axis 0:", max_pos_2d.numpy())
print("Min positions along axis 0:", min_pos_2d.numpy())

# axis=1: compare across the columns, one answer per row.
max_pos_2d_axis1 = tf.math.argmax(tensor_2d, axis=1)
min_pos_2d_axis1 = tf.math.argmin(tensor_2d, axis=1)

print("Max positions along axis 1:", max_pos_2d_axis1.numpy())
print("Min positions along axis 1:", min_pos_2d_axis1.numpy())

# ---- 3. Higher-dimensional case ----
# Seeded generator so the random rank-4 example is reproducible.
g = tf.random.Generator.from_seed(42)
tensor_4d = g.truncated_normal(shape=(5, 2, 4, 9), mean=5.0, stddev=1.0, dtype=tf.float32)

# Reducing along axis 0 removes that axis from the result's shape.
max_pos_4d = tf.math.argmax(tensor_4d, axis=0)
min_pos_4d = tf.math.argmin(tensor_4d, axis=0)

print("\n4D Tensor shape:", tensor_4d.shape)
print("Positional max shape (axis 0 reduced):", max_pos_4d.shape)
print("Positional min shape (axis 0 reduced):", min_pos_4d.shape)

# ---- 4. What tf.argmax actually returns ----
# For a tensor of shape (5, 2, 5, 6, 6, 8, 8), tf.argmax(tensor) with no axis
# argument reduces axis 0, so the result has shape (2, 5, 6, 6, 8, 8).
# Every entry is an integer between 0 and 4: the index along axis 0 at which
# the maximum was found.
#
# Mental model:
# - the input is 5 stacked 6-D "blocks";
# - the output is one 6-D tensor shaped like a single block;
# - each entry says which of the 5 blocks held the maximum at that position.

# The same idea on a tensor small enough to check by hand, shape (3, 2, 2).
tensor_small = tf.constant([
    [[1, 2], [3, 4]],  # slice 0 along axis 0
    [[5, 0], [1, 6]],  # slice 1 along axis 0
    [[2, 1], [4, 0]],  # slice 2 along axis 0
])

max_pos_small = tf.math.argmax(tensor_small, axis=0)  # result has shape (2, 2)
print("\nSmall tensor shape:", tensor_small.shape)
print("Max positions along axis 0:\n", max_pos_small.numpy())
# Each entry names the slice along axis 0 that held the maximum.

# ---- 5. Key points to remember ----
# 1. tf.argmax / tf.argmin return positions, not the values themselves.
# 2. axis defaults to 0; any axis can be given.
# 3. The output shape is the original shape without the reduced axis.
# 4. For n-dimensional tensors, picture "axis 0" as stacked blocks: the result
#    says which block holds the max/min at each coordinate.
# 5. Every element of the result is an integer index along the reduced axis.


## **One Hot Encoding**

In [None]:
# One-hot encoding in TensorFlow.

import tensorflow as tf

# ---- 1. What is one-hot encoding? ----
# It turns a categorical integer label into a binary vector with a single 1.
# With 3 classes (0, 1, 2), label 1 becomes [0, 1, 0].

labels = tf.constant([0, 2, 1, 2])

print("Original labels:", labels.numpy())

# ---- 2. tf.one_hot ----
# tf.one_hot(indices, depth, on_value=1, off_value=0, axis=-1)
# `depth` is the length of each one-hot vector, i.e. the number of classes:
# label 1 with depth 3 -> 0 1 0, with depth 4 -> 0 1 0 0.
# It should be at least (largest label index + 1).

one_hot_labels = tf.one_hot(indices=labels, depth=3)
print("One-hot encoded labels:\n", one_hot_labels.numpy())

# Shapes: labels are (4,), the encoding is (4, 3) - one row per label.

# ---- 3. Choosing where the new axis goes ----
# axis=-1 (the default) appends the one-hot dimension at the end; any other
# position can be requested instead.

one_hot_labels_axis0 = tf.one_hot(indices=labels, depth=3, axis=0)
print("\nOne-hot with axis=0 shape:", one_hot_labels_axis0.shape)
print(one_hot_labels_axis0.numpy())

# ---- 4. Custom on/off values ----
one_hot_custom = tf.one_hot(indices=labels, depth=3, on_value=5, off_value=-1)
print("\nCustom one-hot values:\n", one_hot_custom.numpy())

# ---- 5. Higher-dimensional label tensors ----
# A (2, 2) batch of labels gains one trailing axis of length `depth`,
# giving shape (2, 2, 3).
batch_labels = tf.constant([[0, 1], [2, 0]])

one_hot_batch = tf.one_hot(indices=batch_labels, depth=3)
print("\nBatch one-hot shape:", one_hot_batch.shape)
print(one_hot_batch.numpy())

# ---- 6. Why one-hot matters in ML/DL ----
# 1. Classification: the output layer of a network often predicts one
#    probability per class.
# 2. Loss computation: many losses (e.g. categorical cross-entropy) expect
#    one-hot labels.
# 3. Tensor operations: one-hot vectors can multiply, sum, or mask tensors.
#
# Masking example: keep only the predicted probability of the true class.
predictions = tf.constant([[0.1, 0.7, 0.2],
                           [0.3, 0.2, 0.5]])
# The true label is 1 for the first row and 2 for the second row.
mask = tf.one_hot(indices=[1, 2], depth=3)
masked_pred = predictions * mask
print("\nMasked predictions:\n", masked_pred.numpy())


## **Squeezing a Tensor**

In [None]:
# @title
import tensorflow as tf

# tf.squeeze removes every axis of length 1: (1, 2, 4, 1) -> (2, 4).
tensor = tf.random.uniform(shape=(1, 2, 4, 1), dtype=tf.float32)
squeezed_tensor = tf.squeeze(tensor)
print(tensor)
print(squeezed_tensor)


## **Some more math operations with tensors**

In [None]:
import tensorflow as tf

tensor = tf.constant([3, 1, 2, 41, 23, 42])

# Element-wise square, computed directly on the integer tensor.
squared_tensor = tf.math.square(tensor)

# Square root, natural log and exponential are each applied to a float32
# cast of the integer tensor.
squared_root_tensor = tf.math.sqrt(tf.cast(tensor, dtype=tf.float32))
log_tensor = tf.math.log(tf.cast(tensor, dtype=tf.float32))
exp_tensor = tf.math.exp(tf.cast(tensor, dtype=tf.float32))

for shown in (tensor, squared_tensor, squared_root_tensor, log_tensor, exp_tensor):
    print(shown)
