**Understanding Numpy**

In [2]:
# Import all necessary libraries

import numpy as np
import matplotlib.pyplot as plt
import time

# Compact array printing for the rest of the notebook: floats are shown
# with 3 decimals and small values are not switched to scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Record which NumPy release produced the outputs below.
print(f"Numpy version: {np.__version__}")


Numpy version: 2.3.2


### Creating Numpy Arrays

In [13]:
# np.array turns (nested) Python lists into arrays; the nesting depth
# of the list decides how many dimensions the array gets.

# 1D: a simple sequence of numbers
arr1d = np.array([1, 2, 3, 4, 5])
print("1D array:", arr1d)

# 2D: a matrix / table - here 2 rows by 3 columns
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print("2D array:", arr2d)

# 3D: like a stack of 2D arrays - useful for images, time series, etc.
arr3d = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
print("3D array:", arr3d)

1D array: [1 2 3 4 5]
2D array: [[1 2 3]
 [4 5 6]]
3D array: [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


### Creating Special Arrays in Numpy

In [14]:
# Pre-sized arrays: allocate the shape first, put real data in later.

# All zeros - shape (3, 4) means 3 rows and 4 columns
zeros = np.zeros(shape=(3, 4))
print("zeros array (3x4):\n", zeros)

# All ones - often used as a starting point.
# Shape (2, 3, 4): 2 layers, 3 rows, 4 columns
ones = np.ones(shape=(2, 3, 4))
print("ones arry shape:", ones.shape)

# np.empty does not initialise its memory, which makes it faster than
# zeros/ones, but the values it holds are arbitrary leftovers.
# Use it only when the array is filled with real data immediately.
empty = np.empty(shape=(2, 2))
print("Empty array (contains random values):\n", empty)


zeros array (3x4):
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
ones arry shape: (2, 3, 4)
Empty array (contains random values):
 [[0. 0.]
 [0. 0.]]


In [2]:
# Evenly spaced sequences - three constructors, three kinds of spacing.
# (numpy is imported once, in the setup cell at the top of the notebook,
# so this cell does not import it again.)

# arange: like Python's range() but returns an array.
# Arguments are start, stop (excluded), step -> [0, 2, 4, 6, 8]
range_arr = np.arange(0, 10, 2)
print("Range array:", range_arr)

# linspace: split an interval into equally spaced points.
# From 0 to 1 with exactly 5 points, both endpoints included.
linspace_arr = np.linspace(0, 1, 5)
print("Linspace array:", linspace_arr)

# logspace: points equally spaced on a log scale - useful for scientific data.
# From 10^0 to 10^2 (1 to 100) with 5 points.
logspace_arr = np.logspace(0, 2, 5)
print("Logspace array:", logspace_arr)

Range array: [0 2 4 6 8]
Linspace array: [0.   0.25 0.5  0.75 1.  ]
Logspace array: [  1.           3.16227766  10.          31.6227766  100.        ]


In [3]:
# Matrices with a prescribed pattern.

# Identity: ones on the main diagonal, zeros elsewhere (4x4 here).
# Essential for linear algebra operations.
identity = np.eye(4)
print("Identity matrix:\n", identity)

# Diagonal matrix: the given values are placed on the main diagonal
diagonal = np.diag([1, 2, 3, 4])
print("Diagonal matrix:\n", diagonal)

# Constant array: a 3x3 grid where every cell holds 7
full_arr = np.full((3, 3), 7)
print("Full array (filled with 7):\n", full_arr)

Identity matrix:
 [[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
Diagonal matrix:
 [[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]
Full array (filled with 7):
 [[7 7 7]
 [7 7 7]
 [7 7 7]]


### Numpy Data Types (dtypes)
- Understanding data types is crucial for memory efficiency and numerical precision.

In [4]:
# Choosing the dtype explicitly controls both memory use and precision.

# 32-bit integers
int_arr = np.array([1, 2, 3], dtype=np.int32)
print("Integer array dtype:", int_arr.dtype)

# 64-bit floats (double precision)
float_arr = np.array([1, 2, 3], dtype=np.float64)
print("Float array dtype:", float_arr.dtype)

# Boolean values
bool_arr = np.array([True, False, True], dtype=np.bool_)
print("Boolean array dtype:", bool_arr.dtype)

# astype returns a new array with the requested dtype (here 32-bit float)
converted = int_arr.astype(np.float32)
print("Converted array dtype:", converted.dtype)

# itemsize is the number of bytes one element occupies
print(f"int32 uses {int_arr.itemsize} bytes per element")
print(f"float64 uses {float_arr.itemsize} bytes per element")

Integer array dtype: int32
Float array dtype: float64
Boolean array dtype: bool
Converted array dtype: float32
int32 uses 4 bytes per element
float64 uses 8 bytes per element


### Array Properties & Attributes
- Understanding array properties helps you work effectively with your data and debug issues.

In [5]:
# Sample 3D array to inspect: 3 layers, each with 4 rows and 5 columns.
# Only its metadata is printed below, so the random values do not matter.
arr = np.random.randn(3, 4, 5)

# shape: length of every dimension -> (3, 4, 5)
print("Shape:", arr.shape)

# size: total element count (3 x 4 x 5 = 60)
print("Size:", arr.size)

# ndim: number of dimensions (3 here)
print("Ndim:", arr.ndim)

# dtype: element type (float64 for this array)
print("Dtype:", arr.dtype)

# itemsize: bytes per element (8 for float64)
print("Itemsize:", arr.itemsize)

# nbytes: total memory of the elements = size x itemsize
total_bytes = arr.nbytes
print("Memory usage:", total_bytes, "bytes")
print("Memory usage:", total_bytes / 1024, "KB")  # 1 KB = 1024 bytes

Shape: (3, 4, 5)
Size: 60
Ndim: 3
Dtype: float64
Itemsize: 8
Memory usage: 480 bytes
Memory usage: 0.46875 KB


### Array Indexing & Slicing

**Basic Indexing - Accessing Individual Elements**
- NumPy indexing is similar to Python lists but more powerful for multi-dimensional arrays

In [6]:
# Indexing a 1D array follows the same rules as a Python list.
arr1d = np.array([10, 20, 30, 40, 50])

# Position 0 is the first element -> 10
print("First element:", arr1d[0])

# Negative positions count from the end -> 50
print("Last element:", arr1d[-1])

# start:stop excludes stop, so positions 1, 2, 3 -> [20, 30, 40]
print("Slice [1:4]:", arr1d[1:4])

# A step of 2 keeps every other element -> [10, 30, 50]
print("Every 2nd element:", arr1d[::2])

First element: 10
Last element: 50
Slice [1:4]: [20 30 40]
Every 2nd element: [10 30 50]


In [7]:
# Indexing a 2D array: one index (or slice) per axis, written [row, column].
arr2d = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

# Single element at row 1, column 2 -> 7
print("Element at row 1, column 2:", arr2d[1, 2])

# A bare ':' selects everything along that axis
print("First row:", arr2d[0, :])        # row 0, all columns
print("Second column:", arr2d[:, 1])    # all rows, column 1

# Rectangular sub-block: [row_start:row_end, col_start:col_end]
print("Subarray (rows 1-2, cols 1-2):\n", arr2d[1:3, 1:3])

Element at row 1, column 2: 7
First row: [1 2 3 4]
Second column: [ 2  6 10]
Subarray (rows 1-2, cols 1-2):
 [[ 6  7]
 [10 11]]


### Advanced Indexing - Powerful Selection Methods

In [8]:
# Fancy indexing: pass an array of positions instead of a single index.
arr = np.array([10, 20, 30, 40, 50])

# Pick the elements at positions 0, 2 and 4 -> [10, 30, 50]
indices = np.array([0, 2, 4])
print("Fancy indexing:", arr[indices])

# More flexible than slicing: positions may repeat and come in any
# order -> [50, 20, 40, 20]
random_indices = np.array([4, 1, 3, 1])
print("Random order:", arr[random_indices])

Fancy indexing: [10 30 50]
Random order: [50 20 40 20]


In [9]:
# Fancy indexing in 2D: choose specific row/column combinations.
# 3x4 array: [[0,1,2,3], [4,5,6,7], [8,9,10,11]]
arr2d = np.arange(12).reshape((3, 4))
print("Original 2D array:\n", arr2d)

# Two index arrays are paired up element by element, so this reads the
# cells (0, 1) and (2, 3) -> [1, 11]
rows = np.array([0, 2])
cols = np.array([1, 3])
print("Elements at (0,1) and (2,3):", arr2d[rows, cols])

# A list of row numbers plus ':' returns those whole rows (0 and 2)
selected_rows = arr2d[[0, 2], :]
print("Selected rows:\n", selected_rows)

Original 2D array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Elements at (0,1) and (2,3): [ 1 11]
Selected rows:
 [[ 0  1  2  3]
 [ 8  9 10 11]]


### Array Reshaping & Manipulation
- Reshaping changes the shape through which the same data is viewed (its dimensions) without changing the actual values.

In [10]:
# Twelve consecutive integers to rearrange: [0, 1, ..., 11]
arr = np.arange(12)
print("Original 1D array:", arr)

# Same 12 values viewed as 3 rows x 4 columns
reshaped_2d = arr.reshape((3, 4))
print("Reshaped to 3x4:\n", reshaped_2d)

# Same 12 values viewed as 2 layers x 2 rows x 3 columns
reshaped_3d = arr.reshape((2, 2, 3))
print("Reshaped to 2x2x3:\n", reshaped_3d)

# -1 asks NumPy to work out that dimension itself: 12 / 4 rows = 3 columns
auto_reshape = arr.reshape((4, -1))
print("Auto-reshaped to 4x?:\n", auto_reshape)

Original 1D array: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Reshaped to 3x4:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Reshaped to 2x2x3:
 [[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]
Auto-reshaped to 4x?:
 [[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [11]:
# Two ways to collapse a multi-dimensional array into 1D.
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# flatten(): always hands back an independent copy
flattened = arr2d.flatten()
print("Flattened (copy):", flattened)

# ravel(): hands back a view of the original when it can
# (no data copied, so it is faster and uses less memory)
ravel = arr2d.ravel()
print("Ravel (view if possible):", ravel)

# Change the source array AFTER both results exist to expose the difference
arr2d[0, 0] = 999
print("After modifying original:")
print("Flattened (unchanged):", flattened)  # the copy still holds 1
print("Ravel (changed):", ravel)            # the view now shows 999

Flattened (copy): [1 2 3 4 5 6]
Ravel (view if possible): [1 2 3 4 5 6]
After modifying original:
Flattened (unchanged): [1 2 3 4 5 6]
Ravel (changed): [999   2   3   4   5   6]


### Transposing and Swapping Axes
- Transposing is essential for matrix operations and for changing data orientation.

In [12]:
# Transposing a 2D array swaps its rows and columns.
arr2d = np.array([[1, 2, 3], [4, 5, 6]])

# Before: 2 rows x 3 columns
print("Original shape:", arr2d.shape)
print("Original:\n", arr2d)

# After .T: 3 rows x 2 columns
print("Transposed shape:", arr2d.T.shape)
print("Transposed:\n", arr2d.T)

# The transpose() method with no arguments gives the same result as .T
print("Transpose method:\n", arr2d.transpose())

Original shape: (2, 3)
Original:
 [[1 2 3]
 [4 5 6]]
Transposed shape: (3, 2)
Transposed:
 [[1 4]
 [2 5]
 [3 6]]
Transpose method:
 [[1 4]
 [2 5]
 [3 6]]


In [13]:
# Rearranging axes of a 3D array (2 layers, 3 rows, 4 columns).
arr3d = np.arange(24).reshape((2, 3, 4))
print("Original 3D shape:", arr3d.shape)

# transpose(2, 0, 1): the new axes are old axis 2, then 0, then 1,
# so shape (2, 3, 4) becomes (4, 2, 3)
transposed_3d = arr3d.transpose(2, 0, 1)
print("Transposed 3D shape:", transposed_3d.shape)

# moveaxis relocates a single axis: here the first axis goes to the end,
# so shape (2, 3, 4) becomes (3, 4, 2)
moved = np.moveaxis(arr3d, 0, -1)
print("Moveaxis result shape:", moved.shape)

Original 3D shape: (2, 3, 4)
Transposed 3D shape: (4, 2, 3)
Moveaxis result shape: (3, 4, 2)


### Concatenating And Splitting Arrays
- Combining and dividing arrays is fundamental for data manipulation

In [14]:
# Concatenation joins arrays along an axis that already exists.
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])

print("Original arrays:")
print("Array 1:\n", arr1)
print("Array 2:\n", arr2)

# axis=0: arr2's rows are appended below arr1's (vertical stacking)
concat_rows = np.concatenate([arr1, arr2], axis=0)
print("Concatenated vertically (axis=0):\n", concat_rows)

# axis=1: arr2's columns are appended to the right of arr1's (horizontal)
concat_cols = np.concatenate([arr1, arr2], axis=1)
print("Concatenated horizontally (axis=1):\n", concat_cols)

Original arrays:
Array 1:
 [[1 2]
 [3 4]]
Array 2:
 [[5 6]
 [7 8]]
Concatenated vertically (axis=0):
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
Concatenated horizontally (axis=1):
 [[1 2 5 6]
 [3 4 7 8]]


In [16]:
# Stacking shortcuts, applied to the 2x2 arrays arr1 and arr2 from the
# concatenation cell above.

# vstack: for these inputs the same as concatenate with axis=0
vstack_result = np.vstack([arr1, arr2])
print("vstack (vertical):\n", vstack_result)

# hstack: for these inputs the same as concatenate with axis=1
hstack_result = np.hstack([arr1, arr2])
print("hstack (horizontal):\n", hstack_result)

# dstack: stacks along depth (a third axis), so the result is 3D
dstack_result = np.dstack([arr1, arr2])
print("dstack shape:", dstack_result.shape)

# Splitting is the opposite of concatenation: cut a 3x4 array into
# 3 equal parts along the rows (axis=0), giving one row per part.
arr = np.arange(12).reshape((3, 4))
print("Original array for splitting:\n", arr)

split_arrays = np.split(arr, 3, axis=0)
print("Split into 3 parts along rows:")
for i, split_part in enumerate(split_arrays):
    print(f"Part {i}:\n", split_part)

vstack (vertical):
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
hstack (horizontal):
 [[1 2 5 6]
 [3 4 7 8]]
dstack shape: (2, 2, 2)
Original array for splitting:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Split into 3 parts along rows:
Part 0:
 [[0 1 2 3]]
Part 1:
 [[4 5 6 7]]
Part 2:
 [[ 8  9 10 11]]


### Mathematical Operations 

**Element-wise Operations - The Power of Vectorization**

- NumPy's biggest advantage is performing operations on entire arrays without writing loops.

In [17]:
# Arithmetic operators act on arrays element by element - no loop needed.
arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([10, 20, 30, 40])

print("Array 1:", arr1)
print("Array 2:", arr2)

# Each operator pairs up elements at the same position
# [11, 22, 33, 44]
print("Addition:", arr1 + arr2)
# [9, 18, 27, 36]
print("Subtraction:", arr2 - arr1)
# [10, 40, 90, 160]
print("Multiplication:", arr1 * arr2)
# [10, 10, 10, 10] - true division, so the result is float
print("Division:", arr2 / arr1)
# [1, 4, 9, 16]
print("Power:", arr1 ** 2)
# [1, 2, 0, 1]
print("Modulo:", arr2 % 3)

Array 1: [1 2 3 4]
Array 2: [10 20 30 40]
Addition: [11 22 33 44]
Subtraction: [ 9 18 27 36]
Multiplication: [ 10  40  90 160]
Division: [10. 10. 10. 10.]
Power: [ 1  4  9 16]
Modulo: [1 2 0 1]


In [18]:
# A scalar combined with an array is applied to every element
# (broadcasting in action). arr1 is [1, 2, 3, 4] from the previous cell.
print("Scalar operations:")
# [11, 12, 13, 14]
print("Add 10 to all elements:", arr1 + 10)
# [3, 6, 9, 12]
print("Multiply all by 3:", arr1 * 3)
# [0.5, 1, 1.5, 2]
print("Divide all by 2:", arr1 / 2)

# Operators can be chained; usual precedence applies:
# add 5, then double, then subtract 1
result = (arr1 + 5) * 2 - 1
print("Compound operation (arr1 + 5) * 2 - 1:", result)

Scalar operations:
Add 10 to all elements: [11 12 13 14]
Multiply all by 3: [ 3  6  9 12]
Divide all by 2: [0.5 1.  1.5 2. ]
Compound operation (arr1 + 5) * 2 - 1: [11 13 15 17]


### Mathematical Functions - Beyond Basic Arithmetic

- NumPy provides vectorized versions of most mathematical functions.

In [19]:
# NumPy math functions apply to every element of the array at once.
arr = np.array([1, 4, 9, 16, 25])
print("Original array:", arr)

# Roots and powers
# sqrt -> [1, 2, 3, 4, 5]
print("Square root:", np.sqrt(arr))
# square -> [1, 16, 81, 256, 625]
print("Square:", np.square(arr))
print("Cube root:", np.cbrt(arr))

# Exponential, demonstrated on a shorter array -> [e^1, e^2, e^3]
small_arr = np.array([1, 2, 3])
print("Exponential:", np.exp(small_arr))

# Logarithms of the original array in three bases: e, 10 and 2
print("Natural log:", np.log(arr))
print("Log base 10:", np.log10(arr))
print("Log base 2:", np.log2(arr))

Original array: [ 1  4  9 16 25]
Square root: [1. 2. 3. 4. 5.]
Square: [  1  16  81 256 625]
Cube root: [1.         1.58740105 2.08008382 2.5198421  2.92401774]
Exponential: [ 2.71828183  7.3890561  20.08553692]
Natural log: [0.         1.38629436 2.19722458 2.77258872 3.21887582]
Log base 10: [0.         0.60205999 0.95424251 1.20411998 1.39794001]
Log base 2: [0.         2.         3.169925   4.         4.64385619]


In [20]:
# Trigonometric functions (angles are given in radians) - essential for
# signal processing and geometry.
angles = np.array([0, np.pi/4, np.pi/2, np.pi])
print("Angles (radians):", angles)

# Exact values would be [0, √2/2, 1, 0]; floating point gives ~1e-16 for sin(pi)
print("Sine:", np.sin(angles))
# Exact values would be [1, √2/2, 0, -1]
print("Cosine:", np.cos(angles))
# tan(pi/2) is mathematically undefined; in floating point it shows up as a
# very large finite number rather than infinity
print("Tangent:", np.tan(angles))

# deg2rad converts degrees to radians: 0, 45, 90, 180 -> 0, pi/4, pi/2, pi
degrees = np.array([0, 45, 90, 180])
radians = np.deg2rad(degrees)
print("Degrees to radians:", radians)

Angles (radians): [0.         0.78539816 1.57079633 3.14159265]
Sine: [0.00000000e+00 7.07106781e-01 1.00000000e+00 1.22464680e-16]
Cosine: [ 1.00000000e+00  7.07106781e-01  6.12323400e-17 -1.00000000e+00]
Tangent: [ 0.00000000e+00  1.00000000e+00  1.63312394e+16 -1.22464680e-16]
Degrees to radians: [0.         0.78539816 1.57079633 3.14159265]


In [21]:
# Rounding, absolute value and sign - each applied element by element.
decimals = np.array([1.234, 5.678, 9.999, -2.345])
print("Original decimals:", decimals)

# Keep 2 decimal places
print("Round to 2 places:", np.round(decimals, 2))
# floor goes to the next lower integer -> [1, 5, 9, -3]
print("Floor (round down):", np.floor(decimals))
# ceil goes to the next higher integer -> [2, 6, 10, -2]
print("Ceiling (round up):", np.ceil(decimals))
# trunc drops the fractional part (rounds toward zero) -> [1, 5, 9, -2]
print("Truncate (toward zero):", np.trunc(decimals))

# Magnitude and sign of each element
print("Absolute values:", np.abs(decimals))
print("Sign (-1, 0, or 1):", np.sign(decimals))

Original decimals: [ 1.234  5.678  9.999 -2.345]
Round to 2 places: [ 1.23  5.68 10.   -2.35]
Floor (round down): [ 1.  5.  9. -3.]
Ceiling (round up): [ 2.  6. 10. -2.]
Truncate (toward zero): [ 1.  5.  9. -2.]
Absolute values: [1.234 5.678 9.999 2.345]
Sign (-1, 0, or 1): [ 1.  1.  1. -1.]


### Aggregate Functions - Summarizing Your Data

- Aggregate functions reduce arrays to summary statistics.

In [22]:
# 3x3 sample array used for the aggregation examples
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Sample array:\n", arr)

# With no axis argument each function reduces over ALL elements
# and returns a single number.
# 45
print("Sum of all elements:", np.sum(arr))
# 5.0
print("Mean of all elements:", np.mean(arr))
# about 2.58
print("Standard deviation:", np.std(arr))
# 1
print("Minimum value:", np.min(arr))
# 9
print("Maximum value:", np.max(arr))

Sample array:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Sum of all elements: 45
Mean of all elements: 5.0
Standard deviation: 2.581988897471611
Minimum value: 1
Maximum value: 9


In [23]:
# Aggregating along one axis of the 3x3 `arr` from the previous cell.
# axis=0 collapses the rows (one result per column);
# axis=1 collapses the columns (one result per row).

# Column totals -> [12, 15, 18]
print("Sum along axis 0 (columns):", np.sum(arr, axis=0))
# Row totals -> [6, 15, 24]
print("Sum along axis 1 (rows):", np.sum(arr, axis=1))

# Column means -> [4, 5, 6]
print("Mean along axis 0:", np.mean(arr, axis=0))
# Row means -> [2, 5, 8]
print("Mean along axis 1:", np.mean(arr, axis=1))

# argmax returns WHERE the maximum is, not the maximum itself.
# Without an axis the position refers to the flattened array: 9 sits at 8.
print("Position of max (flattened):", np.argmax(arr))
# Row index of the largest value in each column -> [2, 2, 2]
print("Position of max along axis 0:", np.argmax(arr, axis=0))
# Column index of the largest value in each row -> [2, 2, 2]
print("Position of max along axis 1:", np.argmax(arr, axis=1))

Sum along axis 0 (columns): [12 15 18]
Sum along axis 1 (rows): [ 6 15 24]
Mean along axis 0: [4. 5. 6.]
Mean along axis 1: [2. 5. 8.]
Position of max (flattened): 8
Position of max along axis 0: [2 2 2]
Position of max along axis 1: [2 2 2]


### Broadcasting
- Broadcasting is NumPy's way of performing operations on arrays with different shapes without explicitly reshaping them. This is one of NumPy's most powerful features

In [24]:
# Broadcasting: the operands below have three different shapes
# and can still be combined.
scalar = 5
arr1d = np.array([1, 2, 3, 4])          # shape (4,)
arr2d = np.array([[10], [20], [30]])    # shape (3, 1) - a column vector

print("Scalar:", scalar)
print("1D array:", arr1d)
print("2D array (column vector):\n", arr2d)

# A scalar is applied to every element -> [6, 7, 8, 9]
result1 = scalar + arr1d
print("Scalar + 1D array:", result1)

# (3, 1) + (4,) -> (3, 4): every column-vector entry meets every 1D entry
result2 = arr2d + arr1d
print("2D + 1D broadcasting:\n", result2)

Scalar: 5
1D array: [1 2 3 4]
2D array (column vector):
 [[10]
 [20]
 [30]]
Scalar + 1D array: [6 7 8 9]
2D + 1D broadcasting:
 [[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [25]:
# Broadcasting a column against a row, step by step.
a = np.arange(4).reshape((4, 1))   # column, shape (4, 1)
print("Array a (4x1):\n", a)

b = np.arange(5).reshape((1, 5))   # row, shape (1, 5)
print("Array b (1x5):\n", b)

# Each length-1 axis is stretched to match the other operand,
# so (4, 1) + (1, 5) yields a (4, 5) result.
result = a + b
print("Broadcasting result (4x5):\n", result)
print("Result shape:", result.shape)

Array a (4x1):
 [[0]
 [1]
 [2]
 [3]]
Array b (1x5):
 [[0 1 2 3 4]]
Broadcasting result (4x5):
 [[0 1 2 3 4]
 [1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]
Result shape: (4, 5)


In [26]:
# np.newaxis inserts a length-1 axis, preparing a 1D array for broadcasting.
arr = np.array([1, 2, 3])
print("Original array shape:", arr.shape)   # (3,)

# New axis in second position -> column vector of shape (3, 1);
# arr.reshape(-1, 1) gives the same result
column_vector = arr[:, np.newaxis]
print("Column vector shape:", column_vector.shape)
print("Column vector:\n", column_vector)

# New axis in first position -> row vector of shape (1, 3);
# arr.reshape(1, -1) gives the same result. Usually not needed,
# because 1D arrays already broadcast as rows.
row_vector = arr[np.newaxis, :]
print("Row vector shape:", row_vector.shape)

Original array shape: (3,)
Column vector shape: (3, 1)
Column vector:
 [[1]
 [2]
 [3]]
Row vector shape: (1, 3)


### Common Broadcasting Patterns


In [27]:
# Pattern 1: Centering data (subtract each column's mean from that column)

# Draw the sample data from a seeded generator so the numbers printed
# below are identical on every Restart & Run All. standard_normal samples
# the standard normal distribution, like the legacy np.random.randn.
rng = np.random.default_rng(42)
data = rng.standard_normal((5, 3))  # 5 samples (rows), 3 features (columns)
print("Original data shape:", data.shape)
print("Original data:\n", data)

# One mean per column: reducing over axis 0 (the rows) leaves shape (3,)
column_means = np.mean(data, axis=0)
print("Column means:", column_means)

# (5, 3) - (3,): the three means are broadcast across all 5 rows
centered_data = data - column_means
print("Centered data:\n", centered_data)

# The new means are only approximately 0 because of floating-point rounding
print("New column means (should be ~0):", np.mean(centered_data, axis=0))

Original data shape: (5, 3)
Original data:
 [[-1.28595024  0.44478756  0.26181966]
 [-0.68782798  1.35563544 -0.70255966]
 [-0.75327929  0.53535804 -0.79326629]
 [ 0.62113082 -0.61149021 -0.4716012 ]
 [-0.37994149 -0.0663182   1.23368474]]
Column means: [-0.49717363  0.33159452 -0.09438455]
Centered data:
 [[-0.7887766   0.11319304  0.35620422]
 [-0.19065434  1.02404091 -0.60817511]
 [-0.25610566  0.20376351 -0.69888174]
 [ 1.11830445 -0.94308474 -0.37721665]
 [ 0.11723215 -0.39791272  1.32806929]]
New column means (should be ~0): [-9.99200722e-17 -2.22044605e-17  4.44089210e-17]


In [None]:
# Pattern 2: Normalizing by row sums (useful for probabilities)

# Seeded generator so the printed numbers are reproducible on every
# Restart & Run All; Generator.random draws uniform values in [0, 1),
# like the legacy np.random.rand.
rng = np.random.default_rng(42)
data = rng.random((4, 3))
print("Random data:\n", data)

# keepdims=True keeps the reduced axis as length 1, so the sums have
# shape (4, 1) instead of (4,) and line up with the rows of `data`
row_sums = np.sum(data, axis=1, keepdims=True)
print("Row sums shape:", row_sums.shape)
print("Row sums:\n", row_sums)

# (4, 3) / (4, 1): each row is divided by its own sum, so it adds up to 1
normalized = data / row_sums
print("Normalized data (rows sum to 1):\n", normalized)
print("Row sums after normalization:", np.sum(normalized, axis=1))

Random data:
 [[0.84161201 0.54157746 0.04404538]
 [0.83891694 0.76046882 0.76175105]
 [0.630538   0.58533767 0.23963621]
 [0.40461646 0.63656206 0.16363501]]
Row sums shape: (4, 1)
Row sums:
 [[1.42723485]
 [2.36113681]
 [1.45551188]
 [1.20481352]]
Normalized data (rows sum to 1):
 [[0.58968012 0.37945924 0.03086064]
 [0.35530213 0.32207741 0.32262046]
 [0.43320704 0.40215245 0.16464051]
 [0.33583326 0.52834903 0.13581771]]
Row sums after normalization: [1. 1. 1. 1.]
