In [1]:
import numpy as np

"""
Broadcasting in NumPy allows arrays with different shapes to be used together
in arithmetic operations without explicitly reshaping them.

Rules for broadcasting:
1. Dimensions are aligned from the right.
2. Dimensions are compatible if they are equal or one of them is 1.
3. New dimensions (with size 1) can be added on the left to match dimension count.

If all dimensions are compatible, the arrays can be broadcast together.
"""

# ----------------------------
# Example 1: Basic Broadcasting
# ----------------------------

# Create a 2D array (shape: (2, 3))
a = np.array([[1, 2, 3],
              [4, 5, 6]])

# Create a 1D array (shape: (3,))
b = np.array([10, 20, 30])

print("Example 1: Broadcasting b over rows of a")
print("Array a:\n", a)
print("Array b:", b)

# Add a + b
# b is 'virtually' stretched to shape (2, 3):
# [[10, 20, 30],
#  [10, 20, 30]]
result = a + b
print("\nResult of a + b:\n", result)
print("Explanation: b was broadcast across each row of a.")

# ----------------------------
# Example 2: Broadcasting in Columns
# ----------------------------

# Create a 2D array (shape: (2, 3))
c = np.array([[1, 2, 3],
              [4, 5, 6]])

# Create a 1D array (shape: (2,))
d = np.array([100, 200])

print("\nExample 2: Broadcasting d over columns of c")
print("Array c:\n", c)
print("Array d:", d)

# Add c + d
# d has shape (2,), c has shape (2, 3)
# d is reshaped to (2, 1), then broadcast to (2, 3):
# [[100],
#  [200]] -> becomes ->
# [[100, 100, 100],
#  [200, 200, 200]]
result2 = c + d[:, np.newaxis]
print("\nResult of c + d[:, np.newaxis]:\n", result2)
print("Explanation: d was broadcast down each column of c.")

# ----------------------------
# Example 3: Broadcasting with Scalars
# ----------------------------

# Create a 2D array (shape: (2, 3))
e = np.array([[1, 2, 3],
              [4, 5, 6]])

# Scalar value
f = 100

print("\nExample 3: Broadcasting scalar f over e")
print("Array e:\n", e)
print("Scalar f:", f)

# Add e + f
# f is treated as if it had shape (1, 1), and is broadcast to shape (2, 3)
result3 = e + f
print("\nResult of e + f:\n", result3)
print("Explanation: scalar was broadcast to every element of e.")

# ----------------------------
# Summary of What Happened
# ----------------------------

"""
Broadcasting doesn't actually copy data — it just adjusts how NumPy accesses memory
by manipulating the `strides` of the smaller array.

In short:
- It lets you write clean code without loops.
- It avoids unnecessary memory usage by reusing existing data via views.
- It makes vectorized operations more powerful and expressive.
"""

Example 1: Broadcasting b over rows of a
Array a:
 [[1 2 3]
 [4 5 6]]
Array b: [10 20 30]

Result of a + b:
 [[11 22 33]
 [14 25 36]]
Explanation: b was broadcast across each row of a.

Example 2: Broadcasting d over columns of c
Array c:
 [[1 2 3]
 [4 5 6]]
Array d: [100 200]

Result of c + d[:, np.newaxis]:
 [[101 102 103]
 [204 205 206]]
Explanation: d was broadcast down each column of c.

Example 3: Broadcasting scalar f over e
Array e:
 [[1 2 3]
 [4 5 6]]
Scalar f: 100

Result of e + f:
 [[101 102 103]
 [104 105 106]]
Explanation: scalar was broadcast to every element of e.


"\nBroadcasting doesn't actually copy data — it just adjusts how NumPy accesses memory\nby manipulating the `strides` of the smaller array.\n\nIn short:\n- It lets you write clean code without loops.\n- It avoids unnecessary memory usage by reusing existing data via views.\n- It makes vectorized operations more powerful and expressive.\n"

In [None]:
import numpy as np
import time

# Benchmark: adding a one-element array to a large vector, once through
# broadcasting and once through an explicit Python loop.
size = 1_000_000
a = np.random.rand(size)
b = np.array([10])  # shape (1,): compatible with (size,) under broadcasting

# Intervals are measured with time.perf_counter(), a monotonic
# high-resolution clock meant for timing short durations. time.time() is
# a wall clock whose resolution can be coarse enough to report 0.0 for
# the vectorized step, which would make the speedup division below fail.

# --- With broadcasting ---
start = time.perf_counter()
res1 = a + b
broadcast_time = time.perf_counter() - start

# --- Without broadcasting (manual loop) ---
# Deliberately element-by-element: this is the slow baseline.
start = time.perf_counter()
res2 = np.empty_like(a)
for i in range(size):
    res2[i] = a[i] + b[0]
loop_time = time.perf_counter() - start

print(f"Broadcast time: {broadcast_time:.5f} sec")
print(f"Loop time:    {loop_time:.5f} sec")
print(f"Speedup:      {loop_time / broadcast_time:.2f}x")

Broadcast time: 0.00315 sec
Loop time:    0.54252 sec
Speedup:      172.43x




---

# 🔬 NumPy Internals and Broadcasting Rules  
## A Deep Dive into ndarray, dtypes, Memory, and Vectorization

This document explores the **core internals of NumPy**, including:
- The structure of `ndarray`
- Data types (`dtypes`)
- Memory layout and management
- Vectorization and performance benefits
- Broadcasting rules with examples
- When to use NumPy (and when not to)

---

## 📦 1. The `ndarray` Object

The core of NumPy is the `ndarray` — an n-dimensional array object that provides fast access and efficient operations on large datasets.

### Key Attributes:
| Attribute | Description |
|----------|-------------|
| `data` | Pointer to raw memory buffer |
| `dtype` | Data type of elements |
| `shape` | Dimensions (e.g., `(2, 3)`) |
| `strides` | Bytes to step in each dimension |
| `flags` | Memory layout flags (C/F-contiguous, writeable, etc.) |

### Example: Explore `ndarray` attributes

```python
import numpy as np
import ctypes

def explore_ndarray_attributes():
    arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.int32)
    print("Array:\n", arr)
    print("\nData Buffer Address:", arr.ctypes.data)
    print("Data Type:", arr.dtype)
    print("Shape:", arr.shape)
    print("Strides:", arr.strides)
    print("Flags:\n", arr.flags)

    # Slicing creates non-contiguous views
    sliced_arr = arr[::2, 1:3]
    print("\nSliced Array (Non-contiguous):\n", sliced_arr)
    print("Sliced Array Shape:", sliced_arr.shape)
    print("Sliced Array Strides:", sliced_arr.strides)
    print("Sliced Array Flags:\n", sliced_arr.flags)

    # Manual memory access (for illustration only)
    offset_bytes = arr.strides[0] * 1 + arr.strides[1] * 1
    address_of_element_11 = arr.ctypes.data + offset_bytes
    int_pointer = ctypes.cast(address_of_element_11, ctypes.POINTER(ctypes.c_int32))
    value_at_address = int_pointer.contents.value
    print(f"\nAccessing arr[1,1] via strides: {value_at_address}")
```

---

## 💾 2. NumPy Data Types (`dtypes`)

NumPy supports a wide range of data types optimized for performance and size.

### Common dtypes and Their Sizes:

| Dtype        | Description       | Size (bytes) |
|-------------|-------------------|--------------|
| `np.int8`   | Signed 8-bit int  | 1            |
| `np.int16`  | Signed 16-bit int | 2            |
| `np.int32`  | Signed 32-bit int | 4            |
| `np.int64`  | Signed 64-bit int | 8            |
| `np.float32`| 32-bit float      | 4            |
| `np.float64`| 64-bit float      | 8            |
| `np.complex64` | Complex number (two 32-bit floats) | 8            |
| `np.complex128`| Complex number (two 64-bit floats) | 16           |
| `np.bool_`  | Boolean (True/False) | 1         |
| `np.bytes_` | Fixed-length byte string (named `np.string_` before NumPy 2.0, where that alias was removed) | user-defined|

### Example: Print dtype info

```python
def explore_numpy_dtypes(dtypes=(np.int32,)):
    """Print the dtype and per-element size for each requested dtype.

    A three-element array is built for every entry so that the reported
    ``itemsize`` comes from a real array rather than from a lookup table.

    Args:
        dtypes: Iterable of dtype specifiers accepted by ``np.array``
            (for example ``np.int8`` or ``np.float64``). Defaults to
            ``(np.int32,)``, which prints the single int32 line.

    Returns:
        None. One line per dtype is written to stdout.
    """
    for dtype in dtypes:
        arr = np.array([1, 2, 3], dtype=dtype)
        print(f"{arr.dtype.name}: {arr.dtype}, size: {arr.itemsize} bytes")
```

> ⚠️ Choosing the right dtype can save memory and speed up computation.

---

## 🧠 3. Memory Layout: C vs. Fortran Contiguous

### C-contiguous:
- Row-major order
- Default layout for arrays created by NumPy
- Last index changes fastest

### F-contiguous:
- Column-major order
- Used in languages like MATLAB and Fortran
- First index changes fastest

### Example:

```python
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int32)
f_arr = np.asfortranarray(arr)
print("F-contiguous strides:", f_arr.strides)
print("Contiguous Check (C):", f_arr.flags['C_CONTIGUOUS'])
print("Contiguous Check (F):", f_arr.flags['F_CONTIGUOUS'])
```

> ✅ Use `.copy(order='C')` or `.copy(order='F')` if you need a specific layout.

---

## 🧰 4. Memory Management in NumPy

### Functions:
- `np.zeros()` → Allocate and initialize to zero
- `np.ones()` → Allocate and initialize to one
- `np.empty()` → Allocate without initialization (fastest)
- `np.copy()` → Create a new copy

### Views vs Copies

```python
a = np.zeros((2, 3))
b = a[0, :]   # View
c = a.copy()   # Copy

b[0] = 99
print("After modifying view b:\n", a)  # a is changed
print("Copy c remains unchanged:\n", c)
```

> ⚠️ Be careful: Views share memory. Modifying them affects the original.

---

## ⚡ 5. Vectorization: Why It's Fast

Vectorized operations are implemented in C, so they're **much faster** than Python loops.

### Example: Loop vs Vectorization

```python
def python_loop_sum(x, y):
    result = np.zeros_like(x)
    for i in range(len(x)):
        result[i] = x[i] + y[i]
    return result

def numpy_vectorized_sum(x, y):
    return x + y

size = 1000000
a = np.random.rand(size)
b = np.random.rand(size)

start = time.time()
python_loop_sum(a, b)
python_time = time.time() - start

start = time.time()
numpy_vectorized_sum(a, b)
numpy_time = time.time() - start

print(f"Python loop sum: {python_time:.4f} seconds")
print(f"NumPy vectorized sum: {numpy_time:.4f} seconds")
print(f"Speedup: {python_time / numpy_time:.2f}x")
```

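> 📌 Result: speedups of roughly 10–100x or more are typical for element-wise operations on large arrays; the exact figure depends on array size and hardware.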

---

## 🔁 6. Broadcasting: Powerful Dimension Matching

### Broadcasting Rules:
Two dimensions are compatible if:
1. They are equal, or
2. One of them is 1

### Examples:

```python
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])
c = np.array([[10], [20]])

print("a + b:", a + b)  # shape (2,3) + (3,) → broadcasted to (2,3)
print("a + c:", a + c)  # shape (2,3) + (2,1) → broadcasted to (2,3)
```

> ✅ Broadcasting avoids copying data and enables elegant, concise code.

---

## 🧪 7. Challenge: Adjust Image Contrast Using NumPy Broadcasting

### Problem:
Adjust contrast using:
```python
output_pixel = (input_pixel - mean) * contrast_factor + mean
```

### Solution:

```python
def adjust_contrast_numpy(image, contrast_factor):
    """Scale the contrast of an image around its mean intensity.

    Every pixel is moved away from (factor > 1) or towards (factor < 1)
    the global mean: ``(pixel - mean) * contrast_factor + mean``. The
    scalar mean and factor are broadcast against the whole image.

    Args:
        image: Array of pixel intensities; results are clipped to the
            0-255 range.
        contrast_factor: Multiplier applied to each pixel's distance
            from the mean. 1.0 leaves the image unchanged.

    Returns:
        A new ``np.uint8`` array with the same shape as ``image``.
    """
    mean = np.mean(image)
    adjusted_image = (image - mean) * contrast_factor + mean
    # Round to the nearest integer before casting: astype(np.uint8)
    # truncates toward zero, which would push every non-integral pixel
    # value down (e.g. 98.89 -> 98 instead of 99).
    return np.clip(np.rint(adjusted_image), 0, 255).astype(np.uint8)

def create_sample_image():
    """Build the fixed 3x3 ``uint8`` sample image used by the contrast demo.

    Returns:
        A new ``np.uint8`` array of shape (3, 3).
    """
    pixel_rows = (
        (100, 150, 200),
        (50, 100, 150),
        (20, 50, 100),
    )
    return np.array(pixel_rows, dtype=np.uint8)
```

### Test:

```python
sample_image = create_sample_image()
adjusted_image = adjust_contrast_numpy(sample_image, 1.5)
print("Original:\n", sample_image)
print("Adjusted:\n", adjusted_image)
```

> ✅ This shows how broadcasting simplifies complex image processing.

---

## 🧠 Summary Table: Python List vs. Generator (general Python background, not NumPy-specific)

| Feature | List | Generator | Winner |
|--------|------|-----------|--------|
| Creation Time | ❌ Slower | ✅ Very fast | Generator |
| Memory Usage | ❌ High | ✅ Constant | Generator |
| Iteration Speed | ✅ Faster | ❌ Slower | List |
| Can Be Reused | ✅ Yes | ❌ Must recreate | List |
| Multiple Iterations | ✅ Yes | ❌ No | List |
| Random Access | ✅ Yes | ❌ No | List |

---

## 🧩 When to Use NumPy

| Use Case | Recommendation |
|----------|----------------|
| Large numerical arrays | ✅ Use NumPy |
| Vectorized math operations | ✅ Use NumPy |
| Interoperability (SciPy, Pandas, Matplotlib) | ✅ Use NumPy |
| Small data | ⚠️ Consider plain lists |
| Non-numerical data | ⚠️ Use native Python |
| GPU acceleration needed | ⚠️ Consider CuPy, PyTorch, TensorFlow |
| Sparse data | ⚠️ Consider SciPy.sparse |
| Distributed computing | ⚠️ Consider Dask or Spark |

---

## 🧠 Final Takeaway

| Concept | Insight |
|--------|---------|
| `ndarray` is memory-efficient | Stores data contiguously |
| Strides enable broadcasting | No need to copy data |
| Vectorization = C-level speed | Avoid Python loops |
| Broadcasting = Elegant syntax | Saves memory and improves readability |
| Views = Zero-copy slicing | Use carefully to avoid side effects |
| Copies = Independent buffers | Use when you need isolation |

---



In [None]:
import numpy as np
import ctypes

def explore_ndarray_attributes():
    """Print the metadata of a 3x4 int32 array and of a strided slice of it.

    The report covers the buffer address, dtype, shape, strides and flags
    of the array, repeats shape/strides/flags for a slice taken with a
    row step of 2, and finally reads element [1, 1] straight from memory
    using the array's strides.

    Returns:
        None. Everything is written to stdout.
    """
    arr = np.array(
        [[1, 2, 3, 4],
         [5, 6, 7, 8],
         [9, 10, 11, 12]],
        dtype=np.int32,
    )
    array_report = (
        ("Array:\n", arr),
        ("\nData Buffer Address:", arr.ctypes.data),
        ("Data Type:", arr.dtype),
        ("Shape:", arr.shape),
        ("Strides:", arr.strides),
        ("Flags:\n", arr.flags),
    )
    for label, value in array_report:
        print(label, value)

    # Every second row, columns 1 and 2: the slice skips over elements
    # of the parent array, so it is no longer contiguous.
    sliced_arr = arr[::2, 1:3]
    slice_report = (
        ("\nSliced Array (Non-contiguous):\n", sliced_arr),
        ("Sliced Array Shape:", sliced_arr.shape),
        ("Sliced Array Strides:", sliced_arr.strides),
        ("Sliced Array Flags:\n", sliced_arr.flags),
    )
    for label, value in slice_report:
        print(label, value)

    # Manual memory access (for illustration only): element [1, 1] lives
    # one row stride plus one column stride past the start of the buffer.
    row_step, col_step = arr.strides
    target_address = arr.ctypes.data + row_step * 1 + col_step * 1
    value_at_address = ctypes.c_int32.from_address(target_address).value
    print(f"\nAccessing arr[1,1] via strides: {value_at_address}")


explore_ndarray_attributes()


Array:
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Data Buffer Address: 328046736
Data Type: int32
Shape: (3, 4)
Strides: (16, 4)
Flags:
   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False


Sliced Array (Non-contiguous):
 [[ 2  3]
 [10 11]]
Sliced Array Shape: (2, 2)
Sliced Array Strides: (32, 4)
Sliced Array Flags:
   C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False


Accessing arr[1,1] via strides: 6


In [None]:
# Start from a C-ordered (row-major) 3x3 int32 matrix ...
arr = np.array(
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
    dtype=np.int32,
)

# ... and obtain the same values laid out column-major in memory.
f_arr = np.asfortranarray(arr)
layout = f_arr.flags

# In Fortran order the first axis has the smallest stride.
print("F-contiguous strides:", f_arr.strides)
print("Contiguous Check (C):", layout.c_contiguous)
print("Contiguous Check (F):", layout.f_contiguous)

F-contiguous strides: (4, 12)
Contiguous Check (C): False
Contiguous Check (F): True




## What are Strides in NumPy?

In NumPy, **strides** define how many bytes to step in memory to move to the next element along each axis of an array. It's a low-level detail that allows NumPy to index into memory efficiently without copying data.

### Understanding with an Example:

```python
import numpy as np

a = np.array([[1, 2, 3],
              [4, 5, 6]], dtype=np.int32)

print("Array:\n", a)
print("Shape:", a.shape)
print("Strides:", a.strides)
```

### Output:

```
Array:
 [[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Strides: (12, 4)
```

### Interpretation:

* The array has shape `(2, 3)` — 2 rows and 3 columns.
* Each `int32` element takes **4 bytes**.
* The first stride `12` means: to move from one row to the next, NumPy skips **3 integers × 4 bytes = 12 bytes**.
* The second stride `4` means: to move from one column to the next within the same row, NumPy skips **4 bytes**.

### Why It Matters:

* Strides make slicing fast without data copying.
* You can use `np.lib.stride_tricks.as_strided()` to create sliding window views using custom strides.
* They allow advanced memory tricks like viewing the same data in different shapes or steps — very powerful for performance tuning.



In [None]:
# A 2x3 array of float zeros, plus an independent snapshot of it taken
# before anything is modified.
a = np.zeros(shape=(2, 3))
c = a.copy()   # Copy: has its own buffer
b = a[0]       # View: first row of `a`, backed by a's buffer

# Writing through the view changes `a` itself; the copy is unaffected.
b[0] = 99
print("After modifying view b:\n", a)  # a is changed
print("Copy c remains unchanged:\n", c)

After modifying view b:
 [[99.  0.  0.]
 [ 0.  0.  0.]]
Copy c remains unchanged:
 [[0. 0. 0.]
 [0. 0. 0.]]


In [None]:
def python_loop_sum(x, y):
    """Add ``x`` and ``y`` element by element with an explicit Python loop.

    This is the deliberately slow baseline that the vectorized version
    is compared against.

    Args:
        x: Array-like; its shape determines the shape of the result.
        y: Array-like that can be indexed at every ``i`` in
            ``range(len(x))``.

    Returns:
        A new array shaped like ``x`` holding ``x[i] + y[i]``, stored in
        the promoted dtype of the two inputs.

    Raises:
        IndexError: If ``y`` has fewer entries than ``x``.
    """
    # Allocate the output in the promoted dtype of both operands. Taking
    # x's dtype alone silently truncates mixed inputs (int x + float y),
    # so the loop would disagree with the vectorized x + y.
    out_dtype = np.result_type(np.asarray(x).dtype, np.asarray(y).dtype)
    result = np.zeros_like(x, dtype=out_dtype)
    for i in range(len(x)):
        result[i] = x[i] + y[i]
    return result

def numpy_vectorized_sum(x, y):
    """Return ``x + y`` evaluated as a single expression.

    For NumPy arrays the addition is element-wise and follows the usual
    broadcasting rules; no Python-level loop is involved.
    """
    total = x + y
    return total

# Benchmark: element-wise sum of two large random vectors, first with
# the explicit Python loop and then with the vectorized expression.
size = 1000000
a = np.random.rand(size)
b = np.random.rand(size)

# Intervals are measured with time.perf_counter(), a monotonic
# high-resolution clock meant for timing short durations. time.time() is
# a wall clock whose resolution can be coarse enough to report 0.0 for
# the vectorized call, which would make the speedup division below fail.
start = time.perf_counter()
python_loop_sum(a, b)
python_time = time.perf_counter() - start

start = time.perf_counter()
numpy_vectorized_sum(a, b)
numpy_time = time.perf_counter() - start

print(f"Python loop sum: {python_time:.4f} seconds")
print(f"NumPy vectorized sum: {numpy_time:.4f} seconds")
print(f"Speedup: {python_time / numpy_time:.2f}x")

Python loop sum: 0.3460 seconds
NumPy vectorized sum: 0.0025 seconds
Speedup: 140.05x
