In [None]:
# Cell 6: Random Sampling and Statistical Operations
# Simulating data and calculating statistics is essential for data analysis and ML.
#
# This part of the cell draws one sample from each of NumPy's legacy
# global-state random helpers (rand, randn, randint, choice) and prints it.

print("=" * 60)
print("SECTION 5: RANDOM SAMPLING & STATISTICS")
print("=" * 60)
print()

# Seed the global generator so the samples printed below are the same on
# every run of this cell.
np.random.seed(42)

print("RANDOM NUMBER GENERATION")
print("-" * 25)
print()

print("1. Uniform Distribution [0, 1):")
uniform_data = np.random.rand(3, 3)  # 3x3 floats in the half-open range [0, 1)
print(uniform_data)
print()

print("2. Standard Normal Distribution (mean=0, std=1):")
normal_data = np.random.randn(3, 3)  # 3x3 draws from N(0, 1)
print(normal_data)
print()

print("3. Random Integers:")
# randint's upper bound is EXCLUSIVE: randint(0, 10) can return 0..9, never 10.
rand_int = np.random.randint(0, 10, size=(3, 3))
print("Random integers from 0 to 9 (upper bound 10 is exclusive):")
print(rand_int)
print()

print("4. Random Choice from Array:")
# choice samples WITH replacement by default, so values may repeat.
choices = np.random.choice([10, 20, 30, 40], size=5)
print(f"Random choices: {choices}")
print()

print("STATISTICAL OPERATIONS")
print("-" * 25)
print()

# Create a sample dataset: a 3x3 integer matrix whose values make the
# whole-array and per-axis results below easy to verify by hand.
data = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])

print("Sample dataset:")
print(data)
print()

# With no axis argument, each reduction treats the array as one flat sequence.
print("Basic Statistics (whole array):")
print(f"Mean: {np.mean(data)}")
print(f"Standard Deviation: {np.std(data):.4f}")  # np.std defaults to ddof=0 (population)
print(f"Minimum: {np.min(data)}")
print(f"Maximum: {np.max(data)}")
print(f"Sum: {np.sum(data)}")
print()

# Without an axis, argmax/argmin return a position in the FLATTENED array
# (8 for the bottom-right element here), not a (row, column) pair.
print("Index-based Operations:")
print(f"Index of maximum value (argmax): {np.argmax(data)}")
print(f"Index of minimum value (argmin): {np.argmin(data)}")
print()

print("AXIS-SPECIFIC OPERATIONS")
print("-" * 30)
print()
print("Understanding axes in 2D arrays:")
print("- axis=0: operate across rows (column-wise)")
print("- axis=1: operate across columns (row-wise)")
print()

print(f"Original data shape: {data.shape}")
print(data)
print()

# axis=0 collapses the row dimension: the rows are added together, leaving
# one value per column. The headings below follow the explanation printed above.
print("Sum across rows (axis=0) -> one total per column:")
sum_axis_0 = np.sum(data, axis=0)
print(f"Result: {sum_axis_0}")
print(f"Shape: {sum_axis_0.shape}")
print("Calculation: [1+4+7, 2+5+8, 3+6+9] = [12, 15, 18]")
print()

# axis=1 collapses the column dimension, leaving one value per row.
print("Sum across columns (axis=1) -> one total per row:")
sum_axis_1 = np.sum(data, axis=1)
print(f"Result: {sum_axis_1}")
print(f"Shape: {sum_axis_1.shape}")
print("Calculation: [1+2+3, 4+5+6, 7+8+9] = [6, 15, 24]")
print()

print("Mean across rows (axis=0) -> one mean per column:")
mean_axis_0 = np.mean(data, axis=0)
print(f"Result: {mean_axis_0}")
print()

print("OTHER USEFUL STATISTICAL FUNCTIONS")
print("-" * 40)
print()

# `data` is the 3x3 sample matrix created earlier in this cell.
print("Median:")
print(f"Median (whole array): {np.median(data)}")
print(f"Median (axis=0): {np.median(data, axis=0)}")  # one median per column
print()

# Percentiles are computed over the flattened array (no axis given).
print("Percentiles:")
print(f"25th percentile: {np.percentile(data, 25)}")
print(f"75th percentile: {np.percentile(data, 75)}")
print()

print("Unique values and counts:")
unique_data = np.array([1, 2, 2, 3, 3, 3, 4, 4, 5])
# return_counts=True also returns how many times each unique value occurs.
unique_vals, counts = np.unique(unique_data, return_counts=True)
print(f"Unique values: {unique_vals}")
print(f"Counts: {counts}")
print()

print("Correlation and Covariance:")
# y is exactly 2 * x, i.e. a perfect linear relationship.
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 6, 8, 10])
# corrcoef/cov return 2x2 matrices; [0, 1] is the x-vs-y entry.
print(f"Correlation coefficient: {np.corrcoef(x, y)[0, 1]:.4f}")
print(f"Covariance: {np.cov(x, y)[0, 1]:.4f}")
print()

print("PRACTICAL APPLICATION: DATA NORMALIZATION")
print("-" * 40)
print()

# Generate some sample data (e.g., test scores)
np.random.seed(42)  # For reproducible results
test_scores = np.random.normal(loc=75, scale=10, size=100)
print(f"Generated {len(test_scores)} test scores")
print(f"Mean: {np.mean(test_scores):.2f}")
print(f"Std Dev: {np.std(test_scores):.2f}")
print()

# Min-Max normalization to [0, 1]: shift by the minimum, then divide by the range.
min_val = np.min(test_scores)
max_val = np.max(test_scores)
normalized_scores = (test_scores - min_val) / (max_val - min_val)
print("Min-Max normalized scores (0 to 1):")
print(f"Min: {np.min(normalized_scores):.4f}")
print(f"Max: {np.max(normalized_scores):.4f}")
print(f"Mean: {np.mean(normalized_scores):.4f}")
print()

# Z-score normalization (standardization): subtract the mean, divide by the
# standard deviation, so the result has mean 0 and std 1.
mean_score = np.mean(test_scores)
std_score = np.std(test_scores)
z_scores = (test_scores - mean_score) / std_score
print("Z-scores (mean=0, std=1):")
print(f"Mean: {np.mean(z_scores):.4f}")
print(f"Std Dev: {np.std(z_scores):.4f}")
print()

print("KEY INSIGHTS FOR AI/ML:")
print("- Random sampling generates synthetic data for testing")
print("- Statistical operations summarize and normalize datasets")
print("- Axis operations are crucial for feature engineering")
print("- Normalization ensures features are on similar scales")
print("- Understanding distributions helps detect anomalies")

In [None]:
# Cell 5: Linear Algebra Operations
# Linear algebra is at the heart of Deep Learning and machine learning algorithms.
#
# This part of the cell computes the dot product of two 2-element vectors
# in two equivalent ways.

print("=" * 60)
print("SECTION 4: LINEAR ALGEBRA OPERATIONS")
print("=" * 60)
print()

print("1. DOT PRODUCT")
print("-" * 20)
print("The dot product is the sum of the products of corresponding entries.")
print()

a = np.array([1, 2])
b = np.array([3, 4])

print(f"a = {a}")
print(f"b = {b}")
print()

# Method 1: Using np.dot()
dot_product = np.dot(a, b)
print(f"Dot product (np.dot): {dot_product}")
print(f"Calculation: 1*3 + 2*4 = {dot_product}")
print()

# Method 2: Using the @ operator (Python 3.5+)
dot_product_v2 = a @ b
print(f"Dot product (@ operator): {dot_product_v2}")
print()

print("2. MATRIX MULTIPLICATION")
print("-" * 20)
print()

A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

print("Matrix A:")
print(A)
print()
print("Matrix B:")
print(B)
print()

# Method 1: Using np.matmul()
C = np.matmul(A, B)
print("Matrix multiplication (np.matmul):")
print(C)
print()

# Method 2: Using @ operator
D = A @ B
print("Matrix multiplication (@ operator):")
print(D)
print()

# Both spellings must produce the same product; fail loudly if they ever differ.
assert np.array_equal(C, D), "np.matmul and @ disagree"

# Each entry of C is the dot product of a row of A with a column of B.
print("Mathematical verification:")
print("C[0,0] = 1*5 + 2*7 =", C[0, 0], "(expected: 19)")
print("C[0,1] = 1*6 + 2*8 =", C[0, 1], "(expected: 22)")
print("C[1,0] = 3*5 + 4*7 =", C[1, 0], "(expected: 43)")
print("C[1,1] = 3*6 + 4*8 =", C[1, 1], "(expected: 50)")
print()

print("3. OTHER COMMON LINEAR ALGEBRA OPERATIONS")
print("-" * 40)
print()

# `A` is the 2x2 matrix [[1, 2], [3, 4]] defined earlier in this cell.
print("Matrix Inverse (A^-1):")
A_inv = np.linalg.inv(A)
print(A_inv)
print()

print("Eigenvalues and Eigenvectors:")
# eig returns the eigenvalues and a matrix of eigenvectors.
eigenvalues, eigenvectors = np.linalg.eig(A)
print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors:\n{eigenvectors}")
print()

print("Matrix Norm (magnitude):")
# With no `ord` argument, norm of a 2-D array is the Frobenius norm.
norm = np.linalg.norm(A)
print(f"Frobenius norm of A: {norm:.4f}")
print()

print("Transpose:")
print("Original A:")
print(A)
print()
print("Transposed A (A.T):")
print(A.T)
print()

print("4. MATRIX PROPERTIES")
print("-" * 20)
print()

# `A` is the 2x2 matrix defined earlier in this cell.
print(f"Matrix A shape: {A.shape}")
print(f"Matrix A size (total elements): {A.size}")
print(f"Matrix A data type: {A.dtype}")
print()

print("Identity Matrix:")
identity_3x3 = np.eye(3)  # 3x3 float matrix with ones on the main diagonal
print(identity_3x3)
print()

print("KEY CONCEPTS FOR AI/ML:")
print("- Dot products measure similarity between vectors")
print("- Matrix multiplication is fundamental to neural networks")
print("- Eigenvalues/eigenvectors used in dimensionality reduction (PCA)")
print("- Matrix inverses important for solving systems of equations")
print("- All operations are highly optimized in NumPy for performance")

In [None]:
# Cell 4: Broadcasting Explanation and Examples
# WHAT IS BROADCASTING? Broadcasting allows NumPy to work with arrays of different shapes
# during arithmetic operations. The smaller array is "broadcast" across the larger array.
#
# This part of the cell prints the rules and adds a 1D array to a 2D array.

print("=" * 60)
print("SECTION 3: BROADCASTING - OPERATIONS ON DIFFERENT SHAPES")
print("=" * 60)
print()

print("BROADCASTING RULES:")
print("1. If arrays don't have the same rank, prepend shape of lower rank with 1s")
print("2. Arrays are compatible in a dimension if they have the same size, OR")
print("   if one of the arrays has size 1 in that dimension")
print()

print("EXAMPLE 1: Broadcasting 1D array to 2D array")
print("-" * 40)

# Create arrays with different shapes
A = np.array([[1, 2, 3],
              [4, 5, 6]])  # Shape: (2, 3)
b = np.array([10, 20, 30]) # Shape: (3,)

print(f"Array A (shape {A.shape}):")
print(A)
print()
print(f"Array b (shape {b.shape}): {b}")
print()

# Broadcasting b to match A's shape: b is added to every row of A.
print("A + b (broadcasting b across rows):")
result = A + b
print(result)
print(f"Result shape: {result.shape}")
print()

print("WHAT HAPPENED UNDER THE HOOD?")
print("NumPy 'stretched' b from shape (3,) to shape (1, 3) then to (2, 3):")
print("b (original): [10, 20, 30]")
print("b (broadcast to match A):")
print("[[10, 20, 30],  <- original b")
print(" [10, 20, 30]]  <- broadcast copy of b")
print()

print("EXAMPLE 2: Broadcasting with different dimensions")
print("-" * 40)

# Create a column vector (2D array with shape (3, 1))
col_vector = np.array([[1], [2], [3]])  # Shape: (3, 1)
row_vector = np.array([10, 20, 30])     # Shape: (3,)

print(f"Column vector (shape {col_vector.shape}):")
print(col_vector)
print()
print(f"Row vector (shape {row_vector.shape}): {row_vector}")
print()

try:
    # (3, 1) + (3,) is treated as (3, 1) + (1, 3): each operand has size 1
    # in the dimension where the other has size 3, so both are stretched.
    result = col_vector + row_vector
    print("col_vector + row_vector:")
    print(result)
    print(f"Result shape: {result.shape}")
    print()
    print("WHY THIS WORKS:")
    print("- col_vector has shape (3, 1)")
    print("- row_vector has shape (3,) -> becomes (1, 3) for broadcasting")
    print("- Compatible dimensions: 3 vs 1 (rows) and 1 vs 3 (columns); each size-1 axis is stretched to 3")
    print("- Final result shape: (3, 3)")
except ValueError as e:
    # NumPy reports incompatible broadcast shapes as ValueError; anything else
    # is a genuine bug and should not be hidden behind this message.
    print(f"Error: {e}")
    print("This combination might not be compatible for broadcasting.")
print()

print("EXAMPLE 3: Practical use case - Normalizing a matrix")
print("-" * 40)

# Create a data matrix (e.g., 3 features, 5 samples): rows are samples,
# columns are features, values scaled into [0, 100).
data = np.random.rand(5, 3) * 100  # Shape: (5, 3)
print(f"Original data (5 samples × 3 features), shape {data.shape}:")
print(data.round(2))
print()

# Calculate column means (axis=0: operate across rows for each column)
means = np.mean(data, axis=0)
print(f"Column means (shape {means.shape}): {means.round(2)}")
print()

# Subtract means from each column using broadcasting: the (3,) vector of
# means is applied to every one of the 5 rows.
normalized = data - means
print("Data minus column means (centered data):")
print(normalized.round(2))
print(f"Shape: {normalized.shape}")
print()

print("KEY TAKEAWAYS:")
print("- Broadcasting eliminates need for explicit loops or replication")
print("- Makes code cleaner and much faster")
print("- Fundamental to efficient NumPy programming")
print("- Understanding rules prevents errors and enables elegant solutions")

In [None]:
# Cell 3: Vectorization Performance Comparison
# WHY THIS MATTERS: Vectorization enables batch operations on data without writing any for loops.
# This is not only more concise but significantly faster due to low-level C optimizations.
#
# This part of the cell prints the section banner and builds the shared input array.

print("=" * 60)
print("SECTION 2: VECTORIZATION - THE POWER OF NUMPY")
print("=" * 60)
print()

print("Let's compare two approaches for calculating the sum of squares:")
print()

# Example data: 10,000 uniform floats in [0, 1), used by both implementations.
test_array = np.random.rand(10000)
print(f"Test array size: {len(test_array)} elements")
print()

# Approach 1: Traditional Python loop (SLOW - educational purposes only)
def sum_squares_loop(arr):
    """
    Non-vectorized implementation using Python loop.
    This demonstrates why we avoid loops for numerical operations.

    Args:
        arr: Any iterable of numbers (the notebook passes a 1D NumPy array).

    Returns:
        The sum of the squares of the elements; 0 for an empty iterable.
    """
    result = 0
    # Deliberately element-by-element: this is the slow baseline being timed.
    for x in arr:
        result += x ** 2
    return result

# Approach 2: NumPy vectorized operation (FAST - recommended approach)
def sum_squares_numpy(arr):
    """
    Vectorized implementation using NumPy operations.
    WHY: np.sum operates at C speed, optimized for array operations.
    The expression arr ** 2 creates the entire result array efficiently.

    Args:
        arr: A NumPy array; it must support `** 2` element-wise.

    Returns:
        The sum of the squares of the elements, as a NumPy scalar.
    """
    return np.sum(arr ** 2)

print("Testing performance...")
print()

# Performance comparison using timeit
# We run each function N_RUNS times to get reliable timing measurements.
# Keeping the count in one constant keeps the printed labels in sync with it.
N_RUNS = 100
loop_time = timeit(lambda: sum_squares_loop(test_array), number=N_RUNS)
numpy_time = timeit(lambda: sum_squares_numpy(test_array), number=N_RUNS)

print(f"Loop implementation:    {loop_time:.4f} seconds ({N_RUNS} runs)")
print(f"NumPy implementation:   {numpy_time:.4f} seconds ({N_RUNS} runs)")
print()

# Calculate performance improvement as the ratio of total times.
improvement = loop_time / numpy_time
print(f"Performance improvement: {improvement:.1f}x FASTER with NumPy")
print()

print("KEY INSIGHTS:")
print("- Vectorized operations eliminate Python loop overhead")
print("- NumPy uses highly optimized C code under the hood")
print("- This 10-100x speedup is typical for numerical computations")
print("- ALWAYS prefer vectorized operations in production code")
print()

print("Verification - both approaches give the same result:")
result_loop = sum_squares_loop(test_array[:100])  # Small subset for quick check
result_numpy = sum_squares_numpy(test_array[:100])
print(f"Loop result:    {result_loop:.6f}")
print(f"NumPy result:   {result_numpy:.6f}")
# isclose rather than ==: the two approaches may differ by float rounding.
print(f"Results match:  {np.isclose(result_loop, result_numpy)}")

In [None]:
# Cell 2: Array Creation and Basic Operations

# NumPy's main object is the homogeneous multidimensional array.
# It's a table of elements (usually numbers), all of the same type,
# indexed by a tuple of non-negative integers.

print("Creating different dimensional arrays:")

# 1D array (vector)
array_1d = np.array([1, 2, 3, 4, 5])
print(f"1D Array: {array_1d}")
print(f"Shape: {array_1d.shape}")
print(f"Dimensions: {array_1d.ndim}")
print()

# 2D array (matrix): 2 rows, 3 columns
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("2D Array:")
print(array_2d)
print(f"Shape: {array_2d.shape}")
print(f"Dimensions: {array_2d.ndim}")
print()

# 3D array (tensor): two 3x3 blocks of uniform random floats
array_3d = np.random.rand(2, 3, 3)
print("3D Array (random values):")
print(array_3d)
print(f"Shape: {array_3d.shape}")
print(f"Dimensions: {array_3d.ndim}")
print()

print("-" * 50)
print("Basic Arithmetic Operations (Element-wise):")

# Basic arithmetic operations are performed element-wise
x = np.array([10, 20, 30])
y = np.array([1, 2, 3])

print(f"x = {x}")
print(f"y = {y}")
print()

print(f"Addition: x + y = {x + y}")        # [11 22 33]
print(f"Subtraction: x - y = {x - y}")     # [ 9 18 27]
print(f"Multiplication: x * y = {x * y}")  # [10 40 90]
print(f"Division: x / y = {x / y}")        # [10. 10. 10.] (true division -> floats)
print()

print("Array shapes and data types:")
print(f"x shape: {x.shape}, dtype: {x.dtype}")
print(f"y shape: {y.shape}, dtype: {y.dtype}")

# NumPy Mastery - Day 1-2 Learning Module

**Welcome to your NumPy learning journey!** This notebook will guide you through:

1. **Core Array Operations** - Creating and manipulating NumPy arrays
2. **Vectorization** - Understanding the performance benefits of NumPy
3. **Broadcasting** - Working with arrays of different shapes
4. **Linear Algebra** - Dot products, matrix multiplication, and more
5. **Statistics** - Random sampling and statistical operations

**Learning Goals:**
- Master array creation, shapes, and types
- Develop a deep understanding of broadcasting rules
- Build competence in basic linear algebra using NumPy
- Perform random sampling and calculate basic statistics

**Remember:** NumPy's main object is the homogeneous multidimensional array - a table of elements (usually numbers), all of the same type, indexed by a tuple of non-negative integers.

In [None]:
# Cell 1: Imports and Setup
# This cell must be run first to import all required libraries

import numpy as np
import matplotlib.pyplot as plt
from timeit import timeit
import sys

# Startup banner: shows the module title plus the NumPy and Python versions
# in use, then a blank line and a confirmation that the imports succeeded.
print(
    "NumPy Mastery - Day 1-2 Learning Module",
    f"NumPy version: {np.__version__}",
    f"Python version: {sys.version}",
    "",
    "All imports successful! Ready to begin.",
    sep="\n",
)