# Labib Kamran - 467183 - BSCS13D

# Lab 1: Introduction to Machine Learning



In [1]:
# First Python cell: smoke test that the kernel executes code and shows stdout
greeting = "Hello, Machine Learning World!"
print(greeting)

Hello, Machine Learning World!


In [2]:
# Example: read/write CSV via pandas when in Colab
import numpy as np

# pandas is optional for this demo: fall back to a sentinel instead of failing the cell
try:
    import pandas as pd
except ImportError:
    pd = None
    print("pandas not installed; skipping CSV demo.")

# Detect Colab inside this cell so it works on a fresh kernel (Restart & Run All).
# The google.colab package is importable only inside a Colab runtime.
try:
    import google.colab  # noqa: F401  (imported only to probe for Colab)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB and pd is not None:
    # Create a small DataFrame and save to Drive
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    # NOTE(review): this path presumably requires Google Drive to be mounted at
    # /content/drive beforehand; failures are reported by the except branch below.
    out_path = "/content/drive/MyDrive/datasets/output_data.csv"
    try:
        df.to_csv(out_path, index=False)
        print(f"Saved to {out_path}")
        # Read it back to confirm the round trip
        df2 = pd.read_csv(out_path)
        print("Loaded from Drive:\n", df2.head())
    except Exception as e:
        # Best-effort demo: report the problem and keep the notebook running
        print("Drive path may not exist. Create MyDrive/datasets/ folder first.")
        print("Error:", e)
else:
    print("Not in Colab with pandas; skipping Drive CSV example.")

ModuleNotFoundError: No module named 'numpy'

## <a id="why-numpy"></a> Why NumPy?

NumPy provides fast, vectorized array operations essential for ML workflows (preprocessing, linear algebra, batching). It's the foundation for many ML libraries.

### Example: BMI (vectorized)


In [None]:
import numpy as np

# BMI = weight / height^2, evaluated for every person at once (no Python loop)
heights = np.array([1.75, 1.80, 1.65])  # meters
weights = np.array([65, 78, 50])       # kg
height_squared = heights * heights
bmi = weights / height_squared
print("BMI:", bmi)

### Normalization example


In [None]:
# Z-score: subtract the mean, then divide by the standard deviation
data = np.array([4.0, 5.0, 6.0, 8.0, 10.0])
mu, sigma = np.mean(data), np.std(data)
normalized_data = (data - mu) / sigma
print("Z-score normalized:", normalized_data)

### Matrix multiplication (Linear Regression prediction)


In [None]:
# Linear-regression prediction: y_pred = X . beta + b
# X holds three samples (rows) with two features each (columns)
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
beta = np.array([0.5, 1.5])  # one weight per feature
b = 0.1                      # intercept, broadcast onto every prediction
y_pred = X @ beta + b
print("y_pred:", y_pred)

## <a id="key-numpy-concepts"></a> Key NumPy Concepts

### Arrays vs Lists


In [None]:
# Arrays vs Lists
# "+" on Python lists concatenates, so an element-wise sum has to pair the
# items up and add each pair explicitly
list_a = [1, 2, 3]
list_b = [4, 5, 6]
result_list = [sum(pair) for pair in zip(list_a, list_b)]
print("List addition:", result_list)

# The same "+" on NumPy arrays is vectorized: it adds element by element
import numpy as np
array_a, array_b = np.array([1, 2, 3]), np.array([4, 5, 6])
result_np = array_a + array_b
print("NumPy array addition:", result_np)

In [None]:
# Creating NumPy arrays
print("From list:", np.array([1, 2, 3, 4]))
print("Zeros (3x3):\n", np.zeros((3, 3)))
print("Ones (2x4):\n", np.ones((2, 4)))

# Random values (2x3) in [0, 1).
# Drawn from a seeded, cell-local Generator so that re-running the notebook
# reproduces the same matrix without touching NumPy's global random state.
rng = np.random.default_rng(42)
rand_mat = rng.random((2, 3))
print("Random (2x3):\n", rand_mat)

# Element-wise operations: arrays of equal shape combine position by position
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("a + b:", a + b)
print("a * b:", a * b)

# Broadcasting example: the 1-D row B (shape (3,)) is added to every row of A (shape (2, 3))
A = np.array([[1, 2, 3], [4, 5, 6]])
B = np.array([1, 2, 3])
print("Broadcasted A + B:\n", A + B)

# Reshape and flatten: reshape changes the shape only; flatten returns a 1-D copy
c = np.array([1, 2, 3, 4, 5, 6])
c_reshaped = c.reshape(2, 3)
print("Reshaped (2x3):\n", c_reshaped)
print("Flattened:", c_reshaped.flatten())

## <a id="hands-on-coding"></a> Hands-On Coding

### Creating and Manipulating Arrays


In [None]:
# 1D and 2D arrays
import numpy as np
array_1d = np.array([10, 20, 30, 40])
print("1D:", array_1d)

array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("2D:\n", array_2d)

# Basic operations: the scalar 5 is broadcast onto every element
array_sum = array_1d + 5
print("1D + 5:", array_sum)

# Random matrix and scaling.
# A seeded, cell-local Generator keeps the printed matrices identical across
# re-runs and leaves NumPy's global random state untouched.
rng = np.random.default_rng(42)
random_matrix = rng.random((3, 3))
print("Random (3x3):\n", random_matrix)
random_matrix_scaled = random_matrix * 100
print("Scaled (*100):\n", random_matrix_scaled)

# Reshape and slicing
reshaped_array = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
print("Reshaped (2x3):\n", reshaped_array)

# [:2, :2] selects rows 0-1 and columns 0-1
sliced_array = reshaped_array[:2, :2]
print("Sliced (first 2 rows/cols):\n", sliced_array)

## <a id="mini-challenge"></a> Mini Challenge

Write a function that takes an array of any shape and returns a min-max normalized version (values in [0, 1]). Then test it with a random array. Finally, explain why normalization is critical in ML preprocessing.


In [None]:
import numpy as np

def min_max_normalize(x: np.ndarray, axis=None, keepdims=False) -> np.ndarray:
    """
    Min-max normalize an array to the range [0, 1].

    The result always has the same shape as ``x``: normalization rescales
    every element and never reduces a dimension.

    Parameters
    ----------
    x : array_like
        Values to rescale; converted with ``np.asarray``.
    axis : None, int or tuple of int, optional
        Axis or axes over which the minimum and maximum are taken.
        ``None`` (default) uses the min/max of the whole array.
    keepdims : bool, optional
        Accepted for backward compatibility and ignored. Earlier code tried
        to squeeze ``axis`` out of the result when this was False, which
        raised ``ValueError`` for any axis whose length was not 1.

    Returns
    -------
    np.ndarray
        ``(x - min) / (max - min)`` computed element-wise. Wherever
        ``max == min`` (constant data) the result is 0. A zero-size input
        yields a zero-size result.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.min/np.max raise on zero-size input; there is nothing to scale,
        # so return an empty array with the dtype that true division produces.
        return x / 1
    # keepdims=True keeps the extrema broadcastable against x for any axis
    lo = np.min(x, axis=axis, keepdims=True)
    hi = np.max(x, axis=axis, keepdims=True)
    span = hi - lo
    # Avoid division by zero for constant data: (x - lo) is 0 there, so 0 / 1 == 0
    span = np.where(span == 0, 1, span)
    return (x - lo) / span

# Try the normalizer on a reproducible 3x4 sample (global RNG seeded with 42)
np.random.seed(42)
sample_shape = (3, 4)
test = np.random.randn(*sample_shape)
print("Original:\n", test)

# Rescale once with the global min/max, then separately within each row
globally_scaled = min_max_normalize(test)
print("Min-max normalized (global):\n", globally_scaled)
row_scaled = min_max_normalize(test, axis=1, keepdims=True)
print("Min-max normalized (per-row):\n", row_scaled)

> Why normalization is critical in ML preprocessing:
>
> - Stabilizes and speeds up training by keeping features on similar scales (important for gradient descent).
> - Prevents features with large scales from dominating distance-based models (kNN, clustering) and regularization.
> - Improves numeric stability for matrix operations.
> - Consistent scaling is essential for fair model comparison and reproducibility.
