# Labib Kamran - 467183 - BSCS13D

# Lab 1: Introduction to Machine Learning



## Exploring Colab Interface


### Using Google Drive with Colab
Mount Drive to access datasets and save outputs directly:


In [None]:
# Mount Google Drive (Colab only).
# IN_COLAB ends up True only when google.colab is importable AND the Drive
# mount succeeded; later cells read it to decide whether Drive paths are usable.
IN_COLAB = False
try:
    from google.colab import drive
    drive.mount('/content/drive')
    IN_COLAB = True
except ImportError:
    # google.colab cannot be imported: treat this as a non-Colab runtime.
    print("Not running in Google Colab; skipping Drive mount.")
except Exception as e:
    # The import worked but mounting did not: report that accurately instead
    # of claiming we are outside Colab, and keep the notebook running.
    print("Mounting Google Drive failed; Drive examples will be skipped.")
    print("Error:", e)

In [None]:
# Example: read/write CSV via pandas when in Colab
import numpy as np

try:
    import pandas as pd
except ImportError:
    pd = None
    print("pandas not installed; skipping CSV demo.")

# Round-trip a tiny DataFrame through Google Drive. Runs only when the Drive
# mount cell set IN_COLAB to True and pandas was importable.
if IN_COLAB and pd is not None:
    import os  # local import: only needed on the Drive-writing path

    # Create a small DataFrame and save to Drive
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    out_path = "/content/drive/MyDrive/datasets/output_data.csv"
    try:
        # to_csv does not create missing parent folders, so make sure
        # MyDrive/datasets/ exists before writing.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        df.to_csv(out_path, index=False)
        print(f"Saved to {out_path}")
        # Read it back
        df2 = pd.read_csv(out_path)
        print("Loaded from Drive:\n", df2.head())
    except Exception as e:
        # Best-effort demo: report the failure and let the notebook continue.
        print("Could not write to or read from Drive; check that Drive is mounted and writable.")
        print("Error:", e)
else:
    print("Not in Colab with pandas; skipping Drive CSV example.")

In [5]:
# First Python cell: print a greeting to confirm the runtime executes code.
greeting = "Hello, Machine Learning World!"
print(greeting)

Hello, Machine Learning World!


## <a id="why-numpy"></a> Why NumPy?

NumPy provides fast, vectorized array operations essential for ML workflows (preprocessing, linear algebra, batching). It’s the foundation for many ML libraries.

### Example: BMI (vectorized)


In [6]:
import numpy as np
# Vectorized BMI: weight (kg) divided by squared height (m), element-wise,
# with no explicit Python loop.
heights = np.array([1.75, 1.80, 1.65])  # meters
weights = np.array([65, 78, 50])        # kg
bmi = weights / np.square(heights)
print("BMI:", bmi)

BMI: [21.2244898  24.07407407 18.36547291]


### Normalization example


In [7]:
# Z-score standardization: subtract the mean, then divide by the standard
# deviation (NumPy's default, ddof=0).
data = np.array([4.0, 5.0, 6.0, 8.0, 10.0])
normalized_data = (data - data.mean()) / data.std()
print("Z-score normalized:", normalized_data)

Z-score normalized: [-1.2070197  -0.74278135 -0.27854301  0.64993368  1.57841037]


### Matrix multiplication (Linear Regression prediction)


In [8]:
# Linear-model prediction: every row of X is dotted with the coefficient
# vector beta, then the intercept b is added to each result.
X = np.array([[1, 2], [3, 4], [5, 6]])
beta = np.array([0.5, 1.5])
b = 0.1
y_pred = X @ beta + b
print("y_pred:", y_pred)

y_pred: [ 3.6  7.6 11.6]


## <a id="key-numpy-concepts"></a> Key NumPy Concepts

### Arrays vs Lists


In [10]:
# Plain Python lists: element-wise addition needs an explicit pairing step.
list_a = [1, 2, 3]
list_b = [4, 5, 6]
result_list = [sum(pair) for pair in zip(list_a, list_b)]
print("List addition:", result_list)

# NumPy arrays: the same addition is a single vectorized operation.
array_a = np.array([1, 2, 3])
array_b = np.array([4, 5, 6])
result_np = np.add(array_a, array_b)
print("NumPy array addition:", result_np)


List addition: [5, 7, 9]
NumPy array addition: [5 7 9]


In [None]:
# --- Array construction helpers ---
print("From list:", np.array([1, 2, 3, 4]))
print("Zeros (3x3):\n", np.zeros(shape=(3, 3)))
print("Ones (2x4):\n", np.ones(shape=(2, 4)))

# Random floats drawn from NumPy's global generator; no seed is set here,
# so the values change from run to run.
rand_mat = np.random.random(size=(2, 3))
print("Random (2x3):\n", rand_mat)

# --- Element-wise arithmetic on equally shaped vectors ---
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("a + b:", np.add(a, b))
print("a * b:", np.multiply(a, b))

# --- Broadcasting: the length-3 vector B is added to each row of A ---
A = np.array([[1, 2, 3], [4, 5, 6]])
B = np.array([1, 2, 3])
print("Broadcasted A + B:\n", np.add(A, B))

# --- Reshape a flat vector to 2x3, then flatten it back ---
c = np.array([1, 2, 3, 4, 5, 6])
c_reshaped = np.reshape(c, (2, 3))
print("Reshaped (2x3):\n", c_reshaped)
print("Flattened:", c_reshaped.reshape(-1))


From list: [1 2 3 4]
Zeros (3x3):
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Ones (2x4):
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
Random (2x3):
 [[0.81537056 0.30292484 0.84386861]
 [0.20608299 0.41484479 0.8097621 ]]
a + b: [5 7 9]
a * b: [ 4 10 18]
Broadcasted A + B:
 [[2 4 6]
 [5 7 9]]
Reshaped (2x3):
 [[1 2 3]
 [4 5 6]]
Flattened: [1 2 3 4 5 6]


## <a id="hands-on-coding"></a> Hands-On Coding

### Creating and Manipulating Arrays


In [None]:
import numpy as np
# 1-D and 2-D arrays built directly from (nested) Python lists
array_1d = np.array([10, 20, 30, 40])
print("1D:", array_1d)

array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("2D:\n", array_2d)

# Adding a scalar applies it to every element
array_sum = np.add(array_1d, 5)
print("1D + 5:", array_sum)

# Random 3x3 matrix (unseeded, so values vary per run), then scaled by 100
random_matrix = np.random.random(size=(3, 3))
print("Random (3x3):\n", random_matrix)
random_matrix_scaled = 100 * random_matrix
print("Scaled (*100):\n", random_matrix_scaled)

# Six values laid out as 2 rows x 3 columns
reshaped_array = np.reshape(np.array([1, 2, 3, 4, 5, 6]), (2, 3))
print("Reshaped (2x3):\n", reshaped_array)

# Slice the leading 2x2 corner of the reshaped array
sliced_array = reshaped_array[0:2, 0:2]
print("Sliced (first 2 rows/cols):\n", sliced_array)

1D: [10 20 30 40]
2D:
 [[1 2 3]
 [4 5 6]]
1D + 5: [15 25 35 45]
Random (3x3):
 [[0.01419036 0.12955174 0.60550883]
 [0.17418148 0.38836731 0.19303837]
 [0.44614023 0.41967222 0.13276668]]
Scaled (*100):
 [[ 1.41903643 12.95517393 60.55088336]
 [17.41814812 38.83673057 19.30383715]
 [44.61402337 41.96722167 13.2766684 ]]
Reshaped (2x3):
 [[1 2 3]
 [4 5 6]]
Sliced (first 2 rows/cols):
 [[1 2]
 [4 5]]


## <a id="mini-challenge"></a> Mini Challenge

Write a function that takes an array of any shape and returns a min-max normalized version (values in [0, 1]). Then test it with a random array. Finally, explain why normalization is critical in ML preprocessing.


In [None]:
import numpy as np

def min_max_normalize(x: np.ndarray, axis=None, keepdims=False):
    """Linearly rescale ``x`` so that its values lie in [0, 1].

    Parameters
    ----------
    x : array_like
        Input of any shape; converted with ``np.asarray``.
    axis : None, int, or tuple of ints, optional
        Axis or axes over which the minimum and maximum are taken.
        ``None`` (default) uses the global min/max of the whole array.
    keepdims : bool, optional
        Accepted for backward compatibility; it has no effect. The result
        is element-wise and therefore always has the same shape as ``x``,
        so there is no reduced dimension to keep or drop.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``x``. Slices whose min equals their
        max (constant slices) map to 0 rather than dividing by zero.
        Empty input is returned as an empty float array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to scale; np.min/np.max can raise on an empty reduction.
        return x.astype(float)
    # keepdims=True here keeps the statistics broadcastable against x.
    x_min = np.min(x, axis=axis, keepdims=True)
    x_max = np.max(x, axis=axis, keepdims=True)
    denom = x_max - x_min
    # Avoid division by zero for constant arrays/slices
    denom = np.where(denom == 0, 1, denom)
    # No squeeze: the result already has x's full shape, and squeezing an
    # axis of length > 1 raises ValueError.
    return (x - x_min) / denom

# Demo on a seeded random 3x4 matrix so the printed values are reproducible.
np.random.seed(42)
test = np.random.randn(3, 4)

# Show the raw matrix, then the globally scaled and the row-wise scaled versions.
for label, result in (
    ("Original:\n", test),
    ("Min-max normalized (global):\n", min_max_normalize(test)),
    ("Min-max normalized (per-row):\n", min_max_normalize(test, axis=1, keepdims=True)),
):
    print(label, result)

Original:
 [[ 0.49671415 -0.1382643   0.64768854  1.52302986]
 [-0.23415337 -0.23413696  1.57921282  0.76743473]
 [-0.46947439  0.54256004 -0.46341769 -0.46572975]]
Min-max normalized (global):
 [[0.4716135  0.16166943 0.54530673 0.97257612]
 [0.1148643  0.11487231 1.         0.60375694]
 [0.         0.49399168 0.00295638 0.00182782]]
Min-max normalized (per-row):
 [[3.82219158e-01 0.00000000e+00 4.73096733e-01 1.00000000e+00]
 [0.00000000e+00 9.05375552e-06 1.00000000e+00 5.52336373e-01]
 [0.00000000e+00 1.00000000e+00 5.98467102e-03 3.70010373e-03]]


> Why normalization is critical in ML preprocessing:
>
> - Stabilizes and speeds up training by keeping features on similar scales (important for gradient descent).
> - Prevents features with large scales from dominating distance-based models (kNN, clustering) and regularization penalties.
> - Improves numerical stability for matrix operations.
> - Consistent scaling is essential for fair model comparison and reproducibility.
