# NumPy In-Depth Notebook

This notebook goes from fundamentals to advanced NumPy usage, with realistic scenarios and working code.


## 0) Setup

In [1]:
import tempfile
from pathlib import Path

import numpy as np


## 1) Arrays and dtypes

In [2]:
# Integer-only input yields an integer dtype; one float entry promotes the whole array to float.
a = np.array((1, 2, 3))
b = np.array((1, 2, 3.5))

((a, a.dtype), (b, b.dtype))


((array([1, 2, 3]), dtype('int64')),
 (array([1. , 2. , 3.5]), dtype('float64')))

In [3]:
# Request the dtype explicitly instead of relying on inference from the values
a = np.array([1, 2, 3], dtype="int32")
b = np.array([1, 2, 3], dtype="float64")

(a.dtype, b.dtype)


(dtype('int32'), dtype('float64'))

## 2) Shape, ndim, size, and axes

In [4]:
# Two rows of three values each
m = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# (rows, cols), number of axes, total element count
(m.shape, m.ndim, m.size)


((2, 3), 2, 6)

## 3) Creating arrays

In [5]:
# 2x3 array filled with 0.0
np.zeros(shape=(2, 3))


array([[0., 0., 0.],
       [0., 0., 0.]])

In [6]:
# 2x3 array filled with 1.0
np.ones(shape=(2, 3))


array([[1., 1., 1.],
       [1., 1., 1.]])

In [7]:
# 2x3 array where every entry is the fill value 7
np.full(shape=(2, 3), fill_value=7)


array([[7, 7, 7],
       [7, 7, 7]])

In [8]:
# 3x3 identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(N=3)


array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [9]:
# Even numbers from 0 up to (but not including) 10
np.arange(0, 10, step=2)


array([0, 2, 4, 6, 8])

In [10]:
# Five evenly spaced points from 0 to 1, both endpoints included
np.linspace(start=0, stop=1, num=5)


array([0.  , 0.25, 0.5 , 0.75, 1.  ])

## 4) Indexing, slicing, and views

In [11]:
# Integers 0..9, then the half-open slice covering positions 2 through 6
a = np.arange(10)
a[slice(2, 7)]


array([2, 3, 4, 5, 6])

In [12]:
# The numbers 1..12 laid out as 3 rows x 4 columns
m = np.arange(1, 13).reshape((3, 4))

# Whole matrix, one element (row 1, col 2), column 1, row 1
(m, m[1, 2], m[:, 1], m[1])


(array([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]]),
 np.int64(7),
 array([ 2,  6, 10]),
 array([5, 6, 7, 8]))

In [13]:
# Views vs copies: a basic slice shares memory with its source,
# so writing through the slice also changes `x`
x = np.arange(6)
view = x[1:4]
view.fill(99)
x


array([ 0, 99, 99, 99,  4,  5])

In [14]:
# An explicit copy owns its own data, so writing to it leaves `x` untouched
x = np.arange(6)
copy = np.copy(x[1:4])
copy.fill(77)
(x, copy)


(array([0, 1, 2, 3, 4, 5]), array([77, 77, 77]))

## 5) Boolean and fancy indexing

In [15]:
# Boolean mask: True where the entry is at least 30; indexing keeps only those entries
a = np.array([10, 20, 30, 40, 50])
mask = np.greater_equal(a, 30)
a[mask]


array([30, 40, 50])

In [16]:
# Fancy indexing: pull out positions 0, 2 and 4 of `a` in one step
idx = [0, 2, 4]
np.take(a, idx)


array([10, 30, 50])

## 6) Vectorized operations and ufuncs

In [17]:
# Element-wise sum, element-wise product, and a ufunc applied to every entry
x = np.array([1, 2, 3])
y = np.array([10, 20, 30])

(np.add(x, y), np.multiply(x, y), np.sqrt(y))


(array([11, 22, 33]),
 array([10, 40, 90]),
 array([3.16227766, 4.47213595, 5.47722558]))

In [18]:
# In-place with out: the ufunc writes its result straight into the `out` buffer.
# Accumulate into a copy rather than into `x` itself, so re-running this cell
# does not keep compounding the sum into the `x` defined in the previous cell.
acc = x.copy()
np.add(acc, y, out=acc)
acc


array([11, 22, 33])

## 7) Broadcasting

In [19]:
# The length-3 vector `b` is broadcast across both rows of the 2x3 matrix `A`
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
b = np.array([10, 20, 30])

np.add(A, b)


array([[11, 22, 33],
       [14, 25, 36]])

In [20]:
# Broadcasting with a column vector: `c` has shape (2, 1), so row i of `A` is scaled by c[i]
c = np.array([1, 2]).reshape(2, 1)
np.multiply(A, c)


array([[ 1,  2,  3],
       [ 8, 10, 12]])

## 8) Aggregations and statistics

In [21]:
# Reductions over every element: total, average, smallest, largest
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

(np.sum(a), np.mean(a), np.min(a), np.max(a))


(np.int64(21), np.float64(3.5), np.int64(1), np.int64(6))

In [22]:
# axis=0 collapses the rows (one total per column); axis=1 collapses the columns (one total per row)
(np.sum(a, axis=0), np.sum(a, axis=1))


(array([5, 7, 9]), array([ 6, 15]))

In [23]:
# NaN-safe stats: nanmean averages only the entries that are not NaN
b = np.array([1.0, float("nan"), 3.0])
np.nanmean(b)


np.float64(2.0)

## 9) Reshape, transpose, and stacking

In [24]:
# Six consecutive integers rearranged into 2 rows x 3 columns
a = np.arange(6)
np.reshape(a, (2, 3))


array([[0, 1, 2],
       [3, 4, 5]])

In [25]:
# Reshape `a` to 3x2, then swap its two axes to get a 2x3 result
b = np.reshape(a, (3, 2))
b.transpose()


array([[0, 2, 4],
       [1, 3, 5]])

In [26]:
# Two 1x2 row matrices joined along axis 0, i.e. stacked on top of each other
x = np.array([[1, 2]])
y = np.array([[3, 4]])

np.concatenate((x, y), axis=0)


array([[1, 2],
       [3, 4]])

In [27]:
# Vertical stack: `y` is placed below `x`
np.vstack((x, y))


array([[1, 2],
       [3, 4]])

In [28]:
# Horizontal stack: `y` is placed to the right of `x`
np.hstack((x, y))


array([[1, 2, 3, 4]])

## 10) Linear algebra

In [29]:
# Matrix product of two 2x2 matrices (rows of A times columns of B)
A = np.array([
    [1, 2],
    [3, 4],
])
B = np.array([
    [5, 6],
    [7, 8],
])

np.matmul(A, B)


array([[19, 22],
       [43, 50]])

In [30]:
# Determinant of `A`; the result is a float, so expect small rounding error in the last digits
np.linalg.det(a=A)


np.float64(-2.0000000000000004)

In [31]:
# Solve the linear system A @ v = rhs for v
rhs = np.array([1, 0])
np.linalg.solve(A, rhs)


array([-2. ,  1.5])

## 11) Random numbers (Generator API)

In [32]:
# Seeded Generator, so the draws in this section are repeatable
rng = np.random.default_rng(seed=42)

# Five integers drawn from 0 (inclusive) to 10 (exclusive)
rng.integers(low=0, high=10, size=5)


array([0, 7, 6, 4, 4])

In [33]:
# 2x3 draws from a normal distribution with mean 0 and standard deviation 1
rng.normal(loc=0, scale=1, size=(2, 3))


array([[ 0.94056472, -1.95103519, -1.30217951],
       [ 0.1278404 , -0.31624259, -0.01680116]])

## 12) File I/O

In [34]:
# Round-trip an array through NumPy's binary .npy format.
# The file lives inside a temporary directory that is deleted when the
# `with` block exits, so running the notebook leaves no stray file in the
# working directory.
arr = np.arange(5)

with tempfile.TemporaryDirectory() as tmp_dir:
    npy_path = Path(tmp_dir) / "temp_arr.npy"
    np.save(npy_path, arr)
    # Without mmap_mode the data is read fully into memory, so `loaded`
    # stays valid after the directory is removed.
    loaded = np.load(npy_path)

loaded


array([0, 1, 2, 3, 4])

In [35]:
# Round-trip `arr` through a comma-delimited text file.
# The file is written inside a temporary directory that is deleted when the
# `with` block exits, so nothing is left behind in the working directory.
with tempfile.TemporaryDirectory() as tmp_dir:
    csv_path = Path(tmp_dir) / "temp_arr.csv"
    np.savetxt(csv_path, arr, delimiter=",")
    # loadtxt parses the text back as floats by default
    loaded_txt = np.loadtxt(csv_path, delimiter=",")

loaded_txt


array([0., 1., 2., 3., 4.])

## 13) Real-world scenarios

### 13.1) Sales cleanup and transformation

In [36]:
rng = np.random.default_rng(0)

# 5 stores x 7 days, converted to float so NaN can mark a missing value
sales = rng.integers(50, 500, size=(5, 7)).astype(float)

# Inject missing values at (store 1, day 3) and (store 3, day 5)
sales[[1, 3], [3, 5]] = np.nan

# Replace each missing value with the mean of its column (NaNs ignored)
col_means = np.nanmean(sales, axis=0)
inds = np.where(np.isnan(sales))
sales[inds] = col_means[inds[1]]  # inds[1] holds the column index of every gap

# Apply weekend boost: +10% on day columns 5 and 6
sales[:, 5:7] *= 1.10

sales.round(2)


array([[432.  , 336.  , 280.  , 171.  , 188.  ,  74.8 ,  91.3 ],
       [ 57.  , 128.  , 415.  , 205.25, 460.  , 303.6 , 354.2 ],
       [486.  , 378.  , 334.  , 294.  , 301.  , 517.  , 191.4 ],
       [417.  , 351.  ,  51.  , 227.  , 435.  , 344.3 ,  71.5 ],
       [394.  , 378.  , 430.  , 129.  ,  90.  , 481.8 ,  64.9 ]])

### 13.2) Sensor smoothing

In [37]:
# Noisy sine wave sampled at 200 points over [0, 4*pi]
x = np.linspace(0, 4 * np.pi, num=200)
signal = np.sin(x) + rng.normal(0, 0.2, size=x.size)

# Moving average: a box filter with `window` equal weights of 1/window
window = 7
kernel = np.full(window, 1 / window)
# mode="same" keeps the output the same length as `signal`.
# NOTE(review): near both ends the window extends past the data, so the first
# and last few smoothed values are averaged with implicit zeros - confirm that
# is acceptable before relying on the edge values.
smoothed = np.convolve(signal, kernel, mode="same")

(signal[:5], smoothed[:5])


(array([0.08232611, 0.27160831, 0.10025277, 0.46160436, 0.11687403]),
 array([0.13082736, 0.14752365, 0.20192652, 0.28058071, 0.33261705]))

### 13.3) Portfolio returns

In [38]:
# Simulated returns: 10 rows x 4 columns, one column per entry in `weights`
returns = rng.normal(0.001, 0.02, size=(10, 4))
weights = np.array([0.4, 0.3, 0.2, 0.1])

# Weighted sum across the columns gives one portfolio return per row
portfolio = np.matmul(returns, weights)
# Compound the row returns: running product of (1 + r), minus 1
cumulative = (1 + portfolio).cumprod() - 1

(portfolio[:5], cumulative[-1])


(array([ 0.00811258,  0.00060029,  0.00420669, -0.0054396 , -0.02514096]),
 np.float64(-0.007872890366065421))

### 13.4) Image-like processing

In [39]:
# 8x8 grid of integer intensities in 0..255
image = rng.integers(0, 256, size=(8, 8))

# Min-max scaling: the smallest value maps to 0 and the largest to 1
lo, hi = image.min(), image.max()
norm = (image - lo) / (hi - lo)

# Threshold: 1 where the scaled value exceeds 0.6, otherwise 0
binary = (norm > 0.6).astype(int)

(image, norm.round(2), binary)


(array([[138, 182,  57, 105,  92, 177, 106, 107],
        [138, 148,  28, 236, 104,  44,   0, 111],
        [190,  75, 218,  51,  35, 100, 180,  76],
        [210,   5, 251,  12, 216,   0, 108,  79],
        [250, 167, 249, 106, 128, 128, 192, 130],
        [233,  49, 121,  63, 221,  35, 179, 233],
        [ 75,  71, 196, 151, 146, 226,  24,  69],
        [100, 234,  18, 115, 121,   3, 109, 166]]),
 array([[0.55, 0.73, 0.23, 0.42, 0.37, 0.71, 0.42, 0.43],
        [0.55, 0.59, 0.11, 0.94, 0.41, 0.18, 0.  , 0.44],
        [0.76, 0.3 , 0.87, 0.2 , 0.14, 0.4 , 0.72, 0.3 ],
        [0.84, 0.02, 1.  , 0.05, 0.86, 0.  , 0.43, 0.31],
        [1.  , 0.67, 0.99, 0.42, 0.51, 0.51, 0.76, 0.52],
        [0.93, 0.2 , 0.48, 0.25, 0.88, 0.14, 0.71, 0.93],
        [0.3 , 0.28, 0.78, 0.6 , 0.58, 0.9 , 0.1 , 0.27],
        [0.4 , 0.93, 0.07, 0.46, 0.48, 0.01, 0.43, 0.66]]),
 array([[0, 1, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0, 1, 0],
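        [1, 0, 1, 0, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 1, 0, 1, 1],
        [0, 0, 1, 1, 0, 1, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 1]]))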

### 13.5) One-hot encoding

In [40]:
# Class ids; the number of classes is taken as the largest id plus one
labels = np.array([0, 2, 1, 2, 0])
num_classes = np.max(labels) + 1

# Row k of the identity matrix is the one-hot vector for class k,
# so selecting rows by label yields one encoded row per sample
one_hot = np.take(np.eye(num_classes), labels, axis=0)
one_hot


array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

### 13.6) Grouped analysis with masks

In [41]:
# 100 simulated customers: an age in 18..69 and a spend in 10..499
ages = rng.integers(18, 70, size=100)
spend = rng.integers(10, 500, size=100)

# Three age bands: 18-35, 36-55, and 56 or older
adult_mask = np.logical_and(ages >= 18, ages <= 35)
mid_mask = np.logical_and(ages >= 36, ages <= 55)
senior_mask = np.greater_equal(ages, 56)

# Average spend within each band, in the order adult, mid, senior
tuple(spend[band].mean() for band in (adult_mask, mid_mask, senior_mask))


(np.float64(233.38235294117646),
 np.float64(257.85714285714283),
 np.float64(269.9166666666667))

### 13.7) Time series weekly aggregation

In [42]:
# 28 consecutive values folded into 4 rows of 7, then summed within each row
metric = rng.integers(100, 200, size=28)
weekly = np.sum(metric.reshape(4, 7), axis=1)
weekly


array([1040, 1107, 1039, 1130])

## 14) Performance tips

- Prefer vectorized operations to Python loops
- Pre-allocate arrays when size is known
- Use appropriate dtypes to reduce memory
- Be careful with views vs copies
- Check shapes early to avoid silent broadcasting bugs
