<a href="https://colab.research.google.com/github/orangegreen212/cheat_sheets_python/blob/main/The_Ultimate_NumPy_Cheat_Sheet_for_Data_Science.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# The Ultimate NumPy Cheat Sheet for Data Science


NumPy is the fundamental package for scientific computing in Python: fast, powerful, and essential.



## 1. The Basics: Import & Array Creation

In [2]:
import numpy as np

In [None]:
# 1D array built from a flat sequence
a = np.array((1, 2, 3))

# 2D array built from a sequence of equal-length rows
b = np.array([[1, 2, 3], [4, 5, 6]])


In [None]:
a

array([1, 2, 3])

In [None]:
b

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
np.zeros((3,4))  # 3x4 array of zeros


array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np.ones((2,3), dtype=np.int16) # 2x3 array of ones (with specified type)

array([[1, 1, 1],
       [1, 1, 1]], dtype=int16)

In [None]:
np.full((3,3),5)   # 3x3 array filled with the value 5


array([[5, 5, 5],
       [5, 5, 5],
       [5, 5, 5]])

In [None]:
np.eye(3)  # 3x3 identity matrix


array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
np.arange(10,30,5)    # [10 15 20 25] (start, stop, step)


array([10, 15, 20, 25])

In [None]:
np.linspace(0,2,9) # 9 numbers from 0 to 2 (inclusive)


array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

In [None]:
np.random.rand(3,2) # 3x2 array of random numbers from [0, 1)


array([[0.5554066 , 0.42713971],
       [0.46670055, 0.82487756],
       [0.07038211, 0.79098145]])

In [None]:
np.random.randn(2,2)  # From standard normal distribution ("z-scores")


array([[ 1.16789089, -0.3974178 ],
       [ 0.86821727,  0.18541263]])

In [None]:
np.random.randint(0,10, size=(3,3)) # Random integers from [0, 10)


array([[6, 4, 6],
       [1, 5, 9],
       [0, 0, 8]])

## 2. Array Attributes & Inspection

In [None]:
arr = np.array([[1,2,3],[4,5,6]])

In [None]:
arr.shape  #Tuple of array dimensions

(2, 3)

In [None]:
arr.ndim #Number of dimensions (axes)

2

In [None]:
arr.size  #Total number of elements

6

In [None]:
arr.dtype   # dtype('int64') -> Data type of the elements

dtype('int64')

In [None]:
arr.astype(np.float64)    # Convert the array to a new data type

array([[1., 2., 3.],
       [4., 5., 6.]])

## 3. Indexing, Slicing & Masking

In [None]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
b = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
b

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [None]:
a[2]

np.int64(2)

In [None]:
a[-1]

np.int64(9)

In [None]:
b[0,1]

np.int64(2)

In [None]:
a[2:5]

array([2, 3, 4])

In [None]:
a[5:]

array([5, 6, 7, 8, 9])

In [None]:
a[::2]

array([0, 2, 4, 6, 8])

In [None]:
a[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [None]:
b[0:2,1:3]

array([[2, 3],
       [6, 7]])

In [None]:
b[:2]  # [[1 2 3 4], [5 6 7 8]] (first 2 rows)

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [None]:
b[:,1]   # [ 2  6 10] (the entire 2nd column)

array([ 2,  6, 10])

### Boolean Indexing (Masking)

In [None]:
mask = b > 5    # Creates a boolean array: [[F, F, F, F], [F, T, T, T], [T, T, T, T]]

mask

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]])

In [None]:
b[mask] # [ 6  7  8  9 10 11 12] (selects all True elements)

array([ 6,  7,  8,  9, 10, 11, 12])

In [None]:
b[b > 5]

array([ 6,  7,  8,  9, 10, 11, 12])

In [None]:
b[b%2 == 0]    # Select all even elements


array([ 2,  4,  6,  8, 10, 12])

In [None]:
a[[2,5,8]]   # [2 5 8] (select elements at indices 2, 5, and 8)


array([2, 5, 8])

In [None]:
b[[0,2],[1,3]]  # [2, 12] (selects elements at (0,1) and (2,3))

array([ 2, 12])

## 4. Array Manipulation

In [None]:
# Fixed example arrays (instead of unseeded np.random.randint draws) so that
# every output shown in this section is reproduced on Restart & Run All.
a = np.array([[3, 7, 4],
              [3, 2, 6]])
b = np.array([[3, 0, 8],
              [8, 3, 5]])

In [None]:
a

array([[3, 7, 4],
       [3, 2, 6]])

In [None]:
b

array([[3, 0, 8],
       [8, 3, 5]])

In [None]:
a.reshape(3,2)   #Reshape to 3x2 (size must remain the same)

array([[3, 7],
       [4, 3],
       [2, 6]])

In [None]:
a.T   # Transpose the array (2x3 -> 3x2)

array([[3, 3],
       [7, 2],
       [4, 6]])

In [None]:
a.flatten()  # Flatten to 1D (always returns a copy)

array([3, 7, 4, 3, 2, 6])

In [None]:
a.ravel()      # Flatten to 1D (returns a view if possible)

array([3, 7, 4, 3, 2, 6])

In [None]:
np.concatenate((a,b), axis = 0)   # Join along rows (vertically)

array([[3, 7, 4],
       [3, 2, 6],
       [3, 0, 8],
       [8, 3, 5]])

In [None]:
np.vstack((a,b))   # Same as above

array([[3, 7, 4],
       [3, 2, 6],
       [3, 0, 8],
       [8, 3, 5]])

In [None]:
np.concatenate((a,b), axis = 1)   # Join along columns (horizontally)

array([[3, 7, 4, 3, 0, 8],
       [3, 2, 6, 8, 3, 5]])

In [None]:
np.hstack((a,b))   # Same as above

array([[3, 7, 4, 3, 0, 8],
       [3, 2, 6, 8, 3, 5]])

In [None]:
arr = np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
np.hsplit(arr,2) # Split into 2 arrays horizontally

[array([[0, 1],
        [4, 5],
        [8, 9]]),
 array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

In [None]:
np.vsplit(arr,3)   # Split into 3 arrays vertically

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]

## 5. Mathematical Operations

In [None]:
a = np.array([10, 20, 30])
b = np.array([1, 2, 3])

In [None]:
a+b

array([11, 22, 33])

In [None]:
a*b

array([10, 40, 90])

In [None]:
a/b

array([10., 10., 10.])

In [None]:
a+5

array([15, 25, 35])

In [None]:
# Element-wise universal functions. A notebook cell displays only the value of
# its last expression, so only the np.round result appears below.
np.sqrt(a)     # Square root
np.exp(b)      # Exponential (e^x)
np.sin(b)      # Sine
np.log(b)      # Natural logarithm
np.round(arr, decimals=2) # Round to n decimal places (arr holds integers here, so it comes back unchanged)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [3]:
a = np.ones((2,3))
b = np.full((3,2), 2)

In [4]:
a

array([[1., 1., 1.],
       [1., 1., 1.]])

In [5]:
b

array([[2, 2],
       [2, 2],
       [2, 2]])

In [6]:
a @ b  # Matrix multiplication (preferred method)

array([[6., 6.],
       [6., 6.]])

In [7]:
np.dot(a,b)   # Also matrix multiplication

array([[6., 6.],
       [6., 6.]])

## 6. Aggregation & Statistics

In [10]:
arr = np.array([[1, 2, 3], [4, 5, 6]])

In [11]:
arr.sum()    # 21 (sum of all elements)

np.int64(21)

In [12]:
arr.min()    # 1 (minimum element)

np.int64(1)

In [13]:
arr.max()    # 6 (maximum element)

np.int64(6)

In [14]:
arr.sum(axis=0)  # [5 7 9] (sum down the columns)

array([5, 7, 9])

In [15]:
arr.sum(axis=1)  # [ 6 15] (sum across the rows)

array([ 6, 15])

In [16]:
arr.mean()  # 3.5 (mean)

np.float64(3.5)

In [17]:
np.median(arr)    # 3.5 (median)

np.float64(3.5)

In [18]:
arr.std()  # 1.707.. (standard deviation)

np.float64(1.707825127659933)

In [19]:
arr.var()    # 2.916.. (variance)

np.float64(2.9166666666666665)

In [20]:
np.unique(arr)   # [1 2 3 4 5 6] (find unique elements)

array([1, 2, 3, 4, 5, 6])

## 7. Sorting

In [22]:
arr = np.array([3, 1, 2, 5, 4])

In [23]:
arr.sort()    # Sorts the array in-place
              # arr is now [1 2 3 4 5]

In [24]:
arr2 = np.array([3, 1, 2, 5, 4])

In [25]:
np.sort(arr2)  # Returns a sorted COPY of the array

array([1, 2, 3, 4, 5])

In [26]:
np.argsort(arr2)  # [1 2 0 4 3] -> returns indices that would sort the array

array([1, 2, 0, 4, 3])

## 8. Copy vs. View (Crucial Concept!)

**Critical:** misunderstanding the difference between a view and a copy leads to subtle bugs.

- **View (no copy):** Slicing an array creates a view of the original array. Changes made through the view also change the original.
- **Copy:** Calling `.copy()` creates an independent array. Changes to the copy do not affect the original.

In [31]:
a = np.arange(10)
b = a[2:6]      # b is a view of a, not a new array

In [32]:
b[0] = 99

In [33]:
a

array([ 0,  1, 99,  3,  4,  5,  6,  7,  8,  9])

In [35]:
c = a[2:6].copy()

In [36]:
c[0]=1000

In [37]:
a   # unchanged by c[0] = 1000, since c is an independent copy; the 99 was written earlier through the view b

array([ 0,  1, 99,  3,  4,  5,  6,  7,  8,  9])

## 9. Saving & Loading Arrays

In [38]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

In [39]:
np.save('my_array.npy',a)

In [40]:
loaded_a=np.load('my_array.npy')

In [42]:
# Text round trip. fmt='%d' writes the integers as plain digits (the default
# format is scientific notation), and dtype=b.dtype restores the integer type
# on load (np.loadtxt returns floats by default).
np.savetxt('my_array.csv', b, fmt='%d', delimiter=';')
loaded_b = np.loadtxt('my_array.csv', delimiter=';', dtype=b.dtype)

In [43]:
np.savez('my_arrays.npz', array_a=a, array_b=b)

In [44]:
# For a .npz archive np.load returns a dict-like NpzFile; each array is read
# from disk when its key is accessed. Call data.close() once you are done.
data=np.load('my_arrays.npz')

In [45]:
data['array_a']  # -> array([1, 2, 3])

array([1, 2, 3])