In [2]:
import numpy as np


In [3]:
# Display a length-5 vector of zeros and a small integer vector, one per line.
print(np.zeros(5), np.array([1, 2, 3]), sep="\n")

[0. 0. 0. 0. 0.]
[1 2 3]


# Importing/exporting
    np.loadtxt('file.txt') | From a text file
    np.genfromtxt('file.csv',delimiter=',') | From a CSV file
    np.savetxt('file.txt',arr,delimiter=' ') | Writes to a text file
    np.savetxt('file.csv',arr,delimiter=',') | Writes to a CSV file

# Creating Arrays
    np.array([1,2,3]) | One dimensional array
    np.array([(1,2,3),(4,5,6)]) | Two dimensional array
    np.zeros(3) | 1D array of length 3 all values 0
    np.ones((3,4)) | 3x4 array with all values 1
    np.eye(5) | 5x5 array of 0 with 1 on diagonal (Identity matrix)
    np.linspace(0,100,6) | Array of 6 evenly divided values from 0 to 100
    np.arange(0,10,3) | Array of values from 0 to less than 10 with step 3 (eg [0,3,6,9])
    np.full((2,3),8) | 2x3 array with all values 8
    np.random.rand(4,5) | 4x5 array of random floats between 0–1
    np.random.rand(6,7)*100 | 6x7 array of random floats between 0–100
    np.random.randint(5,size=(2,3)) | 2x3 array with random ints between 0–4

# Inspecting Properties
    arr.size | Returns number of elements in arr
    arr.shape | Returns dimensions of arr (rows,columns)
    arr.dtype | Returns type of elements in arr
    arr.astype(dtype) | Convert arr elements to type dtype
    arr.tolist() | Convert arr to a Python list
    np.info(np.eye) | View documentation for np.eye

# Copying/sorting/reshaping
    np.copy(arr) | Copies arr to new memory
    arr.view(dtype) | Creates view of arr elements with type dtype
    arr.sort() | Sorts arr
    arr.sort(axis=0) | Sorts specific axis of arr
    two_d_arr.flatten() | Flattens 2D array two_d_arr to 1D
    arr.T | Transposes arr (rows become columns and vice versa)
    arr.reshape(3,4) | Reshapes arr to 3 rows, 4 columns without changing data
    arr.resize((5,6)) | Changes arr shape to 5x6 and fills new values with 0

# Adding/removing Elements
    np.append(arr,values) | Appends values to end of arr
    np.insert(arr,2,values) | Inserts values into arr before index 2
    np.delete(arr,3,axis=0) | Deletes row on index 3 of arr
    np.delete(arr,4,axis=1) | Deletes column on index 4 of arr

# Combining/splitting
    np.concatenate((arr1,arr2),axis=0) | Adds arr2 as rows to the end of arr1
    np.concatenate((arr1,arr2),axis=1) | Adds arr2 as columns to end of arr1
    np.split(arr,3) | Splits arr into 3 sub-arrays
    np.hsplit(arr,5) | Splits arr horizontally (column-wise) into 5 equal sub-arrays

# Indexing/slicing/subsetting
    arr[5] | Returns the element at index 5
    arr[2,5] | Returns the 2D array element on index [2][5]
    arr[1]=4 | Assigns array element on index 1 the value 4
    arr[1,3]=10 | Assigns array element on index [1][3] the value 10
    arr[0:3] | Returns the elements at indices 0,1,2 (On a 2D array: returns rows 0,1,2)
    arr[0:3,4] | Returns the elements on rows 0,1,2 at column 4
    arr[:2] | Returns the elements at indices 0,1 (On a 2D array: returns rows 0,1)
    arr[:,1] | Returns the elements at index 1 on all rows
    arr<5 | Returns an array with boolean values
    (arr1<3) & (arr2>5) | Returns an array with boolean values
    ~arr | Inverts a boolean array
    arr[arr<5] | Returns array elements smaller than 5

# Scalar Math
    np.add(arr,1) | Add 1 to each array element
    np.subtract(arr,2) | Subtract 2 from each array element
    np.multiply(arr,3) | Multiply each array element by 3
    np.divide(arr,4) | Divide each array element by 4 (dividing by zero gives inf, or nan for 0/0, instead of raising)
    np.power(arr,5) | Raise each array element to the 5th power

# Vector Math
    np.add(arr1,arr2) | Elementwise add arr2 to arr1
    np.subtract(arr1,arr2) | Elementwise subtract arr2 from arr1
    np.multiply(arr1,arr2) | Elementwise multiply arr1 by arr2
    np.divide(arr1,arr2) | Elementwise divide arr1 by arr2
    np.power(arr1,arr2) | Elementwise raise arr1 raised to the power of arr2
    np.array_equal(arr1,arr2) | Returns True if the arrays have the same elements and shape
    np.sqrt(arr) | Square root of each element in the array
    np.sin(arr) | Sine of each element in the array
    np.log(arr) | Natural log of each element in the array
    np.abs(arr) | Absolute value of each element in the array
    np.ceil(arr) | Rounds up to the nearest int
    np.floor(arr) | Rounds down to the nearest int
    np.round(arr) | Rounds to the nearest int

# Statistics
    np.mean(arr,axis=0) | Returns mean along specific axis
    arr.sum() | Returns sum of arr
    arr.min() | Returns minimum value of arr
    arr.max(axis=0) | Returns maximum value of specific axis
    np.var(arr) | Returns the variance of array
    np.std(arr,axis=1) | Returns the standard deviation of specific axis
    np.corrcoef(arr) | Returns correlation coefficient of array
    
# Array Mathematics
   ## Arithmetic Operations
    >>> g = a - b Subtraction
     array([[-0.5, 0. , 0. ],
     [-3. , -3. , -3. ]])
    >>> np.subtract(a,b) Subtraction
    >>> b + a Addition
     array([[ 2.5, 4. , 6. ],
     [ 5. , 7. , 9. ]])
    >>> np.add(b,a) Addition
    >>> a / b Division
     array([[ 0.66666667, 1. , 1. ],
     [ 0.25 , 0.4 , 0.5 ]])
    >>> np.divide(a,b) Division
    >>> a * b Multiplication
     array([[ 1.5, 4. , 9. ],
     [ 4. , 10. , 18. ]])
    >>> np.multiply(a,b) Multiplication
    >>> np.exp(b) Exponentiation
    >>> np.sqrt(b) Square root
    >>> np.sin(a) Print sines of an array
    >>> np.cos(b) Element-wise cosine
    >>> np.log(a) Element-wise natural logarithm
    >>> e.dot(f) Dot product
     array([[ 7., 7.],
     [ 7., 7.]])

# Aggregate Functions
    >>> a.sum() Array-wise sum
    >>> a.min() Array-wise minimum value
    >>> b.max(axis=0) Maximum value of each column (maximum along axis 0)
    >>> b.cumsum(axis=1) Cumulative sum of the elements along each row
    >>> a.mean() Mean
    >>> np.median(b) Median
    >>> np.corrcoef(a) Correlation coefficient
    >>> np.std(b) Standard deviation

# Comparison
    >>> a == b Element-wise comparison
     array([[False, True, True],
     [False, False, False]], dtype=bool)
    >>> a < 2 Element-wise comparison
     array([True, False, False], dtype=bool)
    >>> np.array_equal(a, b) Array-wise comparison
## Aggregate Functions
    >>> a.sum() Array-wise sum
    >>> a.min() Array-wise minimum value
    >>> b.max(axis=0) Maximum value of each column (maximum along axis 0)
    >>> b.cumsum(axis=1) Cumulative sum of the elements along each row
    >>> a.mean() Mean
    >>> np.median(b) Median
    >>> np.corrcoef(a) Correlation coefficient
    >>> np.std(b) Standard deviation
## Sorting Arrays
    >>> a.sort() Sort an array
    >>> c.sort(axis=0) Sort the elements of an array's axis
## Subsetting, Slicing, Indexing
   
   ### Subsetting
    >>> a[2] Select the element at index 2
     3
    >>> b[1,2] Select the element at row 1 column 2
     6.0 (equivalent to b[1][2])
   
   ### Slicing
   
    >>> a[0:2] Select items at index 0 and 1
     array([1, 2])
    >>> b[0:2,1] Select items at rows 0 and 1 in column 1
     array([ 2., 5.])

    >>> b[:1] Select all items at row 0
     array([[1.5, 2., 3.]]) (equivalent to b[0:1, :])
    >>> c[1,...] Same as [1,:,:]
     array([[[ 3., 2., 1.],
     [ 4., 5., 6.]]])
    >>> a[ : :-1] Reversed array a
     array([3, 2, 1])
   
   ### Boolean Indexing
   
    >>> a[a<2] Select elements from a less than 2
     array([1])
   
   ### Fancy Indexing
   
    >>> b[[1, 0, 1, 0],[0, 1, 2, 0]] Select elements (1,0),(0,1),(1,2) and (0,0)
     array([ 4. , 2. , 6. , 1.5])
    >>> b[[1, 0, 1, 0]][:,[0,1,2,0]] Select a subset of the matrix’s rows and columns
     array([[ 4. , 5. , 6. , 4. ],
     [ 1.5, 2. , 3. , 1.5],
     [ 4. , 5. , 6. , 4. ],
     [ 1.5, 2. , 3. , 1.5]])
# Array Manipulation
   ### Transposing Array
    >>> i = np.transpose(b) Permute array dimensions
    >>> i.T Permute array dimensions
   ### Changing Array Shape
    >>> b.ravel() Flatten the array
    >>> g.reshape(3,-1) Reshape, but don’t change data (-1 lets NumPy infer that dimension)
   ### Adding/Removing Elements
    >>> h.resize((2,6)) Resize h in place to shape (2,6); returns None
    >>> np.append(h,g) Append items to an array
    >>> np.insert(a, 1, 5) Insert items in an array
    >>> np.delete(a,[1]) Delete items from an array
   ### Combining Arrays
    >>> np.concatenate((a,d),axis=0) Concatenate arrays
     array([ 1, 2, 3, 10, 15, 20])
    >>> np.vstack((a,b)) Stack arrays vertically (row-wise)
     array([[ 1. , 2. , 3. ],
     [ 1.5, 2. , 3. ],
     [ 4. , 5. , 6. ]])
    >>> np.r_[e,f] Stack arrays vertically (row-wise)
    >>> np.hstack((e,f)) Stack arrays horizontally (column-wise)
     array([[ 7., 7., 1., 0.],
     [ 7., 7., 0., 1.]])
    >>> np.column_stack((a,d)) Create stacked column-wise arrays
     array([[ 1, 10],
     [ 2, 15],
     [ 3, 20]])
    >>> np.c_[a,d] Create stacked column-wise arrays
   ### Splitting Arrays
    >>> np.hsplit(a,3) Split the array horizontally into 3 equal parts
     [array([1]),array([2]),array([3])]
    >>> np.vsplit(c,2) Split the array vertically into 2 equal parts
    [array([[[ 1.5, 2. , 1. ],
     [ 4. , 5. , 6. ]]]),
     array([[[ 3., 2., 3.],
     [ 4., 5., 6.]]])]

In [3]:
# Sample data: two 1-D integer vectors and a 3x3 integer matrix holding 1..9.
a = np.array([1, 2, 3])
b = np.array([2, 3, 1])
c = np.arange(1, 10).reshape(3, 3)


In [35]:
# Split c along axis 0 into 3 equal sub-arrays.
np.split(c, indices_or_sections=3, axis=0)

[array([[1, 2, 3]]), array([[4, 5, 6]]), array([[7, 8, 9]])]

# How to extract specific items from an array?

In [9]:
import numpy as np

# Build a 3x4 nested list (each row holds 4 consecutive integers), convert it
# to a float array, and display the result.
list2 = [list(range(first, first + 4)) for first in (1, 3, 5)]
arr2 = np.asarray(list2, dtype=float)
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [10]:
# Extract the last 2 rows (index 1 onward) and the last 2 columns (index 2 onward)
arr2[1:, 2:]
#list2[:2, :2] # error: a plain Python list does not accept this tuple-style index

array([[5., 6.],
       [7., 8.]])

In [11]:
# Element-wise test "greater than 4"; the result is a boolean array shaped like arr2.
b = np.greater(arr2, 4)
b

array([[False, False, False, False],
       [False, False,  True,  True],
       [ True,  True,  True,  True]])

In [12]:
# Index with the boolean mask b: keeps only the elements where b is True, as a 1-D array.
arr2[b]

array([5., 6., 5., 6., 7., 8.])

# How to reverse the rows and the whole array?

In [13]:
# Flip the array top-to-bottom: row order is reversed, columns are untouched.
np.flipud(arr2)

array([[5., 6., 7., 8.],
       [3., 4., 5., 6.],
       [1., 2., 3., 4.]])

In [8]:
# Flip along both axes: the row order is reversed and each row is reversed too.
np.fliplr(np.flipud(arr2))

array([[8., 7., 6., 5.],
       [6., 5., 4., 3.],
       [4., 3., 2., 1.]])

# How to represent missing values and infinite?

In [14]:
# Put a nan (not a number) at [1, 1] and an inf (infinity) at [1, 2] in one assignment.
arr2[1, 1:3] = (np.nan, np.inf)
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., nan, inf,  6.],
       [ 5.,  6.,  7.,  8.]])

In [15]:
# Flag every non-finite entry (nan or +/-inf) and overwrite it with -1.
# Do not test with `arr2 == np.nan`: nan never compares equal to anything.
missing_bool = ~np.isfinite(arr2)
arr2[missing_bool] = -1
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

# How to compute mean, min, max on the ndarray?

In [12]:
# Summary statistics computed over all elements of arr2
print("Mean value is: ", np.mean(arr2))
print("Max value is: ", np.max(arr2))
print("Min value is: ", np.min(arr2))
# Running total over the flattened array
print(arr2.cumsum())

Mean value is:  3.5833333333333335
Max value is:  8.0
Min value is:  -1.0
[ 1.  3.  6. 10. 13. 12. 11. 17. 22. 28. 35. 43.]


# How to create a new array from an existing array?

In [13]:
# Slicing hands back a view, not a new array: arr2a shares its data with arr2.
arr2a = arr2[0:2, 0:2]
arr2a[0, 0] = 100  # the write is visible in arr2 as well
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

# Reshaping and Flattening Multidimensional arrays

In [14]:
# Present the 3x4 array as 4 rows of 3 columns; the element order is unchanged.
np.reshape(arr2, (4, 3))

array([[100.,   2.,   3.],
       [  4.,   3.,  -1.],
       [ -1.,   6.,   5.],
       [  6.,   7.,   8.]])

# What is the difference between flatten() and ravel()?

The difference between ravel() and flatten() is that ravel() returns a view of the parent
array whenever possible, so any change to the raveled array also affects the parent. This
makes ravel() memory efficient, since no copy is created. flatten() always returns a copy,
so changes to the flattened array never affect the parent.

In [15]:
# Flatten it to a 1d array (flatten() returns a copy of the data)
arr2.flatten()

array([100.,   2.,   3.,   4.,   3.,  -1.,  -1.,   6.,   5.,   6.,   7.,
         8.])

In [16]:
# flatten() returns a copy, so changing the flattened array does not change the parent
b1 = arr2.flatten()
b1[0] = 90 # only b1 is modified; arr2 is left as it was
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [18]:
# ravel() returns a view of arr2 here, so changing the raveled array changes the parent also.
b2 = arr2.ravel()
print(b2)
b2[0] = 101 # changing b2 changes arr2 also
arr2

[101.   2.   3.   4.   3.  -1.  -1.   6.   5.   6.   7.   8.]


array([[101.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

# How to create sequences, repetitions and random numbers using numpy?

In [18]:
# Lower limit is 0 by default
print(np.arange(5))
# 0 to 9 (the stop value 10 is excluded)
print(np.arange(0, 10))
# 0 to 8 with step of 2
print(np.arange(0, 10, 2))
# 10 to 1, decreasing order
print(np.arange(10, 0, -1))

[0 1 2 3 4]
[0 1 2 3 4 5 6 7 8 9]
[0 2 4 6 8]
[10  9  8  7  6  5  4  3  2  1]


In [22]:
# 10 evenly spaced values that start at 1 and end at 100 (both included), as integers
np.linspace(start=1, stop=100, num=10, dtype=int)

array([  1,  12,  23,  34,  45,  56,  67,  78,  89, 100])

In [23]:
# Limit the number of digits printed after the decimal point to 2
np.set_printoptions(precision=2)
# 10 values evenly spaced on a log scale: start at 10^1 and end at 10^50
np.logspace(start=1, stop=50, num=10, base=10)

None


array([1.00e+01, 2.78e+06, 7.74e+11, 2.15e+17, 5.99e+22, 1.67e+28,
       4.64e+33, 1.29e+39, 3.59e+44, 1.00e+50])

In [21]:
# 2x2 array filled with zeros
np.zeros((2, 2))

array([[0., 0.],
       [0., 0.]])

In [22]:
# 2x2 array filled with ones
np.ones((2, 2))

array([[1., 1.],
       [1., 1.]])

# How to create repeating sequences?

In [23]:
# Source sequence to be repeated
a = list(range(1, 4))
# tile: the whole sequence is laid end-to-end two times
print('Tile: ', np.tile(a, reps=2))
# repeat: every element is duplicated in place two times
print('Repeat: ', np.repeat(a, repeats=2))

Tile:  [1 2 3 1 2 3]
Repeat:  [1 1 2 2 3 3]


# How to generate random numbers?

In [65]:
# Random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))

# Normal distribution with mean=0 and variance=1 of shape 2,2
print(np.random.randn(2,2))

# Random integers between [0, 10) of shape 2,2
print(np.random.randint(0, 10, size=[2,2]))

# One random number between [0,1)
print(np.random.random())

# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2,2]))

# Pick 10 items from a given list, each item being equally likely
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10))

# Pick 10 items from a given list with a predefined probability 'p'
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10, p=[0.3, .1, 0.1, 0.4, 0.1])) # picks more o's

# Create a separate random state seeded with 100
rn = np.random.RandomState(100)
# Create random numbers between [0,1) of shape 2,2 from that random state
print(rn.rand(2,2))
# Seeding the global generator is left disabled, so the draw below is not reproducible:
# np.random.seed(100)
# Create random numbers between [0,1) of shape 2,2 from the global generator
print(np.random.rand(2,2))

[[0.03 0.7 ]
 [0.03 0.32]]
[[-0.98  2.02]
 [-0.51  0.65]]
[[6 2]
 [4 1]]
0.7044204082888571
[[0.61 0.56]
 [0.86 0.92]]
['o' 'a' 'e' 'e' 'u' 'o' 'e' 'a' 'e' 'i']
['i' 'o' 'o' 'e' 'u' 'o' 'o' 'a' 'a' 'o']
[[0.54 0.28]
 [0.42 0.84]]
[[0.57 0.15]
 [0.95 0.23]]


In [67]:
# Create a 5x5 array of random integers from 50 (inclusive) up to 100 (exclusive)
mt=np.random.randint(50,100,size=[5,5])
mt

array([[80, 78, 66, 67, 82],
       [70, 85, 68, 73, 92],
       [88, 85, 57, 63, 97],
       [68, 93, 73, 78, 74],
       [71, 51, 93, 80, 78]])

# How to get the unique items and the counts?

In [28]:
# Seed the global generator so the draw is repeatable, then draw
# 10 random integers from [0, 10).
np.random.seed(100)
arr_rand = np.random.randint(low=0, high=10, size=10)
print(arr_rand)

[8 8 3 7 7 0 4 2 5 2]


In [29]:
# Get the unique items (returned in sorted order) and how many times each one occurs
uniqs, counts = np.unique(arr_rand, return_counts=True)
print("Unique items : ", uniqs)
print("Counts : ", counts)

Unique items :  [0 2 3 4 5 7 8]
Counts :  [1 2 1 1 1 2 2]


# How to handle datasets that have both numbers and text columns?

In [70]:
# Read Auto.csv as floats: skip the header row and fill missing values with -999
data = np.genfromtxt('Auto.csv', delimiter=',', skip_header=1, filling_values=-999, dtype='float')
data[:3] # see first 3 rows

array([[ 1.80e+01,  8.00e+00,  3.07e+02,  1.30e+02,  3.50e+03,  1.20e+01,
         7.00e+01,  1.00e+00, -9.99e+02],
       [ 1.50e+01,  8.00e+00,  3.50e+02,  1.65e+02,  3.69e+03,  1.15e+01,
         7.00e+01,  1.00e+00, -9.99e+02],
       [ 1.80e+01,  8.00e+00,  3.18e+02,  1.50e+02,  3.44e+03,  1.10e+01,
         7.00e+01,  1.00e+00, -9.99e+02]])

In [31]:
# Save the array as a comma-delimited csv file named out.csv
np.savetxt("out.csv", data, delimiter=",")

# How to concatenate two numpy arrays columnwise and rowwise

In [26]:
# Two 4x4 arrays to stack in the next cells: one of zeros, one of ones.
a = np.zeros((4, 4))
b = np.ones((4, 4))
print(a, b, sep="\n")

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [32]:
# Three equivalent ways to stack b below a (row-wise / vertical)
print(np.concatenate((a, b), axis=0))
print(np.vstack((a, b)))
print(np.r_[a, b])

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [34]:
# Three equivalent ways to place b to the right of a (column-wise / horizontal)
print(np.concatenate((a, b), axis=1))
print(np.hstack((a, b)))
print(np.c_[a, b])

[[0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]]
[[0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]]
[[0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1. 1.]]


In [36]:
# np.r_ joins the lists and scalars, in order, into a single 1-D array
np.r_[[1,2,3], 0, 0, [4,5,6]]

array([1, 2, 3, 0, 0, 4, 5, 6])

# How to sort a numpy array based on one or more columns?

In [37]:
# 8x4 array of random integers from 1 to 5 (the upper bound 6 is excluded)
arr = np.random.randint(low=1, high=6, size=(8, 4))
arr

array([[3, 3, 2, 1],
       [1, 5, 4, 5],
       [3, 1, 4, 2],
       [3, 4, 5, 5],
       [2, 4, 5, 5],
       [4, 4, 4, 2],
       [2, 4, 1, 3],
       [2, 2, 4, 3]])

In [38]:
# Sort each column of arr independently (values within a row are not kept together)
np.sort(arr, axis=0)

array([[1, 1, 1, 1],
       [2, 2, 2, 2],
       [2, 3, 4, 2],
       [2, 4, 4, 3],
       [3, 4, 4, 3],
       [3, 4, 4, 5],
       [3, 4, 5, 5],
       [4, 5, 5, 5]])

# How to sort a numpy array based on 1 column using argsort?

In [39]:
# argsort gives the positions that would arrange x in ascending order
x = np.array([1, 10, 5, 2, 8, 9])
sort_index = x.argsort()
print(sort_index)

[0 3 2 4 5 1]


In [40]:
# Indexing x with the argsort positions yields x in ascending order
x[sort_index]

array([ 1,  2,  5,  8,  9, 10])

In [41]:
# Row order that puts the first column in ascending order
sorted_index_1stcol = np.argsort(arr[:, 0])
# Reorder whole rows with it, so the values of each row stay together
arr[sorted_index_1stcol]

array([[1, 5, 4, 5],
       [2, 4, 5, 5],
       [2, 4, 1, 3],
       [2, 2, 4, 3],
       [3, 3, 2, 1],
       [3, 1, 4, 2],
       [3, 4, 5, 5],
       [4, 4, 4, 2]])

In [42]:
# Descending sort by the first column: apply the ascending row order in reverse
arr[sorted_index_1stcol[::-1]]

array([[4, 4, 4, 2],
       [3, 4, 5, 5],
       [3, 1, 4, 2],
       [3, 3, 2, 1],
       [2, 2, 4, 3],
       [2, 4, 1, 3],
       [2, 4, 5, 5],
       [1, 5, 4, 5]])