In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# How to create an empty and a full NumPy array?

**Syntax:**

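> **numpy.full(shape, fill_value, dtype = None, order = 'C')**: Return a new array of given shape and type, filled with `fill_value`.<br>
> **numpy.empty(shape, dtype = float, order = 'C')**: Return a new array of given shape and type without initializing its entries; the values are arbitrary leftovers from memory, not random numbers.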

In [2]:
# np.empty only reserves memory, so the values printed here are arbitrary.
uninitialised = np.empty((2, 2), dtype='int64')
print('Empty Numpy Array\n', uninitialised)


# Two equivalent ways of building a 2x3 integer array filled with 5.
filled_direct = np.full((2, 3), 5, dtype='int64')
print('\n\nFull Numpy Array\n', filled_direct)

filled_scaled = 5 * np.ones(shape = (2, 3), dtype = 'int')
print('\n\nFull Numpy Array\n', filled_scaled)

# Create a Numpy array filled with all zeros

**Syntax:**

> **numpy.zeros(shape, dtype = float, order = 'C')**<br>
> **shape** : integer or sequence of integers<br>
> **order**  : C_contiguous or F_contiguous<br>
>&emsp;&emsp;&emsp; C-contiguous order in memory (last index varies the fastest)<br>
>&emsp;&emsp;&emsp; C order means that operating row-wise on the array will be slightly quicker<br>
>&emsp;&emsp;&emsp; FORTRAN-contiguous order in memory (first index varies the fastest).<br>
>&emsp;&emsp;&emsp; F order means that column-wise operations will be faster.<br>
> **dtype** : [optional, float (by default)] Data type of returned array.<br>

In [3]:
# 2x3 integer array of zeros; as the cell's last expression it is displayed by the notebook.
np.zeros(shape=(2, 3), dtype='int')

# Check whether a Numpy array contains a specified row

**Syntax:**

> **numpy.ndarray.tolist()**: Return the array as a (possibly nested) Python list.<br>

In [4]:
# 5x5 grid holding 0..24, filled row by row.
a = np.arange(25).reshape(5, 5)
print('Original Np Array', a)

# As a nested Python list, each row is a plain list, so `in` can test for a whole row.
rows = a.tolist()
print('\nConverted Np Array to List: ', rows)

############### ANSWER ###############
print('\n\nTo check whether the row is present in the NP Array')
for candidate in ([1, 2, 3, 4, 5], [0, 1, 2, 3, 4]):
    print(candidate in rows)

##### To check which elements in the list are present in the Array #####
# np.isin works element-wise: True wherever a value of `a` occurs in the list.
wanted = [1, 2, 3, 4, 5]
print('\nElements of the list present in Array\n', np.isin(a, wanted))

In [5]:
# Drop every row of a float array that contains at least one NaN
a = np.array([[10.5, 22.5, 3.8],
              [41, np.nan, np.nan]])

nan_mask = np.isnan(a)              # True exactly where an entry is NaN
row_has_nan = nan_mask.any(axis=1)  # axis=1 collapses the columns: one flag per row

print("\nRemove all rows containing non-numeric elements")
print(a[~row_has_nan])

print('\n\nNaN Index', nan_mask)


print('\n\nWhether row has NaN', row_has_nan)

In [6]:
# numpy.squeeze() removes the length-1 axes from an array's shape.

a = np.array([[[2, 2, 2],
               [2, 2, 2]]])
b = np.squeeze(a)

# Before: the leading axis has length 1.
print ("Input array : ", a)
print("Shape of input array : ", a.shape)

# After: that axis is gone, the data is unchanged.
print ("output squeezed array : ", b)
print("Shape of output array : ", b.shape)

In [7]:
# Find the most frequent value in a NumPy array

a = np.array([1, 2, 3, 4, 5, 1, 2, 1, 1, 1])

# numpy.bincount(): position i of the result holds how many times the value i occurs in `a`.
occurrences = np.bincount(a)

print("Original array:")
print(a)

# argmax() gives the position of the largest count, which is the most frequent value itself.
print("\nMost frequent value in the above array:")
print(occurrences.argmax())

print('\nBin Count',occurrences)

In [8]:
# How to check whether specified values are present in NumPy array?

a = np.array([[2, 3, 0],
              [4, 1, 6]])

# Element-wise comparison: a boolean array shaped like `a`, True where the entry equals 1.
is_one = a == 1
is_one

In [9]:
# flatten
a = np.array([[2, 3],
              [4, 5]])

# Both calls give the same 1-D sequence of elements; print each in turn.
for one_dimensional in (a.flatten(), a.ravel()):
    print(one_dimensional)

    print()

In [10]:
# Ways to add row/columns in numpy array

b = np.array([1, 2, 3])

# hstack on two 1-D arrays joins them end to end.
a = np.array([9, 6, 10])
joined = np.hstack((a, b))
print('Horizontal:\n',joined)

# vstack puts `b` underneath the 3x3 array as an extra row.
a = np.array([[11, 22, 33],
              [45, 4, 7],
              [9, 6, 10]])
stacked = np.vstack((a, b))
print('\nVertical:\n', stacked)

# Start from a single [0, 0] row and add two more rows with np.append();
# axis=0 appends along the rows instead of flattening.
empt_array = np.array([[0, 0]])
for new_row in ([10, 20], [40, 50]):
    empt_array = np.append(empt_array, np.array([new_row]), axis=0)

print('\nAppend rows:\n', empt_array)

In [11]:
# Matrix Multiplication in NumPy

a = [[1, 2],
     [2, 3]]
b = [[4, 5],
     [6, 7]]

# np.dot accepts the nested lists directly; for two 2-D inputs it is the matrix product.
product = np.dot(a, b)
print(product)

In [12]:
# How to count the frequency of unique values in NumPy array?

a = np.array([10, 20, 5, 10, 8, 20, 8, 9])

# return_counts=True adds a second array holding how often each sorted unique value occurs.
unique_and_counts = np.unique(a, return_counts=True)
unique, frequency = unique_and_counts

print(f'Unique elements: {unique}\nFreqeuncy:{frequency}')

In [13]:
# Compute the covariance matrix of two given NumPy arrays

a = np.array([0, 1, 1])
b = np.array([2, 2, 1])

# Each argument is one variable, so the result is 2x2:
# variances on the diagonal, cov(a, b) off the diagonal.
covariance_matrix = np.cov(a, b)
print("\nCovariance matrix of the said arrays:\n",covariance_matrix)

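**Covariance**<br>
$cov(x,y) = \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})$<br>
This is the sample covariance, which is what `np.cov` computes by default; pass `bias=True` to divide by $n$ instead of $n-1$.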

In [14]:
# Replace NumPy array elements that don't satisfy the given condition

# 1-D: raise every value below 50 up to 50
a = np.array([75.42436315, 42.48558583, 60.32924763])
too_small = a < 50
a[too_small] = 50
print('Changing 1D Array\n', a)


# 2-D: the boolean mask has the array's shape, so it addresses individual cells
a = np.array([
    [45.42436315, 52.48558583, 10.32924763],
    [5.7439979, 50.58220701, 25.38213418],
])
above_limit = a > 30.
a[above_limit] = 5.25
print('\nChanging 2D Array\n', a)

# 3-D: the same masked assignment works unchanged
a = np.array([
    [[11, 25.5, 70.6], [30.9, 45.5, 55.9], [20.7, 45.8, 7.1]],
    [[50.1, 65.9, 8.2], [70.4, 85.8, 10.3], [11.3, 22.2, 33.6]],
    [[19.9, 69.7, 36.8], [1.2, 5.1, 24.4], [4.9, 20.8, 96.7]],
])
above_limit = a > 30
a[above_limit] = 1000
print('\nChanging 3D Array\n', a)

In [15]:
# Return the indices of elements where the given condition is satisfied

a = np.array([[1, 2, 3],
              [4, 5, 6]])

# With only a condition, np.where returns one index array per axis
# (row indices first, then column indices) for the True entries.
below_four = a < 4
print(np.where(below_four))

In [16]:
# How to Remove columns in Numpy array that contains non-numeric values?

a = np.array([[10.5, 22.5, np.nan],
              [41, 52.5, np.nan]])

# axis=0 collapses the rows, giving one "contains NaN" flag per column.
column_has_nan = np.isnan(a).any(axis=0)

# Keep all rows, but only the columns whose flag is False.
print(a[:, ~column_has_nan])

In [17]:
# How to access different rows of a multidimensional NumPy array?

# The last row contains floats, so NumPy stores the whole 2-D array as floats.
a = np.array([[1, 20, 3, 1],
              [40, 5, 66, 7],
              [70, 88, 9, 11],
              [80, 100, 50, 77],
              [1, 8.5, 7.9, 4.8]])

# A list of indices selects whole rows along the first axis (here rows 1 and 3).
rows_2d = a[[1, 3]]
print('Accessing Rows in 2-D Array:\n', rows_2d)


a = np.array([[[10, 25, 70], [30, 45, 55], [20, 45, 7]],
              [[50, 65, 8], [70, 85, 10], [11, 22, 33]]])
print('\n\nGiven 3-D Array:')
print(a)

# `:` keeps both 2-D blocks and `[1]` picks row 1 inside each of them.
# Indexing with a list keeps that axis, so the result has shape (2, 1, 3).
# The label previously said "2-D" although the array here is 3-D.
rows_3d = a[:, [1]]
print('\nAccessing Rows in 3-D Array:\n', rows_3d)

In [18]:
# Combined array index by index

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# similar to ZIP: dstack pairs the elements along a new last axis, giving shape (1, 3, 2)
print(np.dstack((a, b)))

# column_stack places `a` and `b` side by side as columns, producing the (3, 2)
# array of pairs in one call. The earlier version grew an array with np.append
# inside a loop, starting from an uninitialised np.empty row that then had to be
# sliced off; every append also copied the whole array.
result = np.column_stack((a, b))

print(result)

In [19]:
# Generate Random Numbers From The Uniform Distribution using NumPy

# A seeded Generator makes the printed samples identical on every Restart & Run All;
# the unseeded global np.random functions gave different numbers on each run.
rng = np.random.default_rng(42)

# Five draws from the half-open interval [0, 10)
scaled_draws = rng.uniform(0, 10, 5)
print("1D Array with random values and Uniform distribution: \n", scaled_draws)

# With no bounds given, uniform() samples from [0, 1)
unit_draws = rng.uniform(size = 5)
print("\n1D Array with random values and Uniform distribution: \n", unit_draws)

# rng.random(5) is the shorthand for the [0, 1) case.

In [20]:
# Filter out integers from float numpy array

a = np.array([1.0, 1.2, 2.2, 2.0, 3.0, 2.0])
print ("initial array : ", a)

# Casting to int drops the fractional part, so a value differs from its cast
# exactly when it is not a whole number; those are the values that are kept.
has_fraction = a != a.astype(int)
print ("final array", a[has_fraction])

In [21]:
# Insert a new axis within a NumPy array
#
# Each np.newaxis in an index adds one dimension of length 1:
# 1D -> 2D, 2D -> 3D, 3D -> 4D, 4D -> 5D

a = np.arange(4)
print('Original Array Size',a.shape)

# New axis in front of the existing one: shape (1, 4), a row vector
row_vec = a[np.newaxis]
print('Row Vector', row_vec.shape)

# New axis after the existing one: shape (4, 1), a column vector
col_vec = a[..., np.newaxis]
print('Column Vector', col_vec.shape)

![](http://i.stack.imgur.com/zkMBy.png)

In [22]:
# Basic operations

a = np.array([-1.8, -1.6, -0.5, 0.5, 1.6, 1.8, 3.0])
print('Original value', a)

# Element-wise rounding variants, printed in turn with their labels.
rounding_demos = (
    ("\nFloor values : \n", np.floor(a)),
    ("\nCeil values : \n", np.ceil(a)),
    ("\nTruncated values : \n", np.trunc(a)),
    ('\nRound values :\n', np.rint(a)),
    ('\nRound values :\n', a.round()),
)
for label, rounded in rounding_demos:
    print(label, rounded)

print("\nSquare-root of an array:\n", np.sqrt([1, 4, 9, 16]))

# Summary statistics of `a`.
print("\nMedian: ", np.median(a))
print("\nMean: ", np.mean(a))

# nanmean ignores the NaN appended here and averages the remaining values.
with_gap = np.append(a, [np.nan, 50])
print("\nMean: ", np.nanmean(with_gap))

# The function and method spellings are printed side by side for comparison.
print("\nVariance: ", np.var(a), a.var())
print("\nStd Dev: ", np.std(a), a.std())

# Measures of central tendency
The following methods are used to find measures of central tendency in NumPy: <br>
1. **mean()**- takes a NumPy array as an argument and returns the arithmetic mean of the data.<br>
&emsp; np.mean(arr)<br>
2. **median()**- takes a NumPy array as an argument and returns the median of the data.<br>
&emsp; np.median(arr)<br>

# Measures of dispersion
The following methods are used to find measures of dispersion in NumPy: 

1. **amin()**- it takes a NumPy array as an argument and returns the minimum.<br>
&emsp; np.amin(arr)<br>
2. **amax()**- it takes a NumPy array as an argument and returns maximum.<br>
&emsp; np.amax(arr)<br>
3. **ptp()**- it takes a NumPy array as an argument and returns the range of the data.<br>
&emsp; np.ptp(arr)
4. **var()**- it takes a NumPy array as an argument and returns the variance of the data.<br>
&emsp; np.var(arr)
5. **std()**- it takes a NumPy array as an argument and returns the standard deviation of the data.<br>
&emsp; np.std(arr)