In [3]:
from IPython.display import Image

In [2]:
import numpy as np

---------------------------------
#### NumPy Datatypes
-------------------------------
The NumPy provides a higher range of numeric data types than that provided by the Python. A list of numeric data types is given in the following table.

|SN	|Data type	|Description  |
|---|-----------|-------------|
|1	|bool_	|It represents the boolean value indicating true or false. It is stored as a byte.|
|2	|int_	|It is the default type of integer. It is identical to long type in C that contains 64 bit or 32-bit integer.|
|3	|intc	|It is similar to the C integer (c int) as it represents 32 or 64-bit int.|
|4	|intp	|It represents the integers which are used for indexing.|
|5	|`int8`	|It is the 8-bit integer identical to a byte. The range of the value is -128 to 127.|
|6	|`int16`	|It is the 2-byte (16-bit) integer. The range is -32768 to 32767.|
|7	|`int32`	|It is the 4-byte (32-bit) integer. The range is -2147483648 to 2147483647.|
|8	|`int64`	|It is the 8-byte (64-bit) integer. The range is -9223372036854775808 to 9223372036854775807.|
|9	|uint8	|It is the 1-byte (8-bit) unsigned integer.|
|10	|uint16	|It is the 2-byte (16-bit) unsigned integer.|
|11	|uint32	|It is the 4-byte (32-bit) unsigned integer.|
|12	|uint64	|It is the 8 bytes (64-bit) unsigned integer.|
|13	|`float_`	|It is identical to float64.|
|14	|`float16`	|It is the half-precision float. 5 bits are reserved for the exponent. 10 bits are reserved for mantissa, and 1 bit is reserved for the sign.|
|15	|`float32`	|It is a single precision float. 8 bits are reserved for the exponent, 23 bits are reserved for mantissa, and 1 bit is reserved for the sign.|
|16	|`float64`	|It is the double precision float. 11 bits are reserved for the exponent, 52 bits are reserved for mantissa, 1 bit is used for the sign.
|17	|complex_	|It is identical to complex128.|
|18	|complex64	|It is used to represent the complex number where real and imaginary part shares 32 bits each.|
|19	|complex128	|It is used to represent the complex number where real and imaginary part shares 64 bits each.|

`NumPy dtype`
All the items of a numpy array are data type objects also known as numpy dtypes. A data type object implements the fixed size of memory corresponding to an array.

- NumPy Built-in Data Types
    - We can reference the built-in data types in NumPy by particular character code.

|Char code | description|
|--------- | -----------|
|i |`integer`|
|b |`boolean`|
|u |unsigned|
|f | `float`|
|c | complex float|
|m | `timedelta`|
|M | `datetime`|
|O | object|
|S | `string`|
|U | unicode string|
|V | void|

In [4]:
np.dtype(np.int16)

dtype('int16')

In [5]:
my_arr = np.array([1,2,3,4,5])
my_arr.dtype

dtype('int32')

In [6]:
my_arr = np.array([1,2,3,4,5], dtype=float)
my_arr.dtype

dtype('float64')

##### Example 1 - 

In [247]:
dt = np.dtype(np.int32) 
print (dt)

int32


##### Example 2 -

In [248]:
# int8, int16, int32, int64 can be replaced by equivalent string 'i1', 'i2','i4', etc. 
dt = np.dtype('i4')
print (dt)

int32


##### Example 3 -

In [254]:
dt     = np.dtype([('age', np.int8)]) 

my_arr = np.array([10, 20, 30], dtype = dt) 
print (my_arr.dtype)
print (my_arr['age'])

[('age', 'i1')]
[10 20 30]


##### Example 5 -

In [258]:
student = np.dtype([('name','S20'), ('age', 'i1'), ('marks', 'f4')]) 

a = np.array([('Raj', 21, 50), ('David', 18, 75)], dtype = student) 

print(a['name'], a['age'], a['marks'])

[b'Raj' b'David'] [21 18] [50. 75.]


The default type for many array creation operations is float64, which is an 8 byte floating point value that is mostly interchangeable with a regular Python float value.

The np.zeros function creates an array of zeros, defaulting to float64 type. The following are all equivalent:

In [259]:
m = 10
x = np.zeros(m)
x = np.zeros(m, np.float64)
x = np.zeros(m, dtype=np.float64)
x = np.zeros(m, dtype=float)
x = np.zeros(m, dtype='d')
x = np.zeros(m, dtype='double')

##### Change data type of given numpy array

In [7]:
# Create a numpy array 
arr = np.array([10, 20, 30, 40, 50]) 
  
# Print the dtype 
print(arr.dtype) 

int32


In [262]:
# change the dtype to 'float64' 
arr = arr.astype('float64') 
  
# Print the array after changing 
# the data type 
print(arr) 
  
# Also print the data type 
print(arr.dtype) 

[10. 20. 30. 40. 50.]
float64


##### numpy astype (bool)

In [10]:
a=[[1,2,1],
   [2,3,-5]]

b=[[0,0,0],
   [2,3,5]]

c=np.array(a).astype(bool)
d=np.array(b).astype(bool)

print(c)
print('\n')
print(d)

[[ True  True  True]
 [ True  True  True]]


[[False False False]
 [ True  True  True]]


0 means False, non-zero means True!

In [8]:
a=[[1,2,1],
   [2,3,5]]

b=[[0,0,0],
   [2,3,5]]

c=np.array(a).astype(bool).astype(int)
d=np.array(b).astype(bool).astype(int)

print(c)
print(d)

[[1 1 1]
 [1 1 1]]
[[0 0 0]
 [1 1 1]]


If we give a type to elements that cannot be cast then a `value error` will generate.

In [9]:
arr = np.array(['a', '2', '3'], dtype='i')

ValueError: invalid literal for int() with base 10: 'a'

- Changing Data Type of Existing Array
    - This function creates a copy of the existing array. 
    - It then allows specifying the new data type for the copy as a parameter.
    - The data type can be specified using the respective character code.


In [10]:
arr    = np.array([1.1, 2.1, 3.1])
newarr = arr.astype('i')

print(arr.dtype)
print(newarr.dtype)

float64
int32


Boolean

In [12]:
np.ones((4, 3), dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [15]:
arr_1 = np.random.randn(3, 3)

print(arr_1)

bool_arr = arr_1 < 0.5    # 0.5 is used in Machine learning often, as cut off

print(bool_arr.dtype)
bool_arr

[[-0.49570325  1.12827624  1.77160721]
 [ 0.04925645  1.04176921 -0.05792464]
 [ 0.93603776  0.3377035   0.3824218 ]]
bool


array([[ True, False, False],
       [ True, False,  True],
       [False,  True,  True]])

another example

In [16]:
x = np.array([1, 2, 3, 4, 5])

In [17]:
x > 4

array([False, False, False, False,  True])

another example

In [18]:
(2 * x) == (x ** 2)

array([False,  True, False, False, False])

another example

In [11]:
A = np.array([
[12, 13, 14, 12, 16, 14, 11, 10,  9],
[11, 14, 12, 15, 15, 16, 10, 12, 11],
[10, 12, 12, 15, 14, 16, 10, 12, 12],
[ 9, 11, 16, 15, 14, 16, 15, 12, 10],
[12, 11, 16, 14, 10, 12, 16, 12, 13],
[10, 15, 16, 14, 14, 14, 16, 15, 12],
[13, 17, 14, 10, 14, 11, 14, 15, 10],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 19, 12, 14, 11, 12, 14, 18, 10],
[14, 22, 17, 19, 16, 17, 18, 17, 13],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 19, 12, 14, 11, 12, 14, 18, 10],
[14, 22, 12, 14, 11, 12, 14, 17, 13],
[10, 16, 12, 14, 11, 12, 14, 18, 11]])

B = A < 15
B

array([[ True,  True,  True,  True, False,  True,  True,  True,  True],
       [ True,  True,  True, False, False, False,  True,  True,  True],
       [ True,  True,  True, False,  True, False,  True,  True,  True],
       [ True,  True, False, False,  True, False, False,  True,  True],
       [ True,  True, False,  True,  True,  True, False,  True,  True],
       [ True, False, False,  True,  True,  True, False, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False, False, False, False, False, False, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False,  True],
       [ True, False,  True,  True,  True,  True,  True, False, 

In [12]:
B.astype(np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  B.astype(np.int)


array([[1, 1, 1, 1, 0, 1, 1, 1, 1],
       [1, 1, 1, 0, 0, 0, 1, 1, 1],
       [1, 1, 1, 0, 1, 0, 1, 1, 1],
       [1, 1, 0, 0, 1, 0, 0, 1, 1],
       [1, 1, 0, 1, 1, 1, 0, 1, 1],
       [1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1]])

another example 

- this example useful in converting `probabilities to classes`

In [13]:
prob = np.random.uniform(0, 1, 25)
prob

array([0.22326886, 0.2678452 , 0.41872259, 0.43381926, 0.87850003,
       0.47766958, 0.92824262, 0.48518916, 0.28340841, 0.38605784,
       0.93609281, 0.5801712 , 0.6077865 , 0.74348069, 0.08133779,
       0.56140931, 0.09149895, 0.25344082, 0.40054509, 0.27905331,
       0.4055264 , 0.82737258, 0.00478121, 0.79429841, 0.96474594])

In [14]:
np.set_printoptions(suppress=True)

In [15]:
prob

array([0.22326886, 0.2678452 , 0.41872259, 0.43381926, 0.87850003,
       0.47766958, 0.92824262, 0.48518916, 0.28340841, 0.38605784,
       0.93609281, 0.5801712 , 0.6077865 , 0.74348069, 0.08133779,
       0.56140931, 0.09149895, 0.25344082, 0.40054509, 0.27905331,
       0.4055264 , 0.82737258, 0.00478121, 0.79429841, 0.96474594])

In [16]:
prob_bool = prob < 0.5
prob_bool

array([ True,  True,  True,  True, False,  True, False,  True,  True,
        True, False, False, False, False,  True, False,  True,  True,
        True,  True,  True, False,  True, False, False])

another example ...

- the example useful in predictions
    - 15 samples
    - each sample has 3 output probabilities
        - probs dont add to 1 though :( 

In [17]:
pred_probs = np.random.random((15, 3))
pred_probs

array([[0.22807311, 0.01237063, 0.92133051],
       [0.22081726, 0.63561522, 0.17798331],
       [0.34412932, 0.19883081, 0.66303554],
       [0.19219688, 0.37038724, 0.43944657],
       [0.57934387, 0.05150281, 0.97169686],
       [0.96752305, 0.64618245, 0.99442591],
       [0.86241424, 0.78410942, 0.59559696],
       [0.21480658, 0.9332608 , 0.52123572],
       [0.84550201, 0.41211391, 0.67886634],
       [0.72450999, 0.56686162, 0.75600725],
       [0.31540815, 0.43891926, 0.38230374],
       [0.71579578, 0.45367652, 0.45074803],
       [0.26511609, 0.44799793, 0.24047336],
       [0.02866745, 0.01941558, 0.33989606],
       [0.05462638, 0.96742782, 0.60253582]])

get max prob for each sample(row)

In [18]:
np.max(pred_probs, axis=1, keepdims=True)

array([[0.92133051],
       [0.63561522],
       [0.66303554],
       [0.43944657],
       [0.97169686],
       [0.99442591],
       [0.86241424],
       [0.9332608 ],
       [0.84550201],
       [0.75600725],
       [0.43891926],
       [0.71579578],
       [0.44799793],
       [0.33989606],
       [0.96742782]])

get the indices of the max values for every sample/row

In [19]:
np.argmax(pred_probs, axis=1)

array([2, 1, 2, 2, 2, 2, 0, 1, 0, 2, 1, 0, 1, 2, 1], dtype=int64)