#  This tutorial follows the 'Python Data Science Handbook'

In [7]:
import numpy as np

# report the version string of the installed NumPy
np.__version__

In [9]:
 np.version.version

'1.14.3'

# to create an array in numpy

In [13]:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [17]:
# type np. and then press tab to see all components
np

<module 'numpy' from 'C:\\Users\\Mayank\\Anaconda3\\lib\\site-packages\\numpy\\__init__.py'>

In [20]:
# to view numpy documentation
np?

A Python Integer Is More Than Just an Integer
The standard Python implementation is written in C. This means that every Python
object is simply a cleverly disguised C structure, which contains not only its value, but
other information as well. For example, when we define an integer in Python, such as
x = 10000, x is not just a “raw” integer. It’s actually a pointer to a compound C structure,
which contains several values. Looking through the Python 3.4 source code, we
find that the integer (long) type definition effectively looks like this (once the C macros
are expanded):

struct _longobject {
long ob_refcnt;
PyTypeObject *ob_type;
size_t ob_size;
long ob_digit[1];
};
A single integer in Python 3.4 actually contains four pieces:
• ob_refcnt, a reference count that helps Python silently handle memory allocation
and deallocation
• ob_type, which encodes the type of the variable
• ob_size, which specifies the size of the following data members
• ob_digit, which contains the actual integer value that we expect the Python variable
to represent


# difference between a python list and numpy array

In [24]:
# an ordinary Python list holding the integers 0 through 9
l = [*range(10)]
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [27]:
type(l)

list

In [28]:
type(l[0])

int

In [30]:
# same values as `l`, but every element converted to its string form
l1 = list(map(str, l))
l1

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [31]:
type(l1)

list

In [32]:
type(l1[0])

str

In [35]:
# a single list may mix ints, strings, classes and booleans ...
l2 = [2, 1, '1', bool, True]
# ... and every element remembers its own type
p2 = list(map(type, l2))

In [36]:
p2

[int, int, str, type, bool]

But this flexibility comes at a cost: to allow these flexible types, each item in the list
must contain its own type info, reference count, and other information—that is, each
item is a complete Python object. In the special case that all variables are of the same
type, much of this information is redundant: it can be much more efficient to store
data in a fixed-type array. The difference between a dynamic-type list and a fixed-type
(NumPy-style) array is illustrated in Figure 2-2 of the Python Data Science Handbook.
At the implementation level, the array essentially contains a single pointer to one contiguous
block of data. The Python list, on the other hand, contains a pointer to a
block of pointers, each of which in turn points to a full Python object like the Python
integer we saw earlier. Again, the advantage of the list is flexibility: because each list
element is a full structure containing both data and type information, the list can be
filled with data of any desired type. Fixed-type NumPy-style arrays lack this flexibility,
but are much more efficient for storing and manipulating data.

# the built-in array module for fixed-type, efficient data

In [37]:
import array

In [38]:
l=list(range(10))

In [40]:
# build a fixed-type array from the list `l` using the standard-library array module
Array=array.array('i',l)
Array
# the type code 'i' means every element is stored as a signed C int

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# numpy arrays

In [41]:
import numpy as np

In [43]:
# using a list to create an array
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [47]:
# all elements of a numpy array must share one type; if they do not, values are upcast where possible
np.array([1.0,2,3,4,5])
# every integer is converted to floating point

array([1., 2., 3., 4., 5.])

In [48]:
np.array([1,2,3,4,'5'])

array(['1', '2', '3', '4', '5'], dtype='<U11')

In [50]:
# to set the data type of an array explicitly, pass the dtype keyword
np.array([1,2,3,4,5],dtype='float32')

array([1., 2., 3., 4., 5.], dtype=float32)

In [54]:
# nested sequences produce a multidimensional array:
# each inner sequence becomes one row of the result
x = np.array([range(3)])
print(x)
np.array([range(start, start + 3) for start in (2, 4, 6)])

[[0 1 2]]


array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

#  create a numpy array from scratch

In [60]:
# create a size specified array of only 0, np.zeros()
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [61]:
# mention data type of array explicitly
np.zeros(10,dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [63]:
# create a 3x5 array filled with one,np.ones()
np.ones((3,5),dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [65]:
# a 3x5 matrix filled with zeros
np.zeros((3,5),dtype=int)

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [66]:
# create a np array filled with 3.14,np.full()
np.full(10,3.14)

array([3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14])

In [68]:
np.full((3,5),3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [69]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [73]:
# create an array of 5 evenly spaced values from 0 to 1 (both endpoints included)
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [74]:
# create a 3x5 array filled with random values between 0 and 1
np.random.random((3,5))

array([[0.41649426, 0.90212797, 0.05257404, 0.40330591, 0.30195826],
       [0.4237625 , 0.20481868, 0.1160847 , 0.24907789, 0.9100162 ],
       [0.27271132, 0.15222572, 0.68601184, 0.31783577, 0.47286652]])

In [76]:
# create a 3x3 array of normally distributed random values with mean 0 and standard deviation 1
np.random.normal(0,1,(3,3))

array([[-2.92501003,  0.60290518, -3.0343446 ],
       [ 0.55523174,  1.94049841, -0.10054247],
       [-0.73817566,  0.5423738 ,  0.52207396]])

In [77]:
# mean 2 and standard deviation 2
np.random.normal(2,2,(3,3))

array([[ 1.76828873,  3.29894006,  2.44842237],
       [ 2.91731045,  5.25618189,  1.34989975],
       [-0.3046209 ,  3.82697555,  1.00941079]])

In [79]:
# create a 3x3 array filled with random integers from 0 up to (but not including) 10
np.random.randint(0,10,(3,3))

array([[2, 8, 6],
       [9, 2, 3],
       [2, 3, 5]])

In [83]:
# create a 3x3 identity matrix
np.eye(3,dtype=int)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [85]:
# create an uninitialized 3x3 array; its values are whatever already happens to be at that memory location
np.empty((3,3))

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [87]:
np.empty(3)

array([1., 1., 1.])

#  numpy standard data types

In [88]:
np.zeros(10,dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [89]:
np.zeros(10,dtype=float)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [91]:
np.zeros(10,dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [92]:
np.zeros(10,dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [93]:
# refer page 41 of the book to view more supported dtypes

#  basics of numpy array

This section
will present several examples using NumPy array manipulation to access data
and subarrays, and to split, reshape, and join the arrays.

## array attributes

In [145]:
# fix the seed so the random values shown below are reproducible
np.random.seed(0)
# six random integers drawn from 0..9
x1 = np.random.randint(0, 10, size=6)

In [113]:
# every np array has the attributes ndim (number of dimensions), shape (size of each dimension) and size (total number of elements)
x1.ndim

1

In [114]:
x1.shape

(6,)

In [115]:
x1.size

6

In [116]:
x2=np.random.randint(10,size=(2,3))

In [117]:
x2.ndim

2

In [118]:
x2.shape

(2, 3)

In [119]:
x2.size

6

In [120]:
# we also have dtype to see data type of array
x2.dtype

dtype('int32')

In [121]:
x2.itemsize

4

In [122]:
x2.nbytes

24

In [123]:
# accessing array elements
x1

array([5, 0, 3, 3, 7, 9])

In [124]:
x1[0]

5

In [128]:
# to index from the end use -1 and so on
x1[-1]

9

In [129]:
x1[-2]

7

In [130]:
x2

array([[3, 5, 2],
       [4, 7, 6]])

In [136]:
# to access elements in a multidim array
print(x2[0,0])
print(x2[(0,0)])

3
3


In [137]:
print(x2[0,1])

5


In [140]:
## modify values using the above notation
x2[0,1]=10

In [141]:
x2

array([[ 3, 10,  2],
       [ 4,  7,  6]])

In [144]:
# if we assign a float value it will be silently truncated because it's an int array
x2[0,0]=99.15
x2

array([[99, 10,  2],
       [ 4,  7,  6]])

## array slicing,sub arrays

Just as we can use square brackets to access individual array elements, we can also use
them to access subarrays with the slice notation, marked by the colon (:) character.
The NumPy slicing syntax follows that of the standard Python list; to access a slice of
an array x, use this:
x[start:stop:step]
If any of these are unspecified, they default to the values start=0, stop=size of
dimension, step=1.

In [147]:
x=np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [148]:
# get first 5 element subarray
x[:5]

array([0, 1, 2, 3, 4])

In [149]:
x[2:5]

array([2, 3, 4])

In [156]:
x[5:]

array([5, 6, 7, 8, 9])

In [152]:
# steps of 2
x[::2]

array([0, 2, 4, 6, 8])

In [153]:
x[1::2]

array([1, 3, 5, 7, 9])

In [154]:
# if the step is negative, the defaults for start and stop are swapped; this is an easy way to reverse an array
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [155]:
x[5::-2]

array([5, 3, 1])

### multidimensional array slicing

In [163]:
x2=np.random.randint(20,size=(3,4))

In [164]:
x2

array([[17,  5, 13,  8],
       [ 9, 19, 16, 19],
       [ 5, 15, 15,  0]])

In [165]:
x2[:,:]

array([[17,  5, 13,  8],
       [ 9, 19, 16, 19],
       [ 5, 15, 15,  0]])

In [166]:
# first two rows and first two columns
x2[:2,:2]

array([[17,  5],
       [ 9, 19]])

In [169]:
# all rows alternate columns
x2[:,::2]

array([[17, 13],
       [ 9, 16],
       [ 5, 15]])

In [170]:
# reversing the array along both dimensions at once
x2[::-1,::-1]

array([[ 0, 15, 15,  5],
       [19, 16, 19,  9],
       [ 8, 13,  5, 17]])

In [175]:
# accessing 1st column of an array
print(x2)
print(x2[:,0])

[[17  5 13  8]
 [ 9 19 16 19]
 [ 5 15 15  0]]
[17  9  5]


In [177]:
# print first row of x2
print(x2[0,:])

[17  5 13  8]


In [179]:
# also can be used
print(x2[0])

[17  5 13  8]


One important—and extremely useful—thing to know about array slices is that they
return views rather than copies of the array data. This is one area in which NumPy
array slicing differs from Python list slicing: in lists, slices will be copies.

In [180]:
print(x2)

[[17  5 13  8]
 [ 9 19 16 19]
 [ 5 15 15  0]]


In [181]:
x2_sub=x2[:2,:2]

In [182]:
x2_sub

array([[17,  5],
       [ 9, 19]])

In [183]:
x2_sub[0,0]=45

In [186]:
# we see that x2 changed as well, even though we only modified x2_sub;
# this is because np array slicing returns a view instead of a copy of the array data
print(x2)

[[45  5 13  8]
 [ 9 19 16 19]
 [ 5 15 15  0]]


### create copies

In [187]:
# to create copy we use copy()
x2_sub_copy=x2[:2,:2].copy()

In [188]:
x2_sub_copy

array([[45,  5],
       [ 9, 19]])

In [191]:
x2_sub_copy[0,0]=100

In [193]:
# x2 has no change in this case
x2

array([[45,  5, 13,  8],
       [ 9, 19, 16, 19],
       [ 5, 15, 15,  0]])

## reshaping arrays

In [205]:
np.arange(9)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [214]:
# to reshape easiest way is to use reshape()
np.arange(9).reshape((3,3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [216]:
# reshape only works when the new shape holds exactly as many elements as the
# original array. A mismatch raises ValueError; it is caught here so that the
# notebook still runs top to bottom while showing the same error message.
try:
    np.arange(9).reshape((3,4))
except ValueError as err:
    # keep a handle on the exception: `err` is unbound once this block ends
    reshape_error = err
    print(f"{type(err).__name__}: {err}")

ValueError: cannot reshape array of size 9 into shape (3,4)

In [220]:
# A frequent reshaping task is turning a one-dimensional array into a
# two-dimensional row or column matrix. This can be done with reshape, or
# with the np.newaxis keyword inside a slice operation.
x = np.array([1, 2, 3])
x.reshape(1, 3)

array([[1, 2, 3]])

In [225]:
#can also use np.newaxis in slicing mode
x[np.newaxis,:]

array([[1, 2, 3]])

In [226]:
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [228]:
x[:,np.newaxis]

array([[1],
       [2],
       [3]])

## array concatenation and splitting

All of the preceding routines worked on single arrays. It’s also possible to combine
multiple arrays into one, and to conversely split a single array into multiple arrays.
We’ll take a look at those operations here.

### concatenation of arrays

Concatenation, or joining of two arrays in NumPy, is primarily accomplished
through the routines np.concatenate, np.vstack, and np.hstack. np.concatenate
takes a tuple or list of arrays as its first argument, as we can see here:

In [231]:
# two 1-D arrays joined end to end
x, y = np.array([1, 2, 3]), np.array([4, 5, 6])
np.concatenate((x, y))

array([1, 2, 3, 4, 5, 6])

In [232]:
z=np.array([7,8,9])
np.concatenate([x,y,z])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [235]:
# two 2x3 arrays joined along the first axis (rows), which is the default
p = np.array([[1, 2, 3],
              [4, 5, 6]])
q = np.array([[7, 8, 9],
              [10, 11, 12]])
np.concatenate((p, q), axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [236]:
np.concatenate([p,q],axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [237]:
# vertical stack: the 1-D array becomes the top row above the 2-D block
x = np.array([1, 2, 3])
y = np.array([[4, 5, 6],
              [7, 8, 9]])
np.vstack((x, y))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [240]:
x=np.array([[99],[99]])
np.hstack([x,y])

array([[99,  4,  5,  6],
       [99,  7,  8,  9]])

### splitting array

The opposite of concatenation is splitting, which is implemented by the functions
np.split, np.hsplit, and np.vsplit. For each of these, we can pass a list of indices
giving the split points:

In [241]:
x=np.array([1,2,3,4,5,6,7,8,9])

In [242]:
#Notice that N split points lead to N + 1 subarrays.
y1,y2,y3=np.split(x,[3,5])

In [243]:
y1

array([1, 2, 3])

In [244]:
y2

array([4, 5])

In [245]:
y3

array([6, 7, 8, 9])

In [246]:
# three split points cut x into four consecutive pieces
y1, y2, y3, y4 = np.split(x, [3, 5, 8])
# show each piece on its own line
print(y1, y2, y3, y4, sep="\n")

[1 2 3]
[4 5]
[6 7 8]
[9]


In [247]:
# the integers 0..15 laid out as a 4x4 grid
grid = np.arange(16).reshape(4, 4)

In [249]:
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [261]:
upper,lower=np.vsplit(grid,[2])
# try changing value of 2 to see result change

In [262]:
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [265]:
left,right=np.hsplit(grid,[2])
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [266]:
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## computation on numpy arrays