In [1]:
# checking numpy version
import numpy
numpy.__version__

'1.24.3'

In [2]:
import numpy as np

In [None]:
#to check available features
np.<TAB>;

In [6]:
# numpy built in documentation
np?

# Understanding Data Types

 Python variables are more than just their value; they also contain extra information about the type of the value. 

 when we define an integer in Python, such as x = 10000, x is not just a "raw" integer. It's actually a pointer to a compound C structure, which contains several values.
 
 ![image.png](attachment:image.png)

Here PyObject_HEAD is the part of the structure containing the reference count, type code, and other pieces mentioned before.

Notice the difference here: a C integer is essentially a label for a position in memory whose bytes encode an integer value. A Python integer is a pointer to a position in memory containing all the Python object information, including the bytes that contain the integer value. This extra information in the Python integer structure is what allows Python to be coded so freely and dynamically.

# A python list is more than just a list

In [7]:
# a list can contain heterogeneous elements,
L=[True, "2",3.0,4,None]

to allow these flexible types, each item in the list must contain its own type info, reference count, and other information–that is, each item is a complete Python object. In the special case that all variables are of the same type, much of this information is redundant: it can be much more efficient to store data in a fixed-type array. The difference between a dynamic-type list and a fixed-type (NumPy-style) array is illustrated in the following figure:

![image.png](attachment:image.png)

At the implementation level, the array essentially contains a single pointer to one contiguous block of data. The Python list, on the other hand, contains a pointer to a block of pointers, each of which in turn points to a full Python object like the Python integer we saw earlier. Again, the advantage of the list is flexibility: because each list element is a full structure containing both data and type information, the list can be filled with data of any desired type. Fixed-type NumPy-style arrays lack this flexibility, but are much more efficient for storing and manipulating data.

# Fixed Type Arrays in Python

In [8]:
import array
L = list(range(10))
A=array.array('i',L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

The built-in array module (available since Python 3.3) can be used to create dense arrays of a uniform type

Here 'i' is a type code indicating the contents are integers.

Much more useful, however, is the ndarray object of the NumPy package. While Python's <mark>array</mark> object provides efficient storage of array-based data, NumPy adds to this efficient operations on that data.

# Creating Arrays from python lists

In [9]:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

unlike Python lists, NumPy is constrained to arrays that all contain the same type. If types do not match, NumPy will upcast if possible (here, integers are up-cast to floating point):

In [10]:
np.array([3.14,4,-2,3])

array([ 3.14,  4.  , -2.  ,  3.  ])

In [11]:
# to explicitly set the data type,
np.array([1,2,3,4],dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

 NumPy arrays can explicitly be multi-dimensional; here's one way of initializing a multidimensional array using a list of lists:

In [13]:
np.array([range(i,i+3) for i in (2,4,6)])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

# Creating arrays from scratch

In [24]:
np.zeros(10,dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [15]:
# creating a 4x5 floating point array filled with ones
np.ones((4,5),dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [16]:
# creating a 3x5 array filled with 4.5
np.full((3,5),4.5)

array([[4.5, 4.5, 4.5, 4.5, 4.5],
       [4.5, 4.5, 4.5, 4.5, 4.5],
       [4.5, 4.5, 4.5, 4.5, 4.5]])

In [17]:
# similiar to the range()
# starting 0 , ending at 10 ( exclusive) , step by 2
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

In [18]:
# an array of evenly spaced five values between 0 and 2
np.linspace(0,2,5)

array([0. , 0.5, 1. , 1.5, 2. ])

In [19]:
# random values between 0 and 1
# 3x3 array of uniformly distributed
np.random.random((3,3))

array([[0.27775202, 0.30653928, 0.12419831],
       [0.21557208, 0.51860829, 0.02887263],
       [0.13574347, 0.96495877, 0.0498758 ]])

<b> Uniformly Distributed Values </b>
<ul>
    <li> It is characterized by a constant probability of occurence for each value within a specific range </li>
    <li>In a uniform distribution, all values within a given range have an equal likelihood of occurring, and the probability density function remains constant within that range.</li>
    <li>For instance, rolling a fair six-sided die results in a uniform distribution as each number (1 through 6) has an equal chance of being rolled.</li>
    <li>Unlike the bell-shaped curve of the normal distribution, a uniform distribution appears as a flat, constant line when plotted on a graph.</li>

In [20]:
# creating a 3x3 array of normally distributed random values
# with mean 0 and std 1
np.random.normal(0,1,(3,3))

array([[ 0.37608278, -0.20977734,  0.73858336],
       [ 1.26646714, -0.26307536, -0.23636862],
       [-1.03799191,  0.95960514,  0.36562835]])

<b> Normally distributed values </b>
<ul>
    <li>It is often called a Gaussian distribution or bell curve, is characterized by a symmetrical , bell-shaped curve when plotted on graph</li>
    <li>In a normal distribution, the values cluster around the mean with most values concentrated in the middle and fewer values occuring as one move away from the mean </li>
    <li>It follows a specific mathematical form known as the Gaussian distribution , where the mean, median, and mode are all equal.</li>
</ul>

In [21]:
# creating a 3x3 array of random integers in the interval [0,10)
np.random.randint(0,10,(3,3))

array([[7, 4, 9],
       [6, 6, 5],
       [4, 0, 8]])

In [22]:
# a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [23]:
# creating an uninitialized array of three integers
# the values will be whatever happens to already exist at that memory location
np.empty(3)

array([1., 1., 1.])

# Numpy standard data types

In [25]:
np.zeros(10,dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

or using the associated numpy object:

In [26]:
np.zeros(10,dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

![image.png](attachment:image.png)

# Numpy Array Attributes

 We'll start by defining three random arrays, a one-dimensional, two-dimensional, and three-dimensional array. We'll use NumPy's random number generator, which we will seed with a set value in order to ensure that the same random arrays are generated each time this code is run:

In [29]:
np.random.seed(0)
from numpy  import random
rng=random.default_rng()
type(rng)

numpy.random._generator.Generator

In [30]:
rng.integers(0,10,size=10,endpoint=True)

array([10,  5,  9,  0,  9,  8,  3,  2,  2,  2], dtype=int64)

In [31]:
# random bits
rng.integers(2,size=64)

array([1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1],
      dtype=int64)

In [33]:
rng2=random.default_rng(0) # seed for reproducibility
rng2.integers?

In [35]:
x1=rng2.integers(10,size=6) # 1d
x2=rng2.integers(10,size=(3,4)) #2D
x3=rng2.integers(10,size=(3,4,5)) #3D

In [36]:
print(f'X3 ndim: {x3.ndim}')
print(f"x3 shape: {x3.shape}")
print(f"x3 size: {x3.size}")

X3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60


In [37]:
# for dtype
print(x3.dtype)

int64


In [38]:
print('itemsize',x3.itemsize,'bytes') # list the size (in bytes) of each array element
print('nbytes',x3.nbytes,'bytes') # total size (in bytes) of the array

itemsize 8 bytes
nbytes 480 bytes


# Array Indexing

In [39]:
x1

array([8, 6, 5, 2, 3, 0], dtype=int64)

In [40]:
x1[0]

8

In [41]:
x1[4]

3

In [42]:
x1[-1]

0

In [43]:
x1[-2]

3

In [44]:
x2

array([[0, 0, 1, 8],
       [6, 9, 5, 6],
       [9, 7, 6, 5]], dtype=int64)

In [45]:
x2[0,0] # multi-dimensional indexing oth row,0th col element

0

In [46]:
x2[2,0] # 2th row, 0 th col element

9

In [47]:
x2[2,-1] # 2th row, last col element

5

In [48]:
# to modify value
x2[0,0]=12

In [49]:
x2

array([[12,  0,  1,  8],
       [ 6,  9,  5,  6],
       [ 9,  7,  6,  5]], dtype=int64)

unlike Python lists, NumPy arrays have a fixed type. This means, for example, that if you attempt to insert a floating-point value to an integer array, the value will be silently truncated.

In [50]:
x1[0]=3.14159
x1

array([3, 6, 5, 2, 3, 0], dtype=int64)

# Array slicing

In [51]:
# one dimensional subarrays
x=np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
x[:5]

array([0, 1, 2, 3, 4])

In [53]:
x[5:]

array([5, 6, 7, 8, 9])

In [54]:
x[4:7]

array([4, 5, 6])

In [55]:
x[::2]

array([0, 2, 4, 6, 8])

In [56]:
x[1::2]

array([1, 3, 5, 7, 9])

In [57]:
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [58]:
x[5::-2] # reversed every other from index 5

array([5, 3, 1])

In [59]:
# multi dimensional subarrays
x2

array([[12,  0,  1,  8],
       [ 6,  9,  5,  6],
       [ 9,  7,  6,  5]], dtype=int64)

In [60]:
x2[:2,:3] # 0 th and 1th row, 0,1,2 th col

array([[12,  0,  1],
       [ 6,  9,  5]], dtype=int64)

In [61]:
x2[:3,::2]

array([[12,  1],
       [ 6,  5],
       [ 9,  6]], dtype=int64)

In [62]:
x2[::-1,::-1] # reversing both row and cols

array([[ 5,  6,  7,  9],
       [ 6,  5,  9,  6],
       [ 8,  1,  0, 12]], dtype=int64)

In [63]:
# accessing array rows and cols
x2[:,0] # first col

array([12,  6,  9], dtype=int64)

In [64]:
x2[0,:] # first row

array([12,  0,  1,  8], dtype=int64)

In the case of row access, the empty slice can be omitted for a more compact syntax:

In [65]:
x2[0]

array([12,  0,  1,  8], dtype=int64)

# subarrays as no-copy views

One important–and extremely useful–thing to know about array slices is that they return views rather than copies of the array data. This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies.

In [66]:
x2

array([[12,  0,  1,  8],
       [ 6,  9,  5,  6],
       [ 9,  7,  6,  5]], dtype=int64)

In [67]:
# extracting a 2x2 subarray from this
x2_sub=x2[:2,:2]
x2_sub

array([[12,  0],
       [ 6,  9]], dtype=int64)

In [68]:
#modifying this will change the original
x2_sub[0,0]=99
x2_sub

array([[99,  0],
       [ 6,  9]], dtype=int64)

In [69]:
x2

array([[99,  0,  1,  8],
       [ 6,  9,  5,  6],
       [ 9,  7,  6,  5]], dtype=int64)

This default behavior is actually quite useful: it means that when we work with large datasets, we can access and process pieces of these datasets without the need to copy the underlying data buffer.

# creating copies of arrays

In [70]:
x2_sub_copy = x2[:2,:2].copy()
x2_sub_copy

array([[99,  0],
       [ 6,  9]], dtype=int64)

In [71]:
x2_sub_copy[0,0]=100
x2_sub_copy

array([[100,   0],
       [  6,   9]], dtype=int64)

In [72]:
x2

array([[99,  0,  1,  8],
       [ 6,  9,  5,  6],
       [ 9,  7,  6,  5]], dtype=int64)

# Reshaping of arrays

In [73]:
# to put the number 1 to 9 in a 3x3 grid,
grid=np.arange(1,10).reshape((3,3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


Note that for this to work, the size of the initial array must match the size of the reshaped array. Where possible, the <mark>reshape</mark> method will use a no-copy view of the initial array, but with non-contiguous memory buffers this is not always the case.

### 1D array to a 2D row or col matrix

In [74]:
x=np.array([1,2,3])
x.ndim

1

In [75]:
x.reshape((1,3)) # row vector via reshape
x

array([1, 2, 3])

In [76]:
# row vector via newaxis
x[np.newaxis,:]

array([[1, 2, 3]])

In [77]:
# col vector via reshape
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [78]:
# col vector via newaxis
x[:,np.newaxis]

array([[1],
       [2],
       [3]])

## concatenation of arrays

In [79]:
x = np.array([1,2,3])
y = np.array([3,2,1])
np.concatenate([x,y])

array([1, 2, 3, 3, 2, 1])

In [80]:
# to concatenate more than 2
z=[99,99,99]
np.concatenate([x,y,z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [81]:
# it can also be used for 2D
grid=np.array([
    [1,2,3],
    [4,5,6]
])

In [82]:
np.concatenate([grid,grid]) # along the first axis

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [83]:
# concatenating along the 2nd axis
np.concatenate([grid,grid],axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [84]:
x=np.array([1,2,3])
grid=np.array([
    [9,8,7],
    [6,5,4]
])
np.vstack([x,grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [85]:
y=np.array([
    [99],
    [99]
])
np.hstack([grid,y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [86]:
# np.dstack will stack arrays along the third axis

## Array Splitting

In [87]:
x=[1,2,3,88,99,3,2,1]
x1,x2,x3=np.split(x,[3,5])

In [88]:
np.split?

![image.png](attachment:image.png)

In [89]:
x1

array([1, 2, 3])

In [90]:
x2

array([88, 99])

In [91]:
x3

array([3, 2, 1])

In [92]:
grid=np.arange(16).reshape((4,4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [93]:
upper,lower=np.vsplit(grid,[2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [94]:
left, right= np.hsplit(grid,[2])

In [95]:
left

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [96]:
right

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [97]:
# np.dsplit will split arrays along the third axis