### Creating a Numpy array


We will use the `array()` function in NumPy to create **n-dimensional** NumPy arrays.


In [None]:
# import the numpy library
import numpy as np

In [None]:
my_list = [1, 2, 3]  # This is a list
my_list

[1, 2, 3]

In [None]:
# turn my_list to array, using numpy array() function
np.array(my_list)

array([1, 2, 3])

In [None]:
my_nested = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]  # nested list
my_nested

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [None]:
# turn my_nested list to array, using array() function
np.array(my_nested)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
my_nested = ((1, 2, 3), (4, 5, 6), (7, 8, 9))  # nested tuple (a tuple of tuples); rebinds my_nested, which held a nested list above
my_nested

((1, 2, 3), (4, 5, 6), (7, 8, 9))

In [None]:
# turn my_nested tuple to array, using array() function
np.array(my_nested)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [27]:
my_array = np.array(my_nested)
my_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [28]:
my_array.dtype

dtype('int64')

In [None]:
list1 = ["a", "b", "c"]
list2 = ["d", "e", "f"]

In [30]:
[list1, list2]

[['a', 'b', 'c'], ['d', 'e', 'f']]

In [31]:
np.array([list1, list2])

array([['a', 'b', 'c'],
       ['d', 'e', 'f']], dtype='<U1')

In [32]:
np.array([list1, list2]).dtype

dtype('<U1')

- `<` - little-endian byte order
- U - Unicode String Encoding
  - an international encoding standard for use with different languages and scripts, by which each letter, digit, or symbol is assigned a unique numeric value that applies across different platforms and programs.
- 1 - the maximum number of characters each element can hold
- https://numpy.org/doc/stable/reference/arrays.dtypes.html


### Indexing/Subsetting


In [33]:
my_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [34]:
my_array[0]

array([1, 2, 3])

In [35]:
# chained indexing: my_array[0] selects the first row, then [1] picks that row's second element
my_array[0][1]

np.int64(2)

In [None]:
# single-step indexing with a (row, column) pair; returns the same element as my_array[0][1]
my_array[0, 1]

np.int64(2)

In [None]:
# ":" selects every row, 1 selects the second column
my_array[:, 1]

array([2, 5, 8])

In [38]:
# my_array[:,3] # Why is there an error? There is no column with index 3.
# The array has 3 columns, so valid column indices are 0-2; index 3 is out of bounds (IndexError).

#### <span style="color:blue"><b>Some Highlights: Updating a value in an array, and data type illustration</b></span>


In [39]:
my_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
# Assign through a single (row, column) index instead of the chained
# my_array[0][1] form, which first builds a temporary row view.
my_array[0, 1] = 10  # updating integer 10 in array (modifies my_array in place)
my_array

array([[ 1, 10,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [41]:
my_array.dtype

dtype('int64')

In [None]:
# Assigning the string "10" into an integer array: NumPy converts the value to
# the array's existing dtype rather than changing the dtype of the array.
# Uses a single (row, column) index instead of chained my_array[0][1].
my_array[0, 1] = "10"  # updating string '10' in array
my_array

array([[ 1, 10,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [None]:
my_array.dtype  # Note: my_array was already of integer dtype, so "10" was stored as the integer 10 instead of a string.

dtype('int64')

In [None]:
# A new variable - my_nested_ is used here for illustration
# All values must be of the same type in an array

my_nested_ = [[1, 2, 3], [4, 5, 6], [7, 8, "9"]]  # Note that 9 is a string here
np.array(my_nested_)  # note that the data type is standardized to strings throughout.

array([['1', '2', '3'],
       ['4', '5', '6'],
       ['7', '8', '9']], dtype='<U21')

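'9' is a string, so every element is converted to a string and the dtype becomes '<U21'.
NumPy will try to use a default dtype that can represent all of the values. The width of 21 comes from converting the default 64-bit integers to text; on platforms where the default integer is 32-bit, the same code reports '<U11'.

- U - Unicode String Encoding
  - an international encoding standard for use with different languages and scripts, by which each letter, digit, or symbol is assigned a unique numeric value that applies across different platforms and programs.
- 21 - the maximum number of characters each element can hold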


In [45]:
my_nested_ = np.array(my_nested_)

In [None]:
my_nested_

array([['1', '2', '3'],
       ['4', '5', '6'],
       ['7', '8', '9']], dtype='<U21')

In [None]:
my_nested_[2, 2]

np.str_('9')

In [None]:
my_nested_[2, 2] = (
    "012345678901"  # 12-character string; the array dtype is <U21, so it fits without truncation
)

In [None]:
my_nested_  # the 12-character string is stored in full because it fits within <U21; a string longer than 21 characters would be truncated to fit the dtype

array([['1', '2', '3'],
       ['4', '5', '6'],
       ['7', '8', '012345678901']], dtype='<U21')

In [None]:
# All values must be of the same type in an array
my_nested_ = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, "012345678901"],
]  # Note that a 12 character string is placed here
np.array(
    my_nested_
)  # the data type is standardized to strings throughout; dtype is still <U21, which is wide enough for the 12-character string

array([['1', '2', '3'],
       ['4', '5', '6'],
       ['7', '8', '012345678901']], dtype='<U21')

<span style="color:blue"><b>Learning Points:</b></span>

- NumPy guesses the dtype according to the content of the list, and caters for the minimum type required to hold the objects in the sequence.
- All values must be of the same type, typically numeric values or strings.
- Reference:- https://numpy.org/doc/stable/reference/arrays.dtypes.html


### Array Properties


In [None]:
# Four rows of three single-character strings, split out of short words.
list1, list2, list3, list4 = (list(word) for word in ("abc", "def", "ghi", "jkl"))
# Stack the rows into one 2-D array: 4 rows x 3 columns.
alpha = np.array((list1, list2, list3, list4))

In [52]:
alpha

array([['a', 'b', 'c'],
       ['d', 'e', 'f'],
       ['g', 'h', 'i'],
       ['j', 'k', 'l']], dtype='<U1')

In [53]:
type(alpha)

numpy.ndarray

In [None]:
alpha.shape  # 4 rows, 3 columns

(4, 3)

In [None]:
alpha.ndim  # 2-dimension

2

- The ndim is the same as the number of axes or the length of array.shape


- Also, notice the format of array [ [x] , [x] ].

- 2 dimensional arrays are great for representing matrices which are often useful in data science.


In [None]:
alpha.size  # number of elements in the array

12

In [None]:
alpha.dtype  # type of data contained in the array

dtype('<U1')

In [None]:
alpha1 = np.array([[[1, 2]], [[3, 4]], [[5, 6]]])
alpha1

array([[[1, 2]],

       [[3, 4]],

       [[5, 6]]])

In [None]:
alpha1.shape  # 3 sets, 1 row per set, 2 columns; each level of [] nesting in the literal adds one entry to the shape

(3, 1, 2)

In [60]:
alpha1.ndim

3

In [None]:
alpha1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
alpha1

array([[[ 1,  2],
        [ 3,  4]],

       [[ 5,  6],
        [ 7,  8]],

       [[ 9, 10],
        [11, 12]]])

In [None]:
alpha1.shape  # 3 sets, 2 rows per set, 2 columns; each level of [] nesting in the literal adds one entry to the shape

(3, 2, 2)

In [63]:
alpha1.ndim

3

In [None]:
# Rebuild the 4x3 character array from its four rows before reshaping it below.
list1, list2 = ["a", "b", "c"], ["d", "e", "f"]
list3, list4 = ["g", "h", "i"], ["j", "k", "l"]
alpha = np.array((list1, list2, list3, list4))

In [65]:
alpha

array([['a', 'b', 'c'],
       ['d', 'e', 'f'],
       ['g', 'h', 'i'],
       ['j', 'k', 'l']], dtype='<U1')

In [66]:
alpha.shape

(4, 3)

In [67]:
alpha.ndim

2

In [None]:
# Regroup the same 12 elements into 2 blocks of 2 rows x 3 columns; alpha is rebound to the 3-D result.
alpha = alpha.reshape(2, 2, 3)

In [69]:
alpha

array([[['a', 'b', 'c'],
        ['d', 'e', 'f']],

       [['g', 'h', 'i'],
        ['j', 'k', 'l']]], dtype='<U1')

In [70]:
alpha.ndim

3

In [71]:
# 4-D example: an array of shape (3, 2, 2, 3) filled with random floats.
# No seed is set, so the values differ on every run.
np.random.rand(3, 2, 2, 3)

array([[[[7.70338291e-01, 5.85917198e-01, 5.93871071e-01],
         [5.15965741e-01, 3.70650652e-01, 5.63782798e-01]],

        [[8.90462158e-01, 5.15673456e-01, 8.81139836e-01],
         [2.30141810e-01, 2.67353188e-01, 8.32594262e-01]]],


       [[[9.85689040e-01, 7.40827089e-02, 6.97245026e-01],
         [3.71859921e-01, 1.43495186e-01, 2.69172855e-01]],

        [[4.10503443e-01, 5.07727523e-01, 8.16838276e-02],
         [2.72832345e-01, 4.48661554e-01, 5.55805556e-01]]],


       [[[1.01399039e-04, 2.66175340e-01, 3.28281867e-01],
         [2.29565894e-01, 8.30031656e-01, 1.50497898e-01]],

        [[6.97196100e-01, 8.88684385e-01, 8.30065302e-01],
         [1.84199282e-01, 1.27737465e-02, 4.49863694e-02]]]])

In [None]:
# Recreate my_array with the values 1-9 (element [0, 1] was overwritten with 10 earlier).
my_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
my_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
# numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)

# numpy function
np.mean(my_array)

np.float64(5.0)

![image.png](attachment:image.png)


In [None]:
# numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)
np.mean(my_array, axis=1)  # taking mean of each row

array([2., 5., 8.])

In [None]:
# numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)
np.mean(my_array, axis=0)  # taking mean of each column

array([4., 5., 6.])

In [None]:
# ndarray.mean(axis=None, dtype=None, out=None, keepdims=False, *, where=True)

# method
my_array.mean()

np.float64(5.0)

In [None]:
my_array.mean(axis=1)  # taking mean of each row

array([2., 5., 8.])

In [None]:
my_array.mean(axis=0)  # taking mean of each column

array([4., 5., 6.])

Using np.mean function


In [79]:
np.mean(my_array)

np.float64(5.0)

In [80]:
np.mean(my_array, axis=1)

array([2., 5., 8.])

In [81]:
np.mean(my_array, axis=1)[0]

np.float64(2.0)

In [82]:
np.mean(my_array[0])

np.float64(2.0)

In [None]:
help(np.mean)

Help on _ArrayFunctionDispatcher in module numpy:

mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)
    Compute the arithmetic mean along the specified axis.

    Returns the average of the array elements.  The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : None or int or tuple of ints, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.

        .. versionadded:: 1.7.0

        If this is a tuple of ints, a mean is performed over multiple axes,
        instead of a single axis or all the axes as before.
    dtype : data-type, optional
        Type to use in computing the mean.  For