<a href="https://colab.research.google.com/github/mateusz-mu/data_science/blob/main/01_intruduction/01_numpy_introduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Contents:
1. [Basics](#a1)
2. [Data types](#a2)
3. [Creating arrays](#a3)
4. [Basic array operations](#a4)
5. [Generating pseudorandom numbers](#a5)
6. [Basic functions](#a6)
7. [Indexing, Clipping](#a7)
8. [Iterating over arrays](#a8)
9. [Resizing arrays](#a9)
10. [Logical masks](#a10)

### <a name='a1'></a> Basics

In [2]:
import numpy as np

In [3]:
np.__version__

'1.25.2'

In [4]:
print(dir(np))



In [5]:
help(np.array)

Help on built-in function array in module numpy:

array(...)
    array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
          like=None)
    
    Create an array.
    
    Parameters
    ----------
    object : array_like
        An array, any object exposing the array interface, an object whose
        ``__array__`` method returns an array, or any (nested) sequence.
        If object is a scalar, a 0-dimensional array containing object is
        returned.
    dtype : data-type, optional
        The desired data-type for the array. If not given, NumPy will try to use
        a default ``dtype`` that can represent the values (by applying promotion
        rules when necessary.)
    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy will
        only be made if ``__array__`` returns a copy, if obj is a nested
        sequence, or if a copy is needed to satisfy any of the other
        requirements (``dtype``, ``order``, e

1D Array

In [6]:
x = np.array([1, 3])
x

array([1, 3])

In [7]:
print(x)

[1 3]


In [8]:
type(x)

numpy.ndarray

In [9]:
x.ndim

1

In [10]:
x.shape #2 elementy

(2,)

In [11]:
x.size #Liczba elementow w tablicy

2

In [12]:
x.dtype

dtype('int64')

In [13]:
x = np.array([1.3, 2.3, 1.4])

In [14]:
x

array([1.3, 2.3, 1.4])

In [15]:
print(x)

[1.3 2.3 1.4]


In [16]:
x.dtype

dtype('float64')

2D Array

In [17]:
y = np.array([[1,2], [-3, 1]])

In [18]:
print(y)

[[ 1  2]
 [-3  1]]


In [19]:
y.ndim

2

In [20]:
y.shape

(2, 2)

In [21]:
y = np.array([[1, 2, 4], [4, 2, 1]])
print(y)

[[1 2 4]
 [4 2 1]]


In [22]:
y.shape

(2, 3)

3D Array

In [23]:

z = np.array(
    [[[4, 3, 1],
      [3, 1, 2]],

     [[4, 1, 3],
      [4, 2, 1]],

     [[3, 2, 1],
      [4, 3, 2]]]
)
z

array([[[4, 3, 1],
        [3, 1, 2]],

       [[4, 1, 3],
        [4, 2, 1]],

       [[3, 2, 1],
        [4, 3, 2]]])

In [24]:
z.ndim

3

In [25]:
z.shape

(3, 2, 3)

In [26]:
print(z)

[[[4 3 1]
  [3 1 2]]

 [[4 1 3]
  [4 2 1]]

 [[3 2 1]
  [4 3 2]]]


### <a name='a2'></a> Data types

In [27]:
A = np.array([1, 2, 3])
A.dtype

dtype('int64')

In [28]:
A = np.array([1.0, 2.3, 3.3])
A.dtype

dtype('float64')

In [29]:
A = np.array([1, 2, 3], dtype = 'float')
A.dtype

dtype('float64')

In [30]:
A

array([1., 2., 3.])

In [31]:
A = np.array([1, 2, 3], dtype = 'complex')
A.dtype

dtype('complex128')

In [32]:
A

array([1.+0.j, 2.+0.j, 3.+0.j])

In [33]:
A = np.array([1.0, 2.7, 3.3], dtype = 'int')
A.dtype

dtype('int64')

In [34]:
A #Wartosc obcieta, a nie zaokraglona

array([1, 2, 3])

In [35]:
A = np.array([True, False])
A.dtype

dtype('bool')

In [36]:
A = np.array([24, 120, 230], dtype = np.int8)
A.dtype

For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  A = np.array([24, 120, 230], dtype = np.int8)


dtype('int8')

In [37]:
A = np.array([24, 120, 230], dtype = np.uint8)
A.dtype

#The primitive data types prefixed with "u" are unsigned versions with the same bit sizes.
#Effectively, this means they cannot store negative numbers, but on the other hand they can store positive numbers twice as large as their signed counterparts.
#The signed counterparts do not have "u" prefixed.

#int: –2147483648 to 2147483647
#uint: 0 to 4294967295

dtype('uint8')

### <a name='a3'></a> Creating arrays

In [38]:
np.zeros(shape=(4,10))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [39]:
np.zeros(shape=(4,10), dtype = 'int')

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [40]:
np.ones(shape=(5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [41]:
np.ones(shape=(5,5), dtype = 'int')

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [42]:
np.full(shape=(3,3), fill_value=4, dtype ='int')

array([[4, 4, 4],
       [4, 4, 4],
       [4, 4, 4]])

In [43]:
np.arange(10) #wwylaczajac ostatni element

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [44]:
np.arange(start = 5, stop = 10)

array([5, 6, 7, 8, 9])

In [45]:
np.arange(start = 10, stop = 100, step = 10)

array([10, 20, 30, 40, 50, 60, 70, 80, 90])

In [46]:
np.arange(start = 100, stop = 10, step = -10)

array([100,  90,  80,  70,  60,  50,  40,  30,  20])

In [47]:
np.arange(start = 0, stop = 1, step = 0.05)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

In [48]:
np.linspace(start = 0, stop = 1, num = 11) #num ile chcemy podzialow, rowno rozlozonych

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [49]:
A = np.arange(15)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [50]:
A.reshape((3,5))

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [51]:
A.reshape((3,-1)) # 3 wiersze i optymalna liczba kolumn dobrana przez pythona

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [52]:
A.reshape((-1,3))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

### <a name='a4'></a> Basic array operations


In [53]:
A = np.array([3, 1, 4, 2])
B = np.array([3, -1, 3, 2])
print(A)
print(B)

[3 1 4 2]
[ 3 -1  3  2]


In [55]:
A + B

array([6, 0, 7, 4])

In [56]:
A - B

array([0, 2, 1, 0])

In [57]:
A * B

array([ 9, -1, 12,  4])

In [58]:
A / B

array([ 1.        , -1.        ,  1.33333333,  1.        ])

In [60]:
A + 3 #Dodanie do kazdego elementu

array([6, 4, 7, 5])

In [61]:
2 * A

array([6, 2, 8, 4])

In [62]:
A + 3 * B

array([12, -2, 13,  8])

In [63]:
np.add(A, B)

array([6, 0, 7, 4])

In [64]:
np.subtract(A,B)

array([0, 2, 1, 0])

In [65]:
np.multiply(A,B)

array([ 9, -1, 12,  4])

In [66]:
np.divide(A,B)

array([ 1.        , -1.        ,  1.33333333,  1.        ])

In [69]:
X = np.array([[1, 3], [-2, 0]])
Y = np.array([[6, 0], [-1, 2]])
print(X, '\n')
print(Y)

[[ 1  3]
 [-2  0]] 

[[ 6  0]
 [-1  2]]


In [70]:
X * Y # Mnozenie element po elemencie

array([[6, 0],
       [2, 0]])

In [72]:
np.dot(X,Y) # Mnozenie macierzowe. Wiersz * kolumna i wiersz jako kolumna

array([[  3,   6],
       [-12,   0]])

In [73]:
X.dot(Y)

array([[  3,   6],
       [-12,   0]])

In [74]:
Y.dot(X) #Mnozenie macierzy nie jest przemienne

array([[ 6, 18],
       [-5, -3]])

In [75]:
X @ Y #Rowniez mnozenie macierzowe

array([[  3,   6],
       [-12,   0]])

### <a name='a5'></a> Generating pseudorandom numbers

In [76]:
np.random.seed(0) #ziarno losowania, zeby za kazdym losowaniem otrzymac ten sam wynik

In [77]:
np. random.randn() #Losowa wartosc z rozkladu normalnego

1.764052345967664

In [78]:
np.random.randn(10) #srednia 0 i odchylenie standardowe 1

array([ 0.40015721,  0.97873798,  2.2408932 ,  1.86755799, -0.97727788,
        0.95008842, -0.15135721, -0.10321885,  0.4105985 ,  0.14404357])

In [79]:
np.random.randn(10, 4)

array([[ 1.45427351,  0.76103773,  0.12167502,  0.44386323],
       [ 0.33367433,  1.49407907, -0.20515826,  0.3130677 ],
       [-0.85409574, -2.55298982,  0.6536186 ,  0.8644362 ],
       [-0.74216502,  2.26975462, -1.45436567,  0.04575852],
       [-0.18718385,  1.53277921,  1.46935877,  0.15494743],
       [ 0.37816252, -0.88778575, -1.98079647, -0.34791215],
       [ 0.15634897,  1.23029068,  1.20237985, -0.38732682],
       [-0.30230275, -1.04855297, -1.42001794, -1.70627019],
       [ 1.9507754 , -0.50965218, -0.4380743 , -1.25279536],
       [ 0.77749036, -1.61389785, -0.21274028, -0.89546656]])

In [82]:
# rozklad jednostajny na przedziale
np.random.rand() #0 domkniete, 1 otwarta

0.8209932298479351

In [83]:
np.random.rand(10)

array([0.09710128, 0.83794491, 0.09609841, 0.97645947, 0.4686512 ,
       0.97676109, 0.60484552, 0.73926358, 0.03918779, 0.28280696])

In [84]:
np.random.rand(10,2)

array([[0.12019656, 0.2961402 ],
       [0.11872772, 0.31798318],
       [0.41426299, 0.0641475 ],
       [0.69247212, 0.56660145],
       [0.26538949, 0.52324805],
       [0.09394051, 0.5759465 ],
       [0.9292962 , 0.31856895],
       [0.66741038, 0.13179786],
       [0.7163272 , 0.28940609],
       [0.18319136, 0.58651293]])

In [88]:
np.random.randint(10)

2

In [90]:
np.random.randint(low = 10, high=101) #losowa liczba dwucyfrowa

23

In [91]:
np.random.randint(low = 10, high=101, size = 10) #size = rozmiar generowanych danych

array([50, 82, 29, 82, 36, 76, 62, 77, 71, 24])

In [95]:
np.random.choice([4,2,1,4]) #losowy element z listy

2

In [97]:
np.random.choice(['dewd', '2312', 'c3rwf3124e']) #losowy element z listy

'c3rwf3124e'

In [98]:
data = np.arange(10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [99]:
np.random.shuffle(data) #Przetasowanie danych. dane sa zmieniane, nie trzeba jej przypisywac do zmiennej

In [100]:
data

array([2, 4, 3, 6, 1, 5, 7, 0, 8, 9])

### <a name='a6'></a> Basic functions

In [101]:
np. exp(1)

2.718281828459045

In [103]:
np.sqrt(4)

2.0

In [104]:
np.all([2, 3, 1]) #Bada czy element zwraca true czy false

True

In [106]:
np.all([2, 3, 0]) #Bada czy element zwraca true czy false. Gdy jeden jest flase, zrzuci false

False

In [107]:
np.any([1, 2, 1]) # czy jakikolwiek obiekt jest prawda

True

In [108]:
np.any([0, 0 ,0]) # czy jakikolwiek obiekt jest prawda

False

In [109]:
bool(0)

False

In [110]:
bool(1.3)

True

In [117]:
A = np.random.rand(5)
A

array([0.61555956, 0.12381998, 0.84800823, 0.80731896, 0.56910074])

In [118]:
np.argmax(A) #indeks gdzie mamy najwieksza wartosc

2

In [119]:
A[np.argmax(A)]

0.8480082293222344

In [120]:
np.argmin(A)

1

In [122]:
np.argsort(A) #sorotwac elementy, podajac odpowiednio indeks. domyslnie sortowane rosnaco

array([1, 4, 0, 3, 2])

In [123]:
np.max(A)

0.8480082293222344

In [124]:
np.min(A)

0.12381998284944151

In [125]:
np.mean(A)

0.5927614947590248

In [126]:
np.median(A)

0.6155595642838442

In [127]:
np.std(A)

0.2577485308575502

### <a name='a7'></a> Indexing, Clipping

In [128]:
A = np.arange(20)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [129]:
A[2]

2

In [130]:
A[2:]

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19])

In [131]:
A[:2]

array([0, 1])

In [132]:
A[[0, 2]]

array([0, 2])

In [133]:
A[-1]

19

In [134]:
A[10:15]

array([10, 11, 12, 13, 14])

In [137]:
A = A.reshape(4,5)
A


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [138]:
A[0]

array([0, 1, 2, 3, 4])

In [139]:
A[1]

array([5, 6, 7, 8, 9])

In [140]:
A[:,0]

array([ 0,  5, 10, 15])

In [141]:
A[:,-1]

array([ 4,  9, 14, 19])

In [142]:
A[1,1]

6

In [143]:
A[1,3]

8

In [146]:
A[1:3, 1:4]

array([[ 6,  7,  8],
       [11, 12, 13]])

In [149]:
A[1,2] = 14
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6, 14,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

### <a name='a8'></a> Iterating over arrays

In [150]:
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6, 14,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [151]:
for row in A:
  print(row)

[0 1 2 3 4]
[ 5  6 14  8  9]
[10 11 12 13 14]
[15 16 17 18 19]


In [155]:
for row in A:
  print(row[0])

0
5
10
15


In [156]:
for row in A:
  print(row[:3])

[0 1 2]
[ 5  6 14]
[10 11 12]
[15 16 17]


In [158]:
for i in A.flat:
  print(i) #iterowanie element po elemencie

0
1
2
3
4
5
6
14
8
9
10
11
12
13
14
15
16
17
18
19


### <a name='a9'></a> Resizing arrays

In [159]:
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6, 14,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [160]:
A.shape

(4, 5)

In [161]:
A.reshape(5,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6, 14],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [163]:
A.ravel() #wyplaszczenie danych. powrot do postaci jednowymiarowej

array([ 0,  1,  2,  3,  4,  5,  6, 14,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [164]:
A.T #transponowanie

array([[ 0,  5, 10, 15],
       [ 1,  6, 11, 16],
       [ 2, 14, 12, 17],
       [ 3,  8, 13, 18],
       [ 4,  9, 14, 19]])

### <a name='a10'></a> Logical masks

In [165]:
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6, 14,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [167]:
A = np.arange(start = -10, stop = 10, step = 0.5)
A = A.reshape(10, -1)
A

array([[-10. ,  -9.5,  -9. ,  -8.5],
       [ -8. ,  -7.5,  -7. ,  -6.5],
       [ -6. ,  -5.5,  -5. ,  -4.5],
       [ -4. ,  -3.5,  -3. ,  -2.5],
       [ -2. ,  -1.5,  -1. ,  -0.5],
       [  0. ,   0.5,   1. ,   1.5],
       [  2. ,   2.5,   3. ,   3.5],
       [  4. ,   4.5,   5. ,   5.5],
       [  6. ,   6.5,   7. ,   7.5],
       [  8. ,   8.5,   9. ,   9.5]])

In [168]:
A > 0

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [169]:
A[A > 0]

array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5,
       7. , 7.5, 8. , 8.5, 9. , 9.5])

In [170]:
A[A > 0 and A < 5]

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [172]:
np.bitwise_and(A > 0, A < 5)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [174]:
np.bitwise_or(A < 0, A > 5)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])