# numpy vs pandas

- numpy provides N-dimensional array (matrix) types and numeric calculations
- pandas provides the DataFrame, suitable for _standalone_ data analysis

In [2]:
import numpy as np

In [16]:
# Build a 1-D ndarray from a plain Python list.
arr = np.array([1, 2, 3, 4, 5])

# Show the whole array, then its first element (indexing is zero-based).
print(arr, arr[0], sep="\n")

[1 2 3 4 5]
1


In [4]:
# Report the installed NumPy version and the Python type of `arr`.
print(np.__version__, type(arr), sep="\n")

1.24.4
<class 'numpy.ndarray'>


In [28]:
# 2-D array: a list of rows, here 2 rows x 3 columns.
twoD = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

print("dimension", np.ndim(twoD))   # number of axes
print("shape", np.shape(twoD))      # (rows, columns)

print(twoD)

print(twoD[0])      # integer index: the first row, a 1-D result
print(twoD[1][2])   # row 1, column 2 (same element as twoD[1, 2])

# A slice keeps the axis it slices; an integer index drops that axis.
print(twoD[1:, :], twoD[1:, :].shape)   # rows from 1 onward, still 2-D
print(twoD[:, 0], twoD[:, 0].shape)     # first column, collapses to 1-D


# Other selections worth trying:
#   twoD[0:]       every row
#   twoD[0:1]      only the first row, kept as a 2-D array
#   twoD[0][0:2]   first two elements of the first row
#   twoD[0:2, 1]   both rows, only the 2nd element of each
#   twoD[:, 1:3]   both rows, only the 2nd & 3rd elements (2-D result)

dimension 2
shape (2, 3)
[[1 2 3]
 [4 5 6]]
[1 2 3]
6
[[4 5 6]] (1, 3)
[1 4] (2,)


In [24]:
# The number of opening "[" at the start of the literal is the number of
# dimensions. Indexing walks the nesting outermost-first, like a tree:
# pick a block (axis 0), then a row inside it (axis 1), then a column (axis 2).

threeD = np.array([
    [[1, 2, 3],        # block 0: 2 rows x 3 columns
     [4, 5, 6]],
    [[7, 8, 9],        # block 1: 2 rows x 3 columns
     [10, 11, 12]],
])

print("dim", threeD.ndim)
print("shape", threeD.shape)

# Block 0, row 1, every column. Chained indexing threeD[0][1] selects the same
# row; adding a third index (threeD[0][1][1]) would select a single element.
print(threeD[0, 1], threeD[0, 1].shape)

dim 3
shape (2, 2, 3)
[4 5 6] (3,)


## types

In [29]:
# type() reports the container class; .dtype reports the element type it holds.
print(type(arr), arr.dtype, sep="\n")

<class 'numpy.ndarray'>
int64


In [30]:
# Text elements: NumPy picks a unicode dtype wide enough for the longest item.
arrString = np.array(['apple', 'orange', 'berry'])

# Integers stored as byte strings by requesting the 'S' dtype explicitly.
arrIntAsString = np.array([1, 2, 3], dtype='S')

print(arrString.dtype, arrIntAsString.dtype, sep="\n")

<U6
|S1


## conversion

In [33]:
# astype() returns a new, converted array; `arrIntAsString` itself is not
# modified. np.int64 is the same dtype as the 'i8' code.

arrIntFromString = arrIntAsString.astype(np.int64)

print(arrIntFromString, arrIntFromString.dtype, sep="\n")

[1 2 3]
int64


In [34]:
# Convert the byte-string array to booleans (again a converted copy).
arrBool = arrIntAsString.astype(bool)

print(arrBool)

[ True  True  True]


## copy

In [46]:
original = np.array([2, 4, 1, 6, 83, 10])

x = original.copy()   # independent array with its own data
y = original.view()   # new array object looking at original's data

# Change the source array AFTER taking the copy and the view.
original[0] = 20

# The copy keeps the old value; the view shows the change.
print(original, x, y, sep="\n")

# .base is None when an array owns its data; for a view it is the array
# whose data the view is looking at.
print(x.base, y.base, sep="\n")

[20  4  1  6 83 10]
[ 2  4  1  6 83 10]
[20  4  1  6 83 10]
None
[20  4  1  6 83 10]


In [47]:
# A 1-D array has a one-entry shape tuple: (number_of_elements,).
print(np.shape(original))

(6,)


## reshaping (increasing the dimension)

In [31]:
# `original` (defined in the copy section) is 1-d with 6 elements, so this cell
# must run after that one; on its own it fails with a NameError.

print(original.size)    # total element count: 6

# The new shape must hold exactly the same number of elements: 2 * 3 == 6.
originalReshaped = np.reshape(original, (2, 3))

# reshape returns a view on the original data whenever possible, not a copy.
print(originalReshaped, originalReshaped.shape, sep="\n")

NameError: name 'original' is not defined

## flattening (reducing the dimension to 1-D)

In [57]:
# Start from the 2-d array ...
print(twoD)

# ... and collapse it to 1-d; -1 asks NumPy to work out the length itself.
twoDFlat = twoD.reshape((-1,))

print(twoDFlat)

[[1 2 3]
 [4 5 6]]
[1 2 3 4 5 6]


In [66]:
# Reading a shape tuple such as (x, y, z):
#   - the number of entries is the number of dimensions (3 entries => 3-d)
#   - the product x * y * z is the total element count
print(arr.shape, twoD.shape, threeD.shape, sep="\n")


(5,)
(2, 3)
(2, 2, 3)


## iteration

In [73]:
print(arr)

# Iterating a 1-d array yields its elements one by one (like a foreach).
for value in arr:
    print(value)

[1 2 3 4 5]
1
2
3
4
5


In [68]:
print(twoD)

# Iterating a 2-d array yields rows; a second loop reaches the elements.
for row in twoD:
    for value in row:
        print(value)

[[1 2 3]
 [4 5 6]]
1
2
3
4
5
6


In [75]:
print(threeD)

# One loop per dimension: blocks -> rows -> elements.
for block in threeD:
    for row in block:
        for value in row:
            print(value)


[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
1
2
3
4
5
6
7
8
9
10
11
12


In [74]:
# np.nditer visits every element with a single loop, whatever the number of
# dimensions, so the nested loops above are not needed.
for element in np.nditer(threeD):
    print(element)

1
2
3
4
5
6
7
8
9
10
11
12


## enumeration (index, value)

In [84]:
# np.ndenumerate yields (index, value) pairs; the index is always a tuple,
# even for a 1-d array.

for idx, value in np.ndenumerate(arr):
    print(idx, '-', value)


(0,) - 1
(1,) - 2
(2,) - 3
(3,) - 4
(4,) - 5


In [88]:
# For a 2-d array the index tuple is (row, column).
for idx, value in np.ndenumerate(twoD):
    print(idx, '-', value)

(0, 0) - 1
(0, 1) - 2
(0, 2) - 3
(1, 0) - 4
(1, 1) - 5
(1, 2) - 6


## join _by axis_ or stack

In [183]:
x1 = np.array([1, 2, 3])
x2 = np.array([4, 5, 6])

# 1-d inputs have a single axis (axis 0), which is also concatenate's default.
j = np.concatenate([x1, x2])
J = np.hstack([x1, x2])   # side by side: same result as concatenate for 1-d
K = np.vstack([x1, x2])   # one input per row: the result is 2-d

print(j, J, K, sep="\n")

[1 2 3 4 5 6]
[1 2 3 4 5 6]
[[1 2 3]
 [4 5 6]]


In [131]:
# Two 2 x 2 matrices.
t1 = np.array([[1, 2], [3, 4]])
t2 = np.array([[5, 6], [7, 8]])

# axis=0 stacks rows on top of each other (vertical), exactly like vstack.
j1 = np.concatenate([t1, t2], axis=0)
J1 = np.vstack([t1, t2])

# axis=1 puts the columns side by side (horizontal), exactly like hstack.
j2 = np.concatenate([t1, t2], axis=1)
J2 = np.hstack([t1, t2])

print(j1, J1, sep="\n")
print(j2, J2, sep="\n")

[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]
[[1 2 5 6]
 [3 4 7 8]]


In [123]:
# Two 3-d arrays, each of shape (1, 6, 3): one block of six 3-element rows.
k1 = np.array([[
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [13, 14, 15],
    [16, 17, 18],
]])
# NOTE(review): 130 breaks the otherwise consecutive 19..36 run; possibly a
# typo for 30. Kept as-is because the saved outputs show 130.
k2 = np.array([[
    [19, 20, 21],
    [22, 23, 24],
    [25, 26, 27],
    [28, 29, 130],
    [31, 32, 33],
    [34, 35, 36],
]])

# For 3-d inputs each axis has a matching stack helper:
jX = np.concatenate([k1, k2], axis=0)   # vertical   = .vstack(
jY = np.concatenate([k1, k2], axis=1)   # horizontal = .hstack(
jZ = np.concatenate([k1, k2], axis=2)   # depth      = .dstack(


In [116]:
# axis=0 join: the number of blocks doubles.
print(jX.shape, jX, sep="\n")

(2, 6, 3)
[[[  1   2   3]
  [  4   5   6]
  [  7   8   9]
  [ 10  11  12]
  [ 13  14  15]
  [ 16  17  18]]

 [[ 19  20  21]
  [ 22  23  24]
  [ 25  26  27]
  [ 28  29 130]
  [ 31  32  33]
  [ 34  35  36]]]


In [119]:
# axis=1 join: the number of rows inside the single block doubles.
print(jY.shape, jY, sep="\n")

(1, 12, 3)
[[[  1   2   3]
  [  4   5   6]
  [  7   8   9]
  [ 10  11  12]
  [ 13  14  15]
  [ 16  17  18]
  [ 19  20  21]
  [ 22  23  24]
  [ 25  26  27]
  [ 28  29 130]
  [ 31  32  33]
  [ 34  35  36]]]


In [118]:
# axis=2 join: every row becomes twice as long.
print(jZ.shape, jZ, sep="\n")

(1, 6, 6)
[[[  1   2   3  19  20  21]
  [  4   5   6  22  23  24]
  [  7   8   9  25  26  27]
  [ 10  11  12  28  29 130]
  [ 13  14  15  31  32  33]
  [ 16  17  18  34  35  36]]]


## split

In [136]:
# The second argument is the number of pieces to cut the array into.

s = np.array([1, 2, 3, 4, 5, 6])

# array_split returns a Python list of arrays; the pieces keep the
# dimensionality of the input.
splitArray = np.array_split(s, indices_or_sections=3)

# split does the same but requires an equal division: cutting these
# 6 elements into 4 pieces would raise a ValueError.
split = np.split(s, indices_or_sections=3)


# The whole list, its first piece, and the first element of that piece.
print(splitArray, splitArray[0], splitArray[0][0], sep="\n")

print(split)

[array([1, 2]), array([3, 4]), array([5, 6])]
[1 2]
1
[array([1, 2]), array([3, 4]), array([5, 6])]


In [150]:
# 4 rows x 2 columns.
k = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
])

# Default axis (0): cut between rows -> two pieces of 2 rows x 2 columns.
kk = np.array_split(k, 2)

# axis=1: cut between columns -> two pieces of 4 rows x 1 column.
kkkkk = np.array_split(k, 2, axis=1)

print(kk, kkkkk, sep="\n")

[array([[1, 2],
       [3, 4]]), array([[5, 6],
       [7, 8]])]
[array([[1],
       [3],
       [5],
       [7]]), array([[2],
       [4],
       [6],
       [8]])]


## search

In [160]:
# np.where(condition) returns the indices at which the condition is True,
# as a tuple holding one index array per dimension.
i = np.array([1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 4])

is_four = i == 4
is_even = i % 2 == 0

equality = np.where(is_four)   # positions of the value 4
mod = np.where(is_even)        # positions of the even values


# i[equality[0][0]] would give back the first matching element.
print(equality, mod, sep="\n")

(array([ 3,  4, 11]),)
(array([ 1,  3,  4,  6,  8, 10, 11]),)


In [162]:
# searchsorted expects an array that is already sorted. It returns the index
# at which a value would be inserted so that the order is preserved.
i = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

equality = np.searchsorted(i, 4)             # one value  -> one index
equalityForVs = np.searchsorted(i, [4, 6])   # many values -> array of indices

print(equality, equalityForVs, sep="\n")

3
[3 5]


### sorting

In [165]:
# Note: this cell rebinds `i`, `s` and `twoD`; earlier cells that read `twoD`
# will see this data if they are re-run afterwards.
i = np.array([1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 4])
s = np.array(['banana', 'cherry', 'apple'])
twoD = np.array([[3, 2, 4],
                 [5, 0, 1]])

# np.sort returns a sorted copy and leaves its input untouched.
# Numbers sort numerically, strings alphabetically, and a 2-d array is
# sorted row by row.
print(np.sort(i), np.sort(s), np.sort(twoD), sep="\n")

[ 1  2  3  4  4  4  5  6  7  8  9 10]
['apple' 'banana' 'cherry']
[[2 3 4]
 [0 1 5]]


### filtering  - _search and get a new array_

In [168]:
# Filtering = indexing with a boolean mask; the result is a new array that
# holds only the elements whose flag is True.

f = [True, False, True]   # one flag per element: keep, drop, keep
s = np.array(['banana', 'cherry', 'apple'])

print(s[f])

['banana' 'apple']


In [169]:

# Build the boolean mask from a condition instead of typing it by hand:
# one flag per element of `s`, True where the element starts with 'b'.
# The comprehension replaces the manual append-True/append-False loop and
# yields the same list of Python bools.
fEnhanced = [e.startswith('b') for e in s]

# Indexing with the mask keeps only the elements flagged True.
print(s[fEnhanced])

['banana']
