## Numpy

NumPy ermöglicht es uns, n-dimensionale Arrays zu erstellen, die mit C-basierten Zahlentypen gefüllt sind.

Muss über pip installiert und importiert werden

In [1]:
import numpy as np

## Arrays

Numpy Arrays aus einer Python Liste erzeugen

In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [3]:
a = np.array([1, 2, 3, 4])

In [4]:
a

array([1, 2, 3, 4])

In [5]:
a[0]

1

In [6]:
a[3]

4

In [7]:
b = np.array([0.4, 8.2, 8.42, 18.38])

In [8]:
b

array([ 0.4 ,  8.2 ,  8.42, 18.38])

In [9]:
a.dtype

dtype('int32')

In [10]:
b.dtype

dtype('float64')

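a: int32, b: float64 – der Standard-Ganzzahltyp ist plattformabhängig: `int32` ist der Standard von NumPy < 2.0 unter Windows; unter Linux/macOS (und ab NumPy 2.0 auch unter 64-Bit-Windows) wäre es `int64`.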

Da NumPy in C geschrieben ist, können über den Parameter `dtype` gezielt kleinere, speichereffiziente Datentypen gewählt werden

In [11]:
c = np.array([1, 2, 3, 4], dtype=np.int8)

In [12]:
c

array([1, 2, 3, 4], dtype=int8)

In [13]:
d = np.array([1, 2, 3, 4], dtype=np.float16)

In [14]:
d

array([1., 2., 3., 4.], dtype=float16)

In [15]:
d.itemsize

2

a: 4 Byte * 4 = 16 Byte

b: 8 Byte * 4 = 32 Byte

c: 1 Byte * 4 = 4 Byte statt 16 Byte

d: 2 Byte * 4 = 8 Byte statt 32 Byte

## Matrizen

Matrizen werden in der Datenanalyse ständig benötigt, z. B. für Tabellen (Datenbanken)

In [16]:
# 3x3 integer matrix built from a nested list; each inner list becomes one row.
e = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

Hier haben wir ein zweidimensionales Array, erzeugt aus einer Liste von Listen (jede innere Liste ist eine Zeile)

In [17]:
e

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [18]:
e.shape

(3, 3)

In [19]:
e.size

9

In [20]:
e.ndim

2

Auf Elemente der Matrix zugreifen

In [21]:
e[0]

array([1, 2, 3])

In [22]:
e[0][1]  # chained indexing: first take row 0, then element 1 of that row

2

In [23]:
e[0, 1]  # single (row, column) index; yields the same element as e[0][1]

2

## Slicing von Matrizen

Bereiche von Matrizen entnehmen mit Doppelpunkt

In [24]:
e

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [25]:
e[1, 0:2]  # row 1, columns 0 up to (but not including) 2

array([4, 5])

In [26]:
e[1:, 0:2]  # a slice bound may be omitted: "1:" runs from row 1 to the end

array([[4, 5],
       [7, 8]])

In [27]:
e[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [28]:
e[:, 2]  # a bare ":" selects everything along that axis (start to end); here: column 2 of every row

array([3, 6, 9])

In [29]:
e[:, 1:]

array([[2, 3],
       [5, 6],
       [8, 9]])

## Array verändern

In [30]:
e[1, 1] = 33

In [31]:
e

array([[ 1,  2,  3],
       [ 4, 33,  6],
       [ 7,  8,  9]])

In [32]:
e[0] = 10  # a scalar assigned to a whole row is written to every element of that row

In [33]:
e

array([[10, 10, 10],
       [ 4, 33,  6],
       [ 7,  8,  9]])

In [34]:
e[:, 2] = 20  # same for a column slice: every element of column 2 becomes 20

In [35]:
e

array([[10, 10, 20],
       [ 4, 33, 20],
       [ 7,  8, 20]])

## Einfache Analyse von Arrays und Matrizen

In [36]:
a.sum()

10

In [37]:
a.mean()

2.5

In [38]:
a.std()

1.118033988749895

In [39]:
a.var()

1.25

In [40]:
e.sum()

132

In [41]:
e.mean()

14.666666666666666

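Diese Funktionen können auch achsenweise ausgeführt werden: `axis=0` rechnet entlang der Zeilen (ein Ergebnis pro Spalte), `axis=1` entlang der Spalten (ein Ergebnis pro Zeile)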

In [42]:
e[1].mean()

19.0

In [43]:
e.mean(axis=0)

array([ 7., 17., 20.])

In [44]:
e

array([[10, 10, 20],
       [ 4, 33, 20],
       [ 7,  8, 20]])

In [45]:
e.mean(axis=1)

array([13.33333333, 19.        , 11.66666667])

## Vektorisierung

Ein gesamtes Array mit einer Operation verarbeiten

In [46]:
e

array([[10, 10, 20],
       [ 4, 33, 20],
       [ 7,  8, 20]])

In [47]:
e + 10  # returns a new array with 10 added to every element; e itself is left unchanged

array([[20, 20, 30],
       [14, 43, 30],
       [17, 18, 30]])

In [48]:
e

array([[10, 10, 20],
       [ 4, 33, 20],
       [ 7,  8, 20]])

In [49]:
e += 10  # in-place: modifies e itself (re-running this cell adds another 10)

In [50]:
e

array([[20, 20, 30],
       [14, 43, 30],
       [17, 18, 30]])

In [51]:
e + e  # element-wise addition also works between two arrays

array([[40, 40, 60],
       [28, 86, 60],
       [34, 36, 60]])

## Vektorisierung mit Booleschen Operatoren

In [52]:
e > 50

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

Diese Boolesche Maske kann wieder auf das Array angewandt werden

In [53]:
e[e > 50]  # indexing with the boolean mask keeps only elements where it is True; empty here because no element of e exceeds 50

array([], dtype=int32)

Welche Werte sind überdurchschnittlich?

In [54]:
e.mean()

24.666666666666668

In [55]:
e > e.mean()

array([[False, False,  True],
       [False,  True,  True],
       [False, False,  True]])

In [56]:
e[e > e.mean()]

array([30, 43, 30, 30])

In [57]:
e

array([[20, 20, 30],
       [14, 43, 30],
       [17, 18, 30]])

In [58]:
e[e < e.mean()]

array([20, 20, 14, 17, 18])

In [59]:
e[~(e > e.mean())]  # ~ negates the boolean mask element-wise (used here instead of "!" / "not"); ~(e > m) is the same as e <= m

array([20, 20, 14, 17, 18])

In [60]:
# 10x10 matrix of random integers below 100. NOTE: no seed is set, so the
# values differ on every run and will not match the outputs recorded below.
x = np.random.randint(100, size=(10, 10))

In [61]:
x

array([[29, 92, 86,  6, 89, 13, 12, 65, 16, 75],
       [31, 87, 51, 45, 28, 94, 47, 27, 45, 18],
       [76, 78, 82, 19, 69, 77, 55, 80, 97, 69],
       [11, 65, 25, 57, 97, 46, 95, 17, 13,  8],
       [18, 77, 21, 53, 91, 48, 91, 18, 55, 34],
       [87, 59, 92,  8, 83, 78, 59, 89, 24, 21],
       [37, 83, 79, 53, 63, 76,  9, 96, 64, 69],
       [51, 65,  4, 21, 88, 67, 73, 47, 82, 54],
       [22, 59, 33, 67, 61, 50, 10,  8, 16, 94],
       [40, 82,  1,  2, 11, 76, 75, 98, 53, 94]])

In [62]:
x.mean()

53.31

In [63]:
x[x > x.mean()]

array([92, 86, 89, 65, 75, 87, 94, 76, 78, 82, 69, 77, 55, 80, 97, 69, 65,
       57, 97, 95, 77, 91, 91, 55, 87, 59, 92, 83, 78, 59, 89, 83, 79, 63,
       76, 96, 64, 69, 65, 88, 67, 73, 82, 54, 59, 67, 61, 94, 82, 76, 75,
       98, 94])

In [64]:
# Count the True entries of the mask directly instead of materialising the
# filtered array only to take its length; the resulting count is the same.
np.count_nonzero(x > x.mean())

53

## Performance

In [65]:
import sys

In [66]:
x = 5  # plain Python int (rebinds x, which previously held the random 10x10 array)

In [67]:
sys.getsizeof(x)  # 28 bytes for a plain Python int (as reported by this interpreter)

28

In [68]:
a.itemsize

4

## Listenvergleich

In [77]:
pList = list(range(10_000_000))

In [82]:
# np.arange builds the int64 array directly, whereas np.array(range(...))
# first has to walk ten million Python ints one by one. Contents are identical:
# the integers 0 .. 9_999_999 as int64.
pArray = np.arange(10_000_000, dtype=np.int64)

In [83]:
%time sum([x ** 2 for x in pList])

CPU times: total: 2.45 s
Wall time: 2.47 s


333333283333335000000

In [84]:
%time np.sum(pArray ** 2)

CPU times: total: 31.2 ms
Wall time: 27.1 ms


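1291890006563070912

**Achtung:** Dieses aufgezeichnete Ergebnis (Summe in `int64` gebildet) weicht vom Python-Ergebnis `333333283333335000000` ab. Die Summe der Quadrate (≈ 3,3 · 10^20) überschreitet den Wertebereich von `int64` (max. ≈ 9,22 · 10^18); NumPy-Ganzzahlen laufen dabei stillschweigend über, statt wie ein Python-`int` beliebig groß zu werden. Ein näherungsweise korrektes Ergebnis liefert z. B. `np.sum(pArray ** 2, dtype=np.float64)`.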

## Weitere Funktionen

In [85]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [86]:
np.arange(0, 100)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [87]:
np.arange(0, 100, 0.5)

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,
        5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. , 10.5,
       11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15. , 15.5, 16. ,
       16.5, 17. , 17.5, 18. , 18.5, 19. , 19.5, 20. , 20.5, 21. , 21.5,
       22. , 22.5, 23. , 23.5, 24. , 24.5, 25. , 25.5, 26. , 26.5, 27. ,
       27.5, 28. , 28.5, 29. , 29.5, 30. , 30.5, 31. , 31.5, 32. , 32.5,
       33. , 33.5, 34. , 34.5, 35. , 35.5, 36. , 36.5, 37. , 37.5, 38. ,
       38.5, 39. , 39.5, 40. , 40.5, 41. , 41.5, 42. , 42.5, 43. , 43.5,
       44. , 44.5, 45. , 45.5, 46. , 46.5, 47. , 47.5, 48. , 48.5, 49. ,
       49.5, 50. , 50.5, 51. , 51.5, 52. , 52.5, 53. , 53.5, 54. , 54.5,
       55. , 55.5, 56. , 56.5, 57. , 57.5, 58. , 58.5, 59. , 59.5, 60. ,
       60.5, 61. , 61.5, 62. , 62.5, 63. , 63.5, 64. , 64.5, 65. , 65.5,
       66. , 66.5, 67. , 67.5, 68. , 68.5, 69. , 69.5, 70. , 70.5, 71. ,
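       71.5, 72. , 72.5, 73. , 73.5, 74. , 74.5, 75. , 75.5, 76. , 76.5,
       77. , 77.5, 78. , 78.5, 79. , 79.5, 80. , 80.5, 81. , 81.5, 82. ,
       82.5, 83. , 83.5, 84. , 84.5, 85. , 85.5, 86. , 86.5, 87. , 87.5,
       88. , 88.5, 89. , 89.5, 90. , 90.5, 91. , 91.5, 92. , 92.5, 93. ,
       93.5, 94. , 94.5, 95. , 95.5, 96. , 96.5, 97. , 97.5, 98. , 98.5,
       99. , 99.5])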

In [88]:
np.arange(0, 100).reshape(10, 10)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [89]:
np.arange(0, 99).reshape(3, 33)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32],
       [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        65],
       [66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98]])

In [94]:
np.arange(0, 99).reshape(3, 33).reshape(-1)  # reshape(-1) flattens back to 1-D; -1 lets NumPy infer the length

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98])

In [95]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [96]:
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [100]:
0 == False

True

In [99]:
1 == True

True

In [105]:
np.random.random(size=(3, 3))

array([[0.55022103, 0.01631602, 0.67352769],
       [0.94700342, 0.93737302, 0.21209588],
       [0.25846805, 0.60053863, 0.16808049]])

In [107]:
np.linspace(0, 1, 20)  # 20 evenly spaced values from 0 to 1 (both endpoints included); the step size is computed automatically

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [110]:
np.linspace(0, 1, 20).reshape(4, 5)

array([[0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632],
       [0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421],
       [0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211],
       [0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ]])

In [118]:
h = np.arange(0, 10).reshape(10, 1)

In [117]:
g = np.linspace(0, 1, 20).reshape(10, 2)

In [119]:
np.hstack((g, h))  # joins g (10x2) and h (10x1) side by side into a 10x3 array; h's integers appear as floats in the result

array([[0.        , 0.05263158, 0.        ],
       [0.10526316, 0.15789474, 1.        ],
       [0.21052632, 0.26315789, 2.        ],
       [0.31578947, 0.36842105, 3.        ],
       [0.42105263, 0.47368421, 4.        ],
       [0.52631579, 0.57894737, 5.        ],
       [0.63157895, 0.68421053, 6.        ],
       [0.73684211, 0.78947368, 7.        ],
       [0.84210526, 0.89473684, 8.        ],
       [0.94736842, 1.        , 9.        ]])