## Numpy

= Numerical Python

Verarbeitung von Zahlen

Essentiell für große Datenmengen

In [1]:
import numpy as np

### Array

In [2]:
a = np.array([1, 2, 3, 4])

In [6]:
a  # no print() needed: the notebook displays the value of a cell's last expression

array([1, 2, 3, 4])

### Datentypen

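Ganze Zahlen (die Zahl gibt die Bitbreite n an; mit Vorzeichen reicht der Wertebereich von -2^(n-1) bis 2^(n-1)-1):
- int64 = 64 Bit, 2^64 mögliche Werte
- int32 = 32 Bit, 2^32 mögliche Werte
- int16 = 16 Bit, 2^16 mögliche Werte
- int8 = 8 Bit, 2^8 = 256 mögliche Werte (-128 bis 127)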

Kommazahlen:
- float64
- float32
- float16

In [7]:
b = np.array([1, 2, 3, 4], dtype=np.int8)

In [8]:
b

array([1, 2, 3, 4], dtype=int8)

### Index

In [16]:
a[1]

np.int64(2)

In [17]:
a[-1]

np.int64(4)

In [18]:
a[1:3]

array([2, 3])

### Eigenschaften

In [19]:
len(a)

4

In [20]:
a.size

4

In [21]:
a.shape

(4,)

In [39]:
a.ndim  # number of dimensions (the "n" stands for "number")

1

In [25]:
a.dtype

dtype('int64')

### Matrix

= 2D-Array

In [29]:
c = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [27]:
c

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [33]:
len(c)  # number of rows (length of the first axis)

3

In [32]:
c.size  # total number of elements

9

In [36]:
c.shape  # (rows, columns) -> 3x3

(3, 3)

In [38]:
c.ndim  # number of dimensions

2

In [40]:
c.dtype

dtype('int64')

In [44]:
c.sum()

np.int64(45)

In [41]:
c.mean()

np.float64(5.0)

In [43]:
c.std()

np.float64(2.581988897471611)

In [45]:
c.var()

np.float64(6.666666666666667)

In [48]:
c.mean(axis=0)  # vertical: averages down each column (one value per column)

array([4., 5., 6.])

In [49]:
c.mean(axis=1)  # horizontal: averages across each row (one value per row)

array([2., 5., 8.])

In [52]:
np.arange(1, 10)  # array holding the range 1 through 9 (the stop value 10 is excluded)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [53]:
np.array(range(1, 10))

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

### Index bei Matrizen

In [57]:
c

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [54]:
c[1]

array([4, 5, 6])

In [59]:
c[1][1]

np.int64(5)

In [69]:
c[1, 1]

np.int64(5)

In [68]:
c[1:3, 0]  # rows 1 and 2, column 0

array([4, 7])

In [77]:
c[1:3, 1:3]

array([[5, 6],
       [8, 9]])

In [78]:
c[0:3, 0:2]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [84]:
# Simplification: for this 3x3 matrix all five slices below select the same thing
# (every row, first two columns). Only the last expression of the cell is displayed.
c[:, :2]
c[0:, :2]
c[:3, :2]
c[0:3, :2]
c[0:3, 0:2]

array([[1, 2],
       [4, 5],
       [7, 8]])

### Vektorisierung

Gesamtes Numpy Array mit einer Operation verändern

In [85]:
c + 10

array([[11, 12, 13],
       [14, 15, 16],
       [17, 18, 19]])

In [86]:
c ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [87]:
c / 2

array([[0.5, 1. , 1.5],
       [2. , 2.5, 3. ],
       [3.5, 4. , 4.5]])

In [88]:
a * 2

array([2, 4, 6, 8])

In [89]:
d = np.arange(10)
e = np.arange(10)

In [94]:
d

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [95]:
e

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [91]:
d * e  # vectorization with two arrays: element-wise product

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

### Boolean Masken

Vektorisierung mit Vergleichsoperatoren

In [96]:
f = np.arange(20)

In [97]:
f

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [100]:
f > 10  # new boolean array: True where the condition holds in the source array, False elsewhere

array([False, False, False, False, False, False, False, False, False,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [101]:
g = f > 10

In [104]:
f[g]  # filtering with a boolean mask: keeps only the elements where g is True

array([11, 12, 13, 14, 15, 16, 17, 18, 19])

In [106]:
f[f > 10]  # the condition can be written directly inside the brackets

array([11, 12, 13, 14, 15, 16, 17, 18, 19])

### Performance

In [107]:
import time
def measureTime(function):
    """Run ``function`` once and print how long the call took, in seconds.

    Args:
        function: Callable that is invoked without arguments.

    Returns:
        None. The elapsed time (a float, in seconds) is printed to stdout.
        If ``function`` raises, the exception propagates and nothing is printed.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the wall clock and can jump
    # when the system time is adjusted.
    start = time.perf_counter()
    function()
    end = time.perf_counter()
    print(end - start)

In [112]:
def pythonList(n=100_000_000):
    """Build a plain Python list holding the integers ``0 .. n-1`` and discard it.

    Serves as the pure-Python workload of the list-vs-NumPy timing comparison.

    Args:
        n: Number of elements to create. Defaults to 100_000_000, the size
            used by the original benchmark; pass a smaller value for a quick
            run or on a machine with little memory.

    Returns:
        None. Only the cost of building the list matters.
    """
    list(range(n))

In [113]:
def numpyArray(n=100_000_000):
    """Build a NumPy array holding the integers ``0 .. n-1`` and discard it.

    Serves as the NumPy workload of the list-vs-NumPy timing comparison.

    Args:
        n: Number of elements to create. Defaults to 100_000_000, the size
            used by the original benchmark; pass a smaller value for a quick
            run or on a machine with little memory.

    Returns:
        None. Only the cost of building the array matters.
    """
    np.arange(n)

In [115]:
measureTime(pythonList)
measureTime(numpyArray)

1.4190032482147217
0.13026928901672363


### Weitere Funktionen

#### random

In [119]:
np.random.random(size=(5, 5))  # random floats between 0 and 1 (NumPy documents the interval as half-open: [0.0, 1.0))

array([[0.38248075, 0.84594211, 0.90543986, 0.31637283, 0.30386781],
       [0.66509662, 0.05082551, 0.20733002, 0.44999278, 0.77838832],
       [0.36734041, 0.46495229, 0.99406424, 0.9312595 , 0.1207058 ],
       [0.31745527, 0.87000154, 0.00234838, 0.30498848, 0.30426943],
       [0.44809751, 0.84966406, 0.97749421, 0.5064567 , 0.1003969 ]])

In [130]:
np.random.randint(10)

5

In [131]:
np.random.randint(10, size=(5, 5))

array([[3, 5, 8, 4, 1],
       [2, 5, 0, 3, 6],
       [4, 5, 1, 9, 3],
       [7, 3, 0, 2, 3],
       [7, 7, 3, 6, 7]], dtype=int32)

#### reshape

Form eines Arrays verändern (1D -> 2D, 2D -> 1D)

In [133]:
h = np.arange(9)

In [134]:
h

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [135]:
h.reshape(3, 3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [136]:
i = np.arange(8)

In [137]:
i

array([0, 1, 2, 3, 4, 5, 6, 7])

In [138]:
i.reshape(2, 2, 2)

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [139]:
j = i.reshape(2, 2, 2)

In [142]:
j.reshape(-1)  # -1: flatten the array to one dimension

array([0, 1, 2, 3, 4, 5, 6, 7])

In [144]:
k = h.reshape(3, 3)

In [145]:
k

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [146]:
k.reshape(-1)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [147]:
h

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [157]:
h.reshape(9, 1)  # 9 rows, 1 column

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

In [158]:
h.reshape(-1, 1)  # -1: placeholder; NumPy infers this length (9 here) from the array size

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

#### linspace

Linear Space

Generiert ein Array von X bis einschließlich Y mit Z Elementen (mit gleichem Abstand zwischen den Elementen)

In [165]:
np.linspace(0, 10, 5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [166]:
np.linspace(0, 10, 50)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

#### zeros, ones

In [167]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [168]:
np.ones(5)

array([1., 1., 1., 1., 1.])

#### hstack, vstack

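hstack = horizontal stack: Arrays nebeneinander zusammenfügen (neue Spalten anhängen)

vstack = vertical stack: Arrays untereinander zusammenfügen (neue Zeilen anhängen)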

In [171]:
l = np.arange(30).reshape(6, 5)

In [172]:
l

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]])

In [174]:
# Task: build a totals row (the sum of each column)
l.sum(axis=0)

array([75, 81, 87, 93, 99])

In [175]:
s = l.sum(axis=0)

In [178]:
np.vstack([l, s])  # IMPORTANT: hstack/vstack take the arrays as ONE sequence argument, hence the extra brackets

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [75, 81, 87, 93, 99]])

In [180]:
m = np.vstack([l, s])

In [181]:
m

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [75, 81, 87, 93, 99]])

In [182]:
# Task: build a totals column (the sum of each row)
m.sum(axis=1)

array([ 10,  35,  60,  85, 110, 135, 435])

In [183]:
t = m.sum(axis=1)

In [186]:
np.hstack([m, t.reshape(-1, 1)])  # t is 1-D while m is 2-D -> reshape t into a column first

array([[  0,   1,   2,   3,   4,  10],
       [  5,   6,   7,   8,   9,  35],
       [ 10,  11,  12,  13,  14,  60],
       [ 15,  16,  17,  18,  19,  85],
       [ 20,  21,  22,  23,  24, 110],
       [ 25,  26,  27,  28,  29, 135],
       [ 75,  81,  87,  93,  99, 435]])