## Numpy

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# np.arange builds an ndarray, while the builtin range stays a range object.
b = range(10)
a = np.arange(10)
print(a, b)
print(*(type(obj) for obj in (a, b)))

In [None]:
# The same three values as a Python list and as a NumPy array:
# both print their type, their content and their first element.
b = [1, 2, 3]
a = np.array(b)
for seq in (b, a):
    print(type(seq), seq, seq[0])

#### reshape, dimension

In [None]:
# Number of dimensions: a flat array has 1, a nested (2x3) array has 2,
# and a single row of the nested array has 1 again.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([[1, 2, 3], [4, 5, 6]])
print(a.ndim)
print(b.ndim, b[0].ndim)

In [None]:
# Rearrange the six elements into 2 rows of 3 columns.
a = np.array([1, 2, 3, 4, 5, 6])
c = np.reshape(a, (2, 3))
print(c, c.ndim)

In [None]:
a = np.array([1,2,3,4,5,6])
# Six elements cannot fill a 2x4 array, so reshape raises ValueError.
# The error is the point of this cell: catch and display it so the
# notebook still runs from top to bottom.
try:
    c = a.reshape(2,4)
except ValueError as err:
    print("reshape failed:", err)

#### Use -1 for one dimension and NumPy will calculate its size from the total number of elements

In [None]:
# With 2 rows requested, -1 lets NumPy infer the number of columns (3).
a = np.array([1, 2, 3, 4, 5, 6])
c = a.reshape((2, -1))
d = np.transpose(c)
for arr in (c, d):
    print(arr, arr.shape)

In [None]:
# A 2-D array is plotted column by column: c is 2x3, so this draws
# three lines with two points each.
plt.plot(c)

In [None]:
# Use the first row of c as x values and the second row as y values,
# drawn as red stars joined by a line.
plt.plot(c[0], c[1], "r*-")

#### Data type, indexing

In [None]:
# Integer 3x3 array of zeros.
a = np.zeros((3, 3), dtype=int)
print("a=", a)
a[0, 1] = 1
# The array holds integers, so 3.5 is stored as 3.
a[1, 2] = 3.5
print("\na=", a)
# Every second column (columns 0 and 2) of every row.
a[:, ::2] = 2
print("\na=", a)

In [None]:
# Show the whole matrix, then its second row, then its second column.
a = np.arange(9).reshape(3, 3)
second_row = a[1]
second_col = a[:, 1]
print(a, "", second_row, "---", second_col, sep="\n")

In [None]:
# d has two columns; its transpose has two rows, which unpack into the
# x values (first column of d) and the y values (second column of d).
plt.plot(*d.T, "r*-")

In [None]:
# Fixed-width byte strings of at most 20 characters each.
b = np.zeros(3, dtype="S20")
# The 22-character value does not fit and is cut to its first 20 characters.
b[1], b[2] = "1234567890123456789012", "alma"
print("b=", b)

The towns.csv file
<pre>
#town Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
Tallinn -3 -5 -1 3 10 13 16 15 10 6 1 -2
Beijing -3 0 6 13 20 24 26 25 20 13 5 -1
Berlin 0 -1 4 7 12 16 18 17 14 9 4 1
Buenos_Aires 23 22 20 16 13 10 10 11 13 16 18 22
Cairo 13 15 17 21 25 27 28 27 26 23 19 15
Cape_Town 21 21 20 17 15 13 12 13 14 16 18 20
Helsinki -5 -6 -2 3 10 13 16 15 10 5 0 -3
London 3 3 6 7 11 14 16 16 13 10 6 5
Moscow -8 -7 -2 5 12 15 17 15 10 3 -2 -6
Ottawa -10 -8 -2 6 13 18 21 20 14 7 1 -7
Paris 3 4 7 10 13 16 19 19 16 11 6 5
Riga -3 -3 1 5 11 15 17 16 12 7 2 -1
Rome 8 8 11 12 17 20 23 23 21 17 12 9
Singapore 27 27 28 28 28 28 28 28 27 27 27 26
Stockholm -2 -3 0 3 10 14 17 16 11 6 1 -2
Waschington_D.C. 2 3 7 13 18 23 26 25 21 15 9 3
</pre>
<a href="http://www.phy.bme.hu/~torok/towns.csv">http://www.phy.bme.hu/~torok/towns.csv</a>

In [None]:
# Read the space-separated table as integers and show the first three rows,
# first with every column, then without the first (town name) column.
csv = np.genfromtxt("towns.csv", dtype=int, delimiter=" ")
print(csv[:3])
print("-----------------")
print(csv[:3, 1:])

In [None]:
# Keep only the numeric columns (everything after the town name column)
# and print the number of rows and columns.
table = np.genfromtxt("towns.csv", dtype=int, delimiter=" ")
data = table[:, 1:]
print(*data.shape)

In [None]:
# Mean along axis 0 (down the rows): one average per column,
# i.e. per month across all towns.
print(data.mean(axis=0))

In [None]:
# Mean along axis 1 (across the columns): one average per row,
# i.e. per town over the twelve months.
print(data.mean(axis=1))

#### Pandas

In [None]:
import pandas as pd

In [None]:
# The first line of towns.csv ("#town Jan Feb ...") becomes the header,
# so the first column is named '#town'.
df = pd.read_csv("towns.csv",sep=' ')
df.head()

In [None]:
# Select a single column by its header name.
df['Jan']

In [None]:
# Select the first row by integer position.
df.iloc[0]

In [None]:
# A boolean mask on the '#town' column keeps only the matching row(s).
print(df.loc[df['#town'] == 'Tallinn'])

In [None]:
# Underlying NumPy array with the first (town name) column sliced off.
df.values[:,1:]

#### Setting index column
With the index_col option you can set which column is used as the index. The values in that column must be unique; then you can reference the rows by these values.

(Helpful comment by Tamás Kalmár)

In [None]:
# index_col=0 uses the first column (the town names) as the row index
# instead of keeping it as a data column.
df = pd.read_csv("towns.csv",sep=' ',index_col=0)
df.head()

In [None]:
# With the town names as index, a row can be selected by its label.
df.loc["Tallinn"]

In [None]:
# The town names now live in the index, so .values holds only the monthly columns.
df.values

#### Random numbers
* seed: the pseudo random generator starts from this number; the same seed always produces the same sequence
* NumPy's legacy generator uses the Mersenne Twister algorithm (C's `rand()` is traditionally a linear congruential generator)
* np.random.random -> uniform random number from 0 to 1

In [None]:
# Fix the seed first, so the ten uniform numbers below are reproducible.
np.random.seed(12345)
a = np.random.random(size=10)
print(a)

In [None]:
# No re-seeding here: the generator continues its sequence, so these ten
# numbers differ from the ones drawn right after seeding.
a = np.random.random(10)
print(a)

#### do something with probability p

In [None]:
# Draw one uniform number per step; the event "happens" in a step
# whenever that number falls below the probability p.
N = 10
p = 0.3
a = np.random.random(N)
for i, draw in enumerate(a):
    if draw < p:
        print("It happened in step %d." % (i))

In [None]:
# size may be a tuple: this draws a 3x3 array of uniform random numbers.
a = np.random.random(size=(3,3))
print(a)

In [None]:
# Random integers from 0 to 9 (the upper bound 10 is exclusive), as a 2x2 array.
print(np.random.randint(10,size=(2,2)))
print("--------")
# The next three calls ask for the same distribution (mean 2, standard
# deviation 1) in three ways: positional arguments, keyword arguments,
# and keyword arguments in a different order.
print(np.random.normal(2,1,size=(2,2)))
print("--------")
print(np.random.normal(loc=2,scale=1,size=(2,2)))
print("--------")
print(np.random.normal(scale=1,loc=2,size=(2,2)))

#### Choose values

In [None]:
# Ten draws from 0..4; values may repeat (sampling with replacement).
print(np.random.choice(5, 10))
# Three draws without replacement: all three values are different.
print(np.random.choice(5,3,replace=False))
# p gives the probability of each value; 1 and 4 have probability 0
# and therefore never appear.
print(np.random.choice(5, 10, p=[0.1, 0, 0.3, 0.6, 0]))
# choice also works on a sequence of arbitrary items.
c = ["Budapest", "Pécs", "Debrecen", "Miskolc"]
print(np.random.choice(c,1))
print(np.random.choice(c,2))

#### Indexing array by array!

In [None]:
# Pick three distinct positions and use that integer array as an index,
# both for reading and for assigning.
a = np.random.random(6)
index = np.random.choice(6, 3, replace=False)
print(a, index, a[index], sep="\n")
a[index] = np.random.random(3)
print(a)
# A boolean array works as an index too: select, then cap, the large values.
too_big = a > 0.5
print(a[too_big])
a[too_big] = 0.5
print(a)

#### Mask out elements

In [None]:
# Boolean mask that is True for the first two positions only.
a = np.random.random(6)
mask = np.arange(6) < 2
print("a=", a)
print("mask=", mask)
print("a[mask]=", a[mask])

### Change all values based on conditions

In [None]:
# Replace every value below 0.5 with a freshly drawn random number.
a = np.random.random(10)
print(a)
low = a < 0.5
print(np.count_nonzero(low))
a[low] = np.random.random(np.count_nonzero(low))
print(a)

In [None]:
# Cap every value above 0.5 at exactly 0.5 using a boolean mask.
a = np.random.random(6)
print(a)
too_big = a > 0.5
a[too_big] = 0.5
print(a)

### Try to change only the first one

In [None]:
a = np.random.random(6)
print(a)
# Boolean indexing (a[a>0.5]) returns a copy, so the assignment below changes
# that temporary copy only and `a` itself stays as it was.
# NOTE(review): this raises IndexError if no element happens to exceed 0.5.
a[a>0.5][0] = 0.5
print(a)

In [None]:
# Positions of the values below 0.5; the result is a tuple holding one
# index array (one array per dimension of a).
a = np.random.random(6)
print(a)
w = np.nonzero(a < 0.5)
print(w)

In [None]:
# w is a tuple with one index array per dimension, so len(w) is always 1
# for a 1-D array, even when nothing matched. Test the index array itself,
# otherwise w[0][0] raises IndexError when no value is below 0.5.
if w[0].size:
    # Only the first matching position is changed.
    a[w[0][0]] = 0.5
print(a)

### in higher dimensions

In [None]:
# The boolean-mask assignment works unchanged on a 2-D array.
a = np.random.random((3, 3))
print(a)
too_big = a > 0.5
a[too_big] = 0.5
print("----")
print(a)

In [None]:
# For a 2-D array the result holds two index arrays:
# the row indices and the column indices of the matches.
a = np.random.random((3, 3))
w = np.nonzero(a < 0.5)
print(a, "----", w, sep="\n")

In [None]:
# The tuple of (row indices, column indices) can be used directly as an index:
# show the matching values, then overwrite all of them.
print(a[w])
a[w] = 0.5
print("----")
print(a)

In [None]:
# Fresh random matrix and the (rows, columns) positions of its values below 0.5.
a = np.random.random((3, 3))
below_half = a < 0.5
w = np.nonzero(below_half)
print(a, "----", w, sep="\n")

In [None]:
# Change only the first matching element, if there is any match at all.
rows, cols = w
if rows.size:
    a[rows[0], cols[0]] = 0.5
print(a)

In [None]:
# w is not recomputed here, so this addresses the same first (row, column)
# pair as before and sets it to 0.5 again.
first = next(zip(*w), None)
if first is not None:
    a[first] = 0.5
print(a)

#### numpy operations

In [None]:
# A 4x3 matrix, its transpose, and its element-wise (not matrix) product with itself.
a = np.arange(12).reshape(4, 3)
print(a, "--------", a.T, "--------", sep="\n")
print("a*a=", np.multiply(a, a))

In [None]:
# A 3x3 matrix and a length-3 vector used by the arithmetic examples.
a = np.reshape(np.arange(9), (3, 3))
b = np.array((1, 0, 1))
print(a, b)

In [None]:
# Multiplying by a scalar acts on every element.
print("2*a=",2 * a)

In [None]:
# Adding a scalar adds it to every element.
print("a+1=",a + 1)

In [None]:
# b has 3 elements and a has 3 columns, so b is broadcast: it is added to every row of a.
print("a+b=",a + b)

In [None]:
# Arrays of the same shape are added element by element.
print("a+a=",a + a)

In [None]:
# Element-wise product with broadcasting: every row of a is multiplied by b.
# (The label previously read "a+b=" although the expression is a product.)
print("a*b=",a * b)

### Linear algebra, matrix operations

In [None]:
# 2x4 matrix of uniform random numbers; the bare name on the last line displays it.
a = np.random.random((2,4))
a

In [None]:
# A 2x3 and a square 3x3 random matrix for the matrix-product examples.
b = np.random.random(size=(2, 3))
c = np.random.random(size=(3, 3))
print(b, "---", c, sep="\n")

### matrix multiplication
Let $a_{ij}$ be an $l\times m$ matrix and $b_{ij}$ be an $m\times n$ matrix; then the matrix product $c = a\cdot b$ (np.dot() in python) is an $l\times n$ matrix defined as
$$c_{ij}=\sum_{k=1}^{m} a_{ik} b_{kj}$$
If $c^{-1}$ is the inverse of a square matrix $c$, then $c\cdot c^{-1}=I$, where $I$ is the unit matrix.

Transpose (a.T in python) mirrors the matrix about its main diagonal (exchanges rows and columns).

In [None]:
# Invert c, then check the result: c times its inverse should be
# (up to rounding) the unit matrix.
c_inv = np.linalg.inv(c)
print(c_inv)
print("---")
print(np.dot(c, c_inv))

In [None]:
# a is (2,4) and b.T is (3,2): the inner dimensions (4 and 3) differ, so this
# matrix product is undefined and np.dot raises ValueError. Catch and display
# the error so the notebook still runs from top to bottom.
try:
    np.dot(a,b.T)
except ValueError as err:
    print("np.dot(a, b.T) failed:", err)
# Multiplying along the shared axis of length 2 is valid: (4,2) times (2,3) gives (4,3).
np.dot(a.T,b)

In [None]:
# (2,3) times (3,3): the inner dimensions match, so the result is a 2x3 matrix.
np.dot(b,c)

In [None]:
# Transposing a 1-D array has no effect: a and a.T print identically.
a = np.arange(3)
print(a,a.T)

In [None]:
# For two 1-D arrays np.dot is the inner (scalar) product.
print(np.dot(a,a))

In [None]:
# The double brackets make b a 1x3 row matrix, so its transpose is a 3x1 column.
b = np.array([[0, 1, 2]])
print(b, "---", b.T, sep="\n")

In [None]:
# (1,3) times (3,1) gives a 1x1 matrix holding the inner product.
print(np.dot(b,b.T))

In [None]:
# (3,1) times (1,3) gives the 3x3 outer product.
print(np.dot(b.T,b))

In [None]:
a = np.arange(9).reshape(3, 3)
# Plain assignment creates a second name for the same array,
# so writing through c also changes a.
c = a
c[0] = 9
print("a=", a)
# A copy owns its data: writing to d leaves a untouched.
d = a.copy()
d[0] = 0
print("a=", a, "\nd=", d)

In [None]:
# Element-wise functions (sin, cube) and reductions (sum per column,
# overall mean, element count, minimum) on a 3x3 matrix.
a = np.arange(9).reshape(3, 3)
print("a=", a)
print("sin=", np.sin(a))
print("x**3=", a ** 3)
print("sum=", a.sum(axis=0))
print("mean=", a.mean())
print("size=", a.size)
print("min=", a.min())

In [None]:
# Optional, for a reproducible matrix: np.random.seed(12347)
a = np.random.random((3, 3))
print(a)
print("shape=", a.shape)
# argmin returns the position in the flattened array;
# unravel_index converts it to a (row, column) pair.
flat_pos = a.argmin()
print("minpos=", flat_pos)
print("2d minpos=", np.unravel_index(flat_pos, a.shape))

### normalize data

In [None]:
# 4 rows and 2 columns of random numbers; the following cells scale the rows by their sums.
a = np.random.random((4,2))
print(a)

In [None]:
# axis=1 sums across the columns: one value per row, so the result has shape (4,).
print(a.sum(axis=1))

In [None]:
# a has shape (4,2) but the row sums have shape (4,): broadcasting compares
# the trailing axes (2 against 4), which do not match, so the division raises
# ValueError. Catch and display the error so the notebook still runs from
# top to bottom.
try:
    b = a / a.sum(axis=1)
except ValueError as err:
    print("a / a.sum(axis=1) failed:", err)

In [None]:
# a.T has shape (2,4), which broadcasts against the (4,) row sums;
# the result, however, is in the transposed (2,4) layout.
b = a.T / a.sum(axis=1)
print(b)

In [None]:
# Divide in the transposed layout (where the row sums broadcast), then
# transpose back so b has the same shape as a and every row sums to 1.
row_sums = a.sum(axis=1)
b = (a.T / row_sums).T
print(b)

#### Regular arrays

In [None]:
# arange with one argument: the integers 0..9.
print(np.arange(10))
# arange with a float step: from 5 in steps of 0.1, the end point 7 is excluded.
# NOTE(review): with non-integer steps rounding can change the number of
# elements; linspace is the safer choice when the count matters.
print(np.arange(5,7,0.1))
# linspace: 9 evenly spaced points from 5 to 7, both end points included.
print(np.linspace(5,7,9))

In [None]:
# logspace takes exponents: 10 points evenly spaced on a log scale from 10**0 to 10**2.
print(np.logspace(0,2,10))
# Passing log10 of the desired end points gives 10 log-spaced values from 0.3 to 28.
print(np.logspace(np.log10(0.3),np.log10(28),10))

#### Histogram

In [None]:
a = np.random.random(100)
h = np.histogram(a)
print(h)
print(len(h[0]),len(h[1]))

In [None]:
# Two explicit bins: [0, 0.5) and [0.5, 1.0].
a = np.random.random(10)
half_bins = [0, 0.5, 1.0]
h = np.histogram(a, bins=half_bins)
print(a)
print(h)

In [None]:
# With many samples the two halves of [0, 1] receive about the same count.
a = np.random.random(10000)
half_bins = [0, 0.5, 1.0]
h = np.histogram(a, bins=half_bins)
print(h[0])

In [None]:
# Let matplotlib bin the data and draw the histogram in one step.
plt.hist(a)

In [None]:
# The same data split into 10 equal-width bins; h is the pair (counts, bin_edges).
h = np.histogram(a,bins=10)
print(h)

In [None]:
# 10 counts but 11 edges: every bin has a left and a right edge.
print(h[0].shape,h[1].shape)

In [None]:
# Plot the counts against the bin centres, i.e. the midpoints of
# consecutive bin edges.
counts, edges = h
plt.plot((edges[1:] + edges[:-1]) / 2, counts)

In [None]:
# Same curve as a function of the bin centres, but with the y axis starting
# at 0 so the fluctuations are seen relative to the full count.
counts, edges = h
plt.plot((edges[1:] + edges[:-1]) / 2, counts)
plt.ylim(0, 1100)

In [None]:
# Bar chart of the counts, one bar of width 0.07 centred on each bin centre.
counts, edges = h
plt.bar((edges[1:] + edges[:-1]) / 2, counts, width=0.07)