#### NumPy:
- a package for working with arrays
- provides linear algebra, Fourier transform and matrix routines
- up to about 50x faster than Python lists, because array elements are stored contiguously in memory (locality of reference)

In [1]:
# Example: a NumPy array side by side with a plain Python list
import numpy as np

x = np.array([1, 2, 3, 4, 5])   # ndarray: prints without commas
y = [1, 2, 3, 4, 5]             # list: prints with commas
print(x, y, sep="\n")
print(np.__version__)           # version of the installed NumPy

[1 2 3 4 5]
[1, 2, 3, 4, 5]
1.23.5


In [2]:
# Building arrays of increasing rank from (nested) Python sequences
rows = [[1, 2, 3], [4, 5, 6]]

a = np.array(11)                 # a bare scalar gives a 0-D array
b = np.array([1, 2, 3, 4, 5])    # a flat list gives a 1-D array
c = np.array(rows)               # a list of lists gives a 2-D array
d = np.array([rows, rows])       # two stacked 2-D blocks give a 3-D array

print(a.ndim, b.ndim, c.ndim, d.ndim)   # number of dimensions of each array
print(a, b, c, d, sep="\n")
print(d.shape)                          # length along every axis

0 1 2 3
11
[1 2 3 4 5]
[[1 2 3]
 [4 5 6]]
[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]
(2, 2, 3)


In [3]:
# Array constructors that do not need an explicit list of values
grid_shape = (3, 10)

x = np.zeros(grid_shape)   # 3x10 array filled with 0.0
print(x)

o = np.ones(grid_shape)    # 3x10 array filled with 1.0
print(o)

x = np.arange(10)          # the integers 0..9, like Python's range
print(x)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
[0 1 2 3 4 5 6 7 8 9]


In [4]:
# ndmin pads the shape with leading length-1 axes until the array has that many dimensions
x = np.array([1, 2, 3, 4], ndmin=5)

print(x)
rank, dims = x.ndim, x.shape
print('number of dimensions :', rank)
print(dims)

[[[[[1 2 3 4]]]]]
number of dimensions : 5
(1, 1, 1, 1, 4)


In [7]:
# Indexing and slicing: one index per axis, separated by commas
c = np.array([[1, 2, 3], [4, 5, 6]])   # 2-D array
print(c.shape, c, sep="\n")
print(c[1, 2])                         # row 1, column 2

d = np.array([c, c])                   # 3-D array made of two copies of c
print(d.shape)
print(d[1, 1, 2])                      # block 1, row 1, column 2
print(d[0, 1, -2:])                    # negative slice: last two entries of block 0, row 1

x = np.arange(10)
print(x[0:10:2])                       # start:stop:step -> every second element

(2, 3)
[[1 2 3]
 [4 5 6]]
6
(2, 2, 3)
6
[5 6]
[0 2 4 6 8]


#### Array data types
- i - integer
- b - boolean
- u - unsigned integer
- f - float
- c - complex float
- m - timedelta
- M - datetime
- O - object
- S - string
- U - unicode string
- V - fixed-size chunk of memory for other types (void)

In [8]:
# Inspecting, choosing and converting the element type (dtype)
numbers = [1, 2, 3, 4]
fruits = ['apple', 'banana', 'cherry']

x = np.array(numbers)
print(x.dtype)                            # platform default integer type

a = np.array(fruits)
print(a.dtype)                            # unicode strings, sized to the longest entry

a = np.array(fruits, dtype='S')
print(a.dtype)                            # byte strings instead of unicode

x = np.array(numbers, dtype=np.float32)   # request the dtype at creation time
print(x.dtype, x, sep="\n")
print(x.astype('i'))                      # astype returns a converted copy

int32
<U6
|S6
float32
[1. 2. 3. 4.]
[1 2 3 4]


In [9]:
# Reshaping: same data, different shape
x = np.arange(12)
y = x.reshape((2, 3, 2))
print(y.shape, y, sep="\n")

print(x.reshape(-1, 6))   # -1 lets NumPy work out that axis (here: the number of rows)

print(y.base)             # .base is the array whose memory y uses (x), so y is a view, not a copy
y[0, 0, 1] = 10           # write through the view ...
print(x)                  # ... and the change shows up in x

print(y.reshape(-1))      # a single -1 flattens y to 1-D

y = x.reshape(4, 3)
print(y.T)                # transpose

(2, 3, 2)
[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0 10  2  3  4  5  6  7  8  9 10 11]
[ 0 10  2  3  4  5  6  7  8  9 10 11]
[[ 0  3  6  9]
 [10  4  7 10]
 [ 2  5  8 11]]


In [10]:
# squeeze() drops every axis of length 1
x = np.arange(10)
y = x.reshape((1, 2, 5))
print(y.shape, y.squeeze().shape, sep="\n")   # shape before and after squeezing

(1, 2, 5)
(2, 5)


In [11]:
# ndenumerate yields (index tuple, value) pairs for every element
# NOTE: relies on x defined in an earlier cell; it must hold exactly 10 elements for this reshape
y = x.reshape((2, 5))
for position, value in np.ndenumerate(y):
    print(position, value)

(0, 0) 0
(0, 1) 1
(0, 2) 2
(0, 3) 3
(0, 4) 4
(1, 0) 5
(1, 1) 6
(1, 2) 7
(1, 3) 8
(1, 4) 9


In [12]:
# Four ways of walking over an array (y comes from the previous cell)

# 1. Plain iteration goes over the first axis, like a list of lists
for row in y:
    print(row)

# 2. Nested loops reach the individual scalars
for row in y:
    for value in row:
        print(value)

# 3. nditer iterates through all elements without nested loops
for value in np.nditer(y):
    print(value)

# 4. ndenumerate additionally yields the index tuple of every element
for position, value in np.ndenumerate(y):
    print(position, value)

[0 1 2 3 4]
[5 6 7 8 9]
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
(0, 0) 0
(0, 1) 1
(0, 2) 2
(0, 3) 3
(0, 4) 4
(1, 0) 5
(1, 1) 6
(1, 2) 7
(1, 3) 8
(1, 4) 9


In [25]:
# Turn two 1-D vectors into 1x3 rows, then join them along axis 0 into a 2x3 array
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
as_rows = [vec.reshape(1, -1) for vec in (x, y)]
a = np.concatenate(as_rows, axis=0)
print(a)

[[1 2 3]
 [4 5 6]]


In [24]:
# Joining arrays: concatenate vs. stack vs. vstack
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
pair = (x, y)

a = np.concatenate(pair)     # joins along the existing axis -> one longer 1-D array
b = np.stack(pair, axis=0)   # creates a new axis at the given position
c = np.vstack(pair)          # stacks the vectors as rows

print(a.shape, b.shape, b, c, sep="\n")

(6,)
(2, 3)
[[1 2 3]
 [4 5 6]]
[[1 2 3]
 [4 5 6]]


In [None]:
# array_split cuts an array into a list of sub-arrays
n_parts = 3

x = np.array([1, 2, 3, 4, 5, 6])
a = np.array_split(x, n_parts)           # 1-D: three pieces of two elements each
print(a)

x = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [13, 14, 15],
    [16, 17, 18],
])
a = np.array_split(x, n_parts, axis=0)   # 2-D: split along the rows
print(x)
print(a)

In [None]:
# Boolean masks: a comparison yields an array of True/False that can index the original
x = np.array([1, 2, 3, 4, 5, 2, 4])
j = x > 2                     # element-wise comparison -> boolean filter
print(j, x[j], sep="\n")      # the mask, then only the elements where it is True

In [None]:
# Searching, filtering and replacing by condition
x = np.array([1, 2, 3, 4, 5, 2, 4])

is_four = x == 4
i = np.where(is_four)   # tuple of index arrays where the condition holds
print(i)

j = x > 2               # boolean filter
print(j)
print(x[j])             # keep only the elements that pass the filter

x[is_four] = 6          # assigning through a mask replaces the matching values in place
print(x)

In [None]:
# Replace every negative entry with 0 using a boolean mask
y = np.array([-1, -1, -1, -1, 1, 1, 1, 1])
negative = y < 0
y[negative] = 0
y

#### Array Operations
- standard element-wise operators: `+`, `-`, `*`, `/`, `**` (power)
- matrix multiplication: `@` (equivalently `np.matmul`)

In [26]:
# Matrix product versus element-wise product
x = np.array([[1, 2], [3, 4]])
y = np.array([1, 2]).reshape((2, 1))   # column vector

print(x @ y)              # matrix product via the @ operator
print(np.matmul(x, y))    # the same product via the function form

z = x * x                 # * is element-wise, so this squares each entry
print(z)

[[ 5]
 [11]]
[[ 5]
 [11]]
[[ 1  4]
 [ 9 16]]


#### Broadcasting
- an important feature to facilitate operations
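- when two arrays have different but compatible shapes, the smaller one is virtually repeated along its missing or length-1 dimensions so that the element-wise operation can be carried out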

In [None]:
# Broadcasting: combining a 2x2 array with smaller arrays
x = np.array([[1, 1], [2, 2]])
y = np.array([0, 1])                     # shape (2,)
y1 = y.reshape(1, 2)                     # shape (1, 2): a row
z = np.array([0, 1]).reshape(2, 1)       # shape (2, 1): a column
print(x)

# y and y1 are repeated as rows to reach 2x2; z is repeated as a column
for factor in (y, y1, z):
    print(x * factor)

print(y1 + z)   # both operands are broadcast here: y1 (1x2) and z (2x1) give a 2x2 result

In [None]:
# Sum over every element of a 3x4 float32 array
x = np.arange(12).astype('f4')
x = x.reshape(3, 4)
print(x)
print(np.sum(x))   # no axis given -> a single total


In [None]:
# Reductions along an axis: axis=0 collapses the rows, axis=1 collapses the columns
x = np.arange(12).astype('f4').reshape(3, 4)
print(x)

# (function, axis) pairs, applied in order: sum, mean, standard deviation, variance, product
reductions = (
    (np.sum, 1),
    (np.mean, 1),
    (np.std, 0),
    (np.var, 1),
    (np.prod, 1),
)
for stat, along in reductions:
    print(stat(x, axis=along))

print(np.cumsum(x))   # cumulative sum over the flattened array

In [None]:
# Standardise along axis 0: subtract the mean and divide by the standard deviation
# NOTE: operates on x from the previous cell and overwrites it
mx, sx = np.mean(x, axis=0), np.std(x, axis=0)
print(mx)

x = (x - mx) / sx   # the axis-0 statistics are broadcast against x

# After standardising, the axis-0 mean should be ~0 and the standard deviation ~1
print(np.mean(x, axis=0), np.std(x, axis=0), sep="\n")


In [None]:
# unique returns the distinct values in sorted order; it works for strings too
animals = ['cat', 'dog', 'cat', 'fox']
x = np.array(animals)
u = np.unique(x)
print(u)

In [None]:
# Treating 1-D arrays as sets
x = np.array([1, 1, 1, 2, 3, 4, 5, 5, 6, 7])
u = np.unique(x)    # distinct values, sorted
print(u)

a = np.array([1, 2, 3, 4])
b = np.array([3, 4, 5, 6])

# assume_unique=True skips de-duplication; valid here because a and b contain no repeats
au = np.union1d(a, b)                             # in a or b
ai = np.intersect1d(a, b, assume_unique=True)     # in both a and b
ad = np.setdiff1d(a, b, assume_unique=True)       # in a but not in b
ax = np.setxor1d(a, b, assume_unique=True)        # in exactly one of a and b

print(au, ai, ad, ax, sep="\n")

#### Random number generation
- `choice(a, size, p)` draws samples from the values in `a`, where `p` gives the probability of each value (a pmf over `a`) and `size` is the shape of the result

In [None]:
from numpy import random

# Draw a 3x9 array of values from {0, 1, 2, 3}; p gives the probability of each value
# (the probabilities must sum to 1). The result differs on every run because no seed is set.
x = random.choice([0, 1, 2, 3], p=[0.1, 0.2, 0.6, 0.1], size=(3, 9))
print(x)

#### Random permutations
- permutation - returns a permutation of the array
- shuffle - shuffles in place

In [None]:
# permutation returns a shuffled copy; shuffle reorders its argument in place
x = np.array(range(5))
print(x)

y = random.permutation(x)   # x itself is left untouched
print(y)

random.shuffle(x)           # x is now reordered; nothing is returned
print(x)

#### Sampling from different distributions
- normal, uniform
- binomial, multinomial
- Poisson, exponential, chi-square
- Rayleigh, Pareto, Zipf

In [None]:
# Draw 1000 samples from a normal distribution with mean 1 (loc) and standard deviation 0.5 (scale)
xn = random.normal(loc=1, scale=0.5, size=1000)

# Show a short preview and summary statistics instead of dumping all 1000 values
print(xn[:10])
print(xn.mean(), xn.std())   # should be close to loc and scale respectively

In [None]:
# pyplot is the plotting interface; the bare `matplotlib` package does not provide legend()/show()
import matplotlib.pyplot as plt
import seaborn as sns

# Draw 1000 samples from each distribution. The order of the calls is kept as-is so that
# the sequence of random numbers consumed from the generator is unchanged.
xn = random.normal(loc=1, scale=0.5, size=(1000))
xb = random.binomial(n=10, p=0.5, size=1000)
xp = random.poisson(lam=2, size=1000)
xu = random.uniform(0, 8, size=1000)
# multinomial returns one vector of counts (how many of the n=1000 trials fell into each
# of the 6 categories), so it is printed below rather than plotted as a density
xm = random.multinomial(n=1000, pvals=[1/10, 1/10, 1/5, 1/5, 1/5, 1/5])
xe = random.exponential(scale=2, size=1000)
xc = random.chisquare(df=5, size=1000)

# Overlay one kernel density estimate per sample; labels make the curves distinguishable
samples = {
    "normal": xn,
    "binomial": xb,
    "poisson": xp,
    "uniform": xu,
    "exponential": xe,
    "chi-square": xc,
}
for dist_name, sample in samples.items():
    sns.kdeplot(sample, label=dist_name)
plt.legend()

print(xm)


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the numeric table; every row is one sample
data = np.loadtxt("abalone.csv", delimiter=',')

# Column 7 is the regression target, columns 0-6 are the predictors
# NOTE(review): the meaning of the columns is not visible here; confirm against the CSV
y = data[:, 7]
features = data[:, 0:7]

# Prepend a column of ones so that the first coefficient of a linear model acts as the intercept
ones_column = np.ones((features.shape[0], 1))
x = np.concatenate((ones_column, features), axis=1)
print(x.shape, y.shape)

In [None]:
# Ridge regression solved in closed form, evaluated over 10 random train/test splits.
# Uses x (design matrix), y (target), train_test_split and r2_score from the previous cell.
la = 0.0001   # ridge penalty added to the diagonal of X^T X
r2 = []       # R^2 on the training part of each split
r2t = []      # R^2 on the held-out part of each split
for r in range(10):
    # 90/10 split; the loop index doubles as the seed so each split is reproducible
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=r)

    # Normal equations: (X^T X + la * I) be = X^T y.
    # The identity is sized from the data rather than hard-coded, so the cell keeps
    # working if the number of columns in x changes.
    n_features = X_train.shape[1]
    XX = X_train.T @ X_train + la * np.eye(n_features)
    XY = X_train.T @ y_train
    be = np.linalg.solve(XX, XY)   # solve the linear system instead of inverting XX

    # Predictions and R^2 on both parts of the split
    py = X_train @ be
    pyt = X_test @ be
    r2i = r2_score(y_train, py)
    r2ti = r2_score(y_test, pyt)
    print(r, r2i, r2ti)
    r2.append(r2i)
    r2t.append(r2ti)

# Average train and test R^2 over the 10 splits
print(np.mean(r2), np.mean(r2t))