In [1]:
import numpy as np
from scipy import sparse
import zsh_in_jupyter

In [2]:
# 打开当前文件夹
!temp=$(pwd);\
open $temp

In [3]:
zsh_in_jupyter.opendir()

In [11]:
# 在terminal里跑的文件不能拿到这里来用
!python fib.py 1

1


In [22]:
# 要run一次之后才能将.py文件中定义的函数或者对象导入目前的环境
%run fib.py 10

3628800


In [23]:
%who

NamespaceMagics	 fib	 get_ipython	 getsizeof	 json	 np	 para	 sparse	 sys	 
var_dic_list	 yapf_reformat	 


# Vector

In [8]:
vector_row = np.array([1,2,3])
vector_column = np.array([[1],
                          [2],
                          [3]])
print(vector_row.shape)
print(vector_column.shape) # 这个已经不是一个vector了，而是一个array

(3,)
(3, 1)


In [10]:
type(vector_column)

numpy.ndarray

In [11]:
type(vector_row)

numpy.ndarray

# Array

In [12]:
matrix = np.array([[1,2],
                   [0,4],
                   [5,6]])

In [13]:
print(sparse.csr_matrix(matrix))

  (0, 0)	1
  (0, 1)	2
  (1, 1)	4
  (2, 0)	5
  (2, 1)	6


# Selecting Elements   
Expression| Description
---|----
a[m]
a[-m]
a[m:n]
a[:],a[0:-1]
a[:n]
a[m:],a[m:-1]
a[m:n:p]
a[::-1]| Select all the elements, in reverse order.


注意提取部分元素出来的时候，存储地址并没有改变，所以一般是用.copy()

In [13]:
matrix

array([[1, 2],
       [0, 4],
       [5, 6]])

In [17]:
print(matrix[1,1])
print(matrix[1,-2])

4
0


# Creating Arrays

Function Name| Type of Array
------|---------
np.array
np.zeros
np.ones
np.diag
np.linspace
np.logspcae
np.fromfunction
np.fromfile
np.random.rand

In [7]:
np.array([1,2,3,4]).shape

(4,)

In [9]:
np.array([[1],
          [2],
          [3],
          [4]]).shape

(4, 1)

In [10]:
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [11]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [14]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [15]:
np.eye(3,k=1)

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [17]:
np.diag((3,5,6,10))

array([[ 3,  0,  0,  0],
       [ 0,  5,  0,  0],
       [ 0,  0,  6,  0],
       [ 0,  0,  0, 10]])

In [14]:
f = lambda m,n:np.sin(m)+np.cos(n)
A = np.fromfunction(f,(5,5))
A

array([[ 1.        ,  0.54030231, -0.41614684, -0.9899925 , -0.65364362],
       [ 1.84147098,  1.38177329,  0.42532415, -0.14852151,  0.18782736],
       [ 1.90929743,  1.44959973,  0.49315059, -0.08069507,  0.25565381],
       [ 1.14112001,  0.68142231, -0.27502683, -0.84887249, -0.51252361],
       [ 0.2431975 , -0.21650019, -1.17294933, -1.74679499, -1.41044612]])

In [16]:
A[0,0] = np.nan

In [18]:
np.exp(A)

array([[       nan, 1.7165257 , 0.65958341, 0.37157948, 0.5201471 ],
       [6.30580719, 3.98195654, 1.53008631, 0.86198146, 1.20662519],
       [6.74834593, 4.26140847, 1.63746709, 0.92247494, 1.29130561],
       [3.13027233, 1.9766872 , 0.75955175, 0.42789712, 0.59898207],
       [1.27532048, 0.80533238, 0.30945291, 0.17433178, 0.24403439]])

# Features of a Matrix

------------
Feature| Description 
---| :-----------:
shape      | Something  here
size   | Text
|

In [18]:
matrix = np.array([[1,2,3,4],
                   [5,6,7,8],
                   [4,3,2,1]])

In [19]:
matrix.shape

(3, 4)

# Reshaping and Resizing

Function/Method| Decription
-----|-----
np.reshape
np.ndarray.flatten
np.transpose
np.append
np.insert
np.delete

# Vetorized Expressions
Function| Description
-----|-----
np.cos
np.arccos
np.cosh
np.arccosh
np.sqrt
np.exp
np.log
np.sign
np.vectorize

In [23]:
def heaviside(x):
    return 1 if x>0 else 0

In [26]:
heaviside(np.linspace(-5,5,10))

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [28]:
# 对函数进行向量化的操作
heaviside = np.vectorize(heaviside)
heaviside(np.linspace(-5,5,10))

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# Operations of a Matrix

$(M^{m \times n},+)$ is a Abelian Group.

func| function
---| ----|
np.max|
np.mean|
np.var|
np.std|

上述函数均可以指定axis=1以获得每列的最大值,

Supppose matrix is in array class, then we have 

func| roles|          
---| ---  |    
matrix.T| transpose  
matrix.diagonal()| 可以指定offset = -1以获得次对角线 
matrix.trace()|

Linear algebra

func| roles  
---| ---
np.linalg.matrix_rank()
np.linalg.det()
np.linalg.eig()
np.dot()| 对于向量而言是内积，对矩阵则是矩阵乘法
np.linalg.inv()

In [28]:
add_100 = lambda i:i + 100
def add_func(x):
    y = x^2 + x
    return y**0.5 + 100

In [29]:
print(add_100(10))
print(add_func(10))

110
102.44948974278317


In [47]:
matrix = np.array([[1,-1,3],
                   [1,1,6],
                   [3,8,9]])

In [57]:
matrix.T

array([[ 1,  1,  3],
       [-1,  1,  8],
       [ 3,  6,  9]])

In [36]:
list(map(add_100,matrix))
list(map(add_func,matrix))

[array([101.41421356, 102.44948974, 102.44948974, 101.41421356]),
 array([101.41421356, 103.74165739, 103.74165739, 101.41421356]),
 array([101.41421356, 102.44948974, 102.44948974, 101.41421356])]

In [40]:
print(matrix + 100)
print(matrix*matrix)

[[101 102 103 104]
 [105 106 107 108]
 [104 103 102 101]]
[[ 1  4  9 16]
 [25 36 49 64]
 [16  9  4  1]]


In [48]:
# eigenspace
# 值得注意的是，
eigenvalues, eigenvectors = np.linalg.eig(matrix)

In [52]:
i = 1
matrix.dot(eigenvectors[:,i])-eigenvalues[i]*eigenvectors[:,i]

array([ 5.55111512e-16,  3.60822483e-16, -5.68989300e-16])

In [59]:
# 注意这等价于
matrix @ eigenvectors[:,i] - eigenvalues[i]*eigenvectors[:,i]

array([ 5.55111512e-16,  3.60822483e-16, -5.68989300e-16])

# Aggregate Functions
Functions| Description
-----|-------
np.mean
np.std
np.var
np.sum
np.prod
np.cumsum
np.cumprod
np.min
np.argmin
np.all
np.any

Functions| Description
-----|-------
np.where| 
np.choose|

In [7]:
data = np.random.rand(100000)
data.resize(10000,10)
df=pd.DataFrame(data)


In [36]:
np.apply_along_axis(lambda x:x.shape,0,data)

array([[10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000,
        10000]])

In [53]:
def func(x):
    return x.sum();

In [61]:
df.apply(lambda x:x.rolling(5).apply(lambda y:y.sum()),axis=0)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
4,2.635972,3.070456,2.980133,3.578833,3.890773,2.113300,2.417604,2.340682,3.143515,1.707335
...,...,...,...,...,...,...,...,...,...,...
9995,2.735663,2.308812,2.333262,0.987390,2.155822,1.629107,1.404240,3.323115,2.783520,1.582242
9996,2.590306,2.055295,2.711499,1.374067,2.443862,1.875957,1.625650,2.741136,2.688636,1.410324
9997,3.038121,2.583288,2.616227,1.417873,2.748492,2.691603,2.295152,2.207244,2.484935,1.068682
9998,2.628768,2.806818,2.727379,2.112378,2.328959,2.224719,2.299257,2.620573,2.650969,1.249636


In [59]:
np.apply_along_axis(lambda y:y.sum(),0,df)

array([5006.21823313, 4973.04360786, 4940.40132472, 4983.03829584,
       5021.52932979, 4971.32888347, 4972.24972261, 5010.9656087 ,
       5014.20597124, 4997.6854893 ])

# Random samples

a|b
---|---
np.random.random()| U(0,1)
np.random.normal($\mu$,$\sigma$,n)| N($\mu$,$\sigma$) 
np.random.logistic($\mu$,$\sigma$,n)
np.random.uniform(a,b,n)| U(a,b)

In [60]:
np.random.seed(0)

In [64]:
np.random.random(10)

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  , 0.96366276, 0.38344152])

In [68]:
np.random.normal(0,10,10)

array([  1.21675016,   4.43863233,   3.33674327,  14.94079073,
        -2.05158264,   3.13067702,  -8.54095739, -25.52989816,
         6.53618595,   8.64436199])

module