## 线性代数

In [6]:
def vector_add(v, w):
    """Return the element-wise sum of v and w as a new list.

    Elements are paired with zip, so the result is as long as the
    shorter of the two inputs.
    """
    summed = []
    for left, right in zip(v, w):
        summed.append(left + right)
    return summed

def vector_subtract(v, w):
    """Return the element-wise difference v - w as a new list.

    Elements are paired with zip, so the result is as long as the
    shorter of the two inputs.
    """
    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference

def vector_sum(vectors):
    """Return the element-wise sum of all vectors.

    The vectors are folded left to right with vector_add.

    Raises:
        TypeError: if ``vectors`` is empty (reduce has no initial value).
    """
    # reduce is not a builtin in Python 3; import it locally so this
    # function works regardless of what the rest of the file imports.
    from functools import reduce

    return reduce(vector_add, vectors)

def scalar_multiply(c, v):
    """Return a new list with every element of v multiplied by c."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled

def vector_mean(vs):
    """Return vector_sum(vs) scaled by 1/len(vs).

    That is the element-wise average of the vectors in vs.
    """
    weight = 1/len(vs)
    total = vector_sum(vs)
    return scalar_multiply(weight, total)

def dot(v, w):
    """Return the dot product v1*w1 + v2*w2 + ... as a single number.

    Elements are paired with zip, so extra trailing elements of the
    longer input are ignored. Two empty inputs give 0.
    """
    # Sum the pairwise products; returning the bare list of products
    # would not be a dot product and breaks callers that expect a scalar.
    return sum(vi * wi for vi, wi in zip(v, w))

def sum_of_squares(v):
    """Return dot(v, v), i.e. v dotted with itself."""
    self_product = dot(v, v)
    return self_product

In [8]:
import math 

# Compute the magnitude (length) of a vector
def magnitude(v):
    """Return the square root of sum_of_squares(v)."""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)

def squared_distance(v, w):
    """Return sum_of_squares of the element-wise difference v - w.

    Intended formula: (v1 - w1)**2 + (v2 - w2)**2 + ...
    """
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def distance(v, w):
    """Return the magnitude of the element-wise difference v - w."""
    difference = vector_subtract(v, w)
    return magnitude(difference)

### 矩阵

In [4]:
# Example matrices stored as lists of rows:
# A has 2 rows of 3 entries, B has 3 rows of 2 entries.
A = [
    [1, 2, 3],
    [4, 5, 6],
]

B = [
    [1, 2],
    [3, 4],
    [5, 6],
]

def shape(A):
    """Return (rows, cols) for a matrix stored as a list of rows.

    The column count is taken from the first row; a falsy (e.g. empty)
    matrix is reported as having 0 columns.
    """
    row_count = len(A)
    if not A:
        return row_count, 0
    return row_count, len(A[0])

# Show the (rows, cols) shape of each example matrix, one per line.
print("\n".join(str(shape(matrix)) for matrix in (A, B)))



(2, 3)
(3, 2)


### 统计学

In [1]:
# Mean

def mean(x):
    """Return the sum of x divided by its length."""
    total = sum(x)
    count = len(x)
    return total / count

In [2]:
# Median
def median(v):
    """Return the median of the values in v.

    For an odd number of values this is the middle element of the sorted
    data; for an even number it is the average of the two middle elements.

    Raises:
        IndexError: if v is empty.
    """
    n = len(v)
    sorted_v = sorted(v)
    mid = n // 2

    if n % 2 == 1:
        # Index the sorted copy, not the caller's (possibly unsorted) input.
        return sorted_v[mid]

    # Even count: average the two middle values of the sorted data.
    lo = mid - 1
    hi = mid
    return (sorted_v[lo] + sorted_v[hi]) / 2

In [3]:
# Quantile
def quantile(x, p):
    """Return the value at the p-th quantile of x.

    The data is sorted and the element at index int(p * len(x)) is
    returned, with the index clamped to the last element so that p == 1
    yields the maximum.

    Args:
        x: the data values.
        p: the quantile as a fraction between 0 and 1 inclusive.

    Raises:
        ValueError: if p is outside [0, 1].
        IndexError: if x is empty.
    """
    if not 0 <= p <= 1:
        # A negative p would otherwise wrap around via negative indexing.
        raise ValueError("p must be between 0 and 1")
    ordered = sorted(x)
    # Clamp so p == 1.0 selects the last element instead of running off the end.
    p_index = min(int(p * len(ordered)), len(ordered) - 1)
    return ordered[p_index]

### 离散度

In [5]:
# Range
def data_range(x):
    """Return the difference between the largest and smallest value of x."""
    largest = max(x)
    smallest = min(x)
    return largest - smallest

In [7]:
# Variance
def de_mean(x):
    """Return a new list with mean(x) subtracted from every element of x."""
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted

def variance(x):
    """Return sum_of_squares of the de-meaned data divided by (n - 1).

    The n - 1 denominator means a single-element input makes the
    denominator zero.
    """
    count = len(x)
    centered = de_mean(x)
    return sum_of_squares(centered) / (count - 1)

In [9]:
# Standard deviation
def standard_deviation(x):
    """Return the square root of variance(x)."""
    spread = variance(x)
    return math.sqrt(spread)

In [11]:
# A more robust measure of dispersion
def interquartile_range(x):
    """Return quantile(x, 0.75) minus quantile(x, 0.25)."""
    # Difference between the 75% and 25% quantiles.
    upper = quantile(x, 0.75)
    lower = quantile(x, 0.25)
    return upper - lower

方差衡量了单个变量对均值的偏离程度，而协方差衡量了两个变量对均值的串联偏离程度

In [13]:
# Covariance
def covariance(x, y):
    """Return dot of the de-meaned x and de-meaned y, divided by (n - 1).

    n is taken from len(x).
    """
    count = len(x)
    x_centered = de_mean(x)
    y_centered = de_mean(y)
    return dot(x_centered, y_centered) / (count - 1)

In [14]:
# Correlation
def correlation(x, y):
    """Return covariance(x, y) divided by both standard deviations.

    Returns 0 unless both standard deviations are greater than zero.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if not (stdev_x > 0 and stdev_y > 0):
        # No variation in at least one input: report zero correlation.
        return 0
    return covariance(x, y) / stdev_x / stdev_y