In [1]:
import pandas as pd
import numpy as np
from line_profiler import LineProfiler

%load_ext Cython

ImportError: No module named line_profiler

In [None]:
# Dataset source: https://www.kaggle.com/apratim87/housingdata
# header=None: the file is read without a header row, so the first line is treated as data.
data = pd.read_csv('housingdata.csv', header=None)

In [None]:
# Standardize every column (the last one included): subtract its mean, divide by its std.
data = (data - data.mean()) / data.std()

In [None]:
def profile_print(func_to_call, *args, **kwargs):
    """Run ``func_to_call`` under line_profiler and print per-line timings.

    Parameters
    ----------
    func_to_call : callable
        Function to profile line by line.
    *args, **kwargs
        Forwarded unchanged to ``func_to_call`` (keyword arguments such as
        ``n_iter=...`` are supported as well as positional ones).

    Returns
    -------
    None
        The result of ``func_to_call`` is discarded; only the statistics
        are printed.
    """
    profiler = LineProfiler()
    profiler.add_function(func_to_call)
    profiler.runcall(func_to_call, *args, **kwargs)
    profiler.print_stats()

In [None]:
# Feature matrix: every column except the last one.
X = data.values[:, :-1]
# Target: the last column, sliced with -1: so it stays 2-D with a single column.
y = data.values[:, -1:]

У нас есть выборка $X \in R^{n\times d}, y\in R^n$, состоящая из $n$ объектов. Каждый из $n$ объектов описывается вектором из $d$ признаков (строка матрицы $X$) и для каждого объекта мы знаем значение целевой переменной $y$. В данной задаче по некоторым параметрам мы хотим научиться предсказывать стоимость квадратного метра жилья.


Предположим, что для объекта $i$ значение $y_i$ можно приблизить линейной комбинацией признаков $x_i$ с некоторыми весами $w$, где $w$ и $x_i$ — векторы размера $d \times 1$, а $y_i$ — вещественное число.
$$y_i \approx \langle w, x_i \rangle$$


Методом наименьших квадратов найдем веса $w$.
$$ J(w) = \frac{1}{n} \sum_{i=1}^n (\langle w, x_i \rangle - y_i)^2$$
$$ J(w) \rightarrow \min_w $$

Минимум данного функционала будем искать методом градиентного спуска (т.е. будем идти в направлении, противоположном градиенту):
$$ w_j = w_j - \alpha \frac{\partial}{\partial w_j}J(w) $$
$$ \frac{\partial}{\partial w_j}J(w)  = \frac{2}{n}  \sum_{i=1}^n (\langle w, x_i \rangle - y_i)\cdot x_i^j, $$
где $x_i^j$ — $j$-й признак объекта $i$.

In [None]:
from sklearn.linear_model import LinearRegression
# Reference fit with scikit-learn's least-squares solver on the same X, y.
lr = LinearRegression()
lr = lr.fit(X, y)
# Training mean squared error. The whole expression is wrapped in print(...) so it
# prints the value on Python 3 as well (the statement form called .mean() on None there).
print(((y - lr.predict(X)) ** 2).mean())

In [None]:
# Number of gradient-descent iterations passed as n_iter to the runs below.
N_ITER = 300

In [None]:
def gradient_decent_python_naive(X, y, n_iter=100, alpha=0.1):
    """Least-squares gradient descent written with plain Python loops.

    Parameters
    ----------
    X : 2-D array, indexed as X[object, feature]
    y : 2-D array; only column 0 is read (y[object, 0])
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    # Overwritten by copyto before every use; drawn randomly only to keep the
    # random stream identical to the other implementations.
    w_old = np.random.rand(n_features)
    for iteration in range(n_iter):
        np.copyto(w_old, w)
        for f in range(n_features):
            gradient = 0.0
            for obj in range(n_objects):
                # Predict with the snapshot w_old: w is being overwritten feature
                # by feature, and every partial derivative must be evaluated at
                # the same point for this to be a gradient step.
                gradient += ((X[obj, :] * w_old).sum() - y[obj, 0]) * X[obj, f]
            gradient = gradient * 2 / n_objects
            w[f] = w_old[f] - alpha * gradient
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python_naive(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_python_naive(X, y, n_iter=N_ITER)

In [None]:
profile_print(gradient_decent_python_naive, X, y, N_ITER)

In [None]:
# The loops over objects and features are swapped: objects outside, features inside.

def gradient_decent_python(X, y, n_iter=100, alpha=0.1):
    """Least-squares gradient descent with the object loop outermost.

    Parameters
    ----------
    X : 2-D array, indexed as X[object, feature]
    y : 2-D array; only column 0 is read (y[object, 0])
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    w_old = np.random.rand(n_features)  # overwritten by copyto before use
    for iteration in range(n_iter):
        np.copyto(w_old, w)
        gradient = np.zeros(n_features)
        for obj in range(n_objects):
            for f in range(n_features):
                # The residual is recomputed for every feature here on purpose.
                gradient[f] += ((X[obj, :] * w).sum() - y[obj, 0]) * X[obj, f]
        gradient = gradient * 2 / n_objects
        w = w_old - alpha * gradient
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_python(X, y, n_iter=N_ITER)

In [None]:
profile_print(gradient_decent_python, X, y, N_ITER)

In [None]:
# Hoist the residual computation out of the loop over features.

def gradient_decent_python_faster(X, y, n_iter=100, alpha=0.1):
    """Least-squares gradient descent computing each object's residual once.

    Parameters
    ----------
    X : 2-D array, indexed as X[object, feature]
    y : 2-D array; only column 0 is read (y[object, 0])
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    w_old = np.random.rand(n_features)  # overwritten by copyto before use
    for iteration in range(n_iter):
        np.copyto(w_old, w)
        gradient = np.zeros(n_features)
        for obj in range(n_objects):
            # Residual of this object, shared by all of its feature terms.
            diff = (X[obj, :] * w).sum() - y[obj, 0]
            for f in range(n_features):
                gradient[f] += diff * X[obj, f]
        gradient = gradient * 2 / n_objects
        w = w_old - alpha * gradient
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python_faster(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_python_faster(X, y, n_iter=N_ITER)

In [None]:
profile_print(gradient_decent_python_faster, X, y, N_ITER)

In [None]:
from numba import jit

In [None]:
# numba: the same loops, JIT-compiled in nopython mode.

@jit(nopython=True)
def gradient_decent_numba(X, y, n_iter=100, alpha=0.1):
    """Loop-based least-squares gradient descent compiled with numba.

    Parameters
    ----------
    X : 2-D array, indexed as X[object, feature]
    y : 2-D array; only column 0 is read (y[object, 0])
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    w_old = np.random.rand(n_features)  # overwritten element-wise before use
    for iteration in range(n_iter):
        # Element-wise copy instead of np.copyto, as in the original numba cell.
        for f in range(n_features):
            w_old[f] = w[f]
        gradient = np.zeros(n_features)
        for obj in range(n_objects):
            diff = (X[obj, :] * w).sum() - y[obj, 0]
            for f in range(n_features):
                gradient[f] += diff * X[obj, f]
        gradient = gradient * 2 / n_objects
        w = w_old - alpha * gradient
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_numba(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_numba(X, y, n_iter=N_ITER)

In [None]:
# Remove the loop over objects: residuals for all objects come from one matrix product.

def gradient_decent_numpy(X, y, n_iter=100, alpha=0.1):
    """Least-squares gradient descent vectorized over objects.

    Parameters
    ----------
    X : 2-D array of shape (n_objects, n_features)
    y : 2-D array that broadcasts against ``X.dot(w)`` (a single column)
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features, 1)
    w_old = np.random.rand(n_features, 1)  # overwritten by copyto before use
    for iteration in range(n_iter):
        np.copyto(w_old, w)
        gradient = np.zeros((n_features, 1))
        # Residuals do not depend on the feature index: compute them once per
        # iteration and reuse them below instead of redoing X.dot(w) per feature.
        diff = X.dot(w) - y
        for f in range(n_features):
            gradient[f, 0] = (diff * X[:, f:f+1]).sum()
        gradient = gradient * 2 / n_objects
        w = w_old - alpha * gradient
    return w

In [None]:
w = gradient_decent_numpy(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_numpy(X, y, n_iter=N_ITER)

In [None]:
profile_print(gradient_decent_numpy, X, y, N_ITER)

In [None]:
# Also remove the loop over features.

def gradient_decent_numpy_faster(X, y, n_iter=100, alpha=0.1):
    """Fully vectorized least-squares gradient descent.

    Parameters
    ----------
    X : 2-D array of shape (n_objects, n_features)
    y : 2-D array that broadcasts against ``X.dot(w)`` (a single column)
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features, 1)
    for iteration in range(n_iter):
        # Residual column times X, summed over objects -> one value per feature.
        gradient = ((X.dot(w) - y) * X).sum(axis=0).reshape(-1, 1)
        gradient = gradient * 2 / n_objects
        w -= alpha * gradient
    return w

In [None]:
w = gradient_decent_numpy_faster(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_numpy_faster(X, y, n_iter=N_ITER)

In [None]:
profile_print(gradient_decent_numpy_faster, X, y, N_ITER)

In [None]:
@jit(nopython=True)
def gradient_decent_numpy_faster_numba(X, y, n_iter=100, alpha=0.1):
    """The fully vectorized gradient descent, compiled with numba.

    Parameters
    ----------
    X : 2-D array of shape (n_objects, n_features)
    y : 2-D array that broadcasts against ``X.dot(w)`` (a single column)
    n_iter : int
        Number of gradient steps.
    alpha : float
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features, 1)
    for iteration in range(n_iter):
        # NOTE(review): nopython support for sum(axis=...) depends on the numba
        # version - confirm it compiles in the target environment.
        gradient = ((X.dot(w) - y) * X).sum(axis=0).reshape(-1, 1)
        gradient = gradient * 2 / n_objects
        w -= alpha * gradient
    return w

In [None]:
# The biggest problem here is sum() with an axis argument.

# n_iter=N_ITER is passed like in every other evaluation cell, so the MSE is comparable.
w = gradient_decent_numpy_faster_numba(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%cython
import numpy as np
cimport numpy as np

def gradient_decent_numpy_faster_cython(X, y, n_iter=100, alpha=0.1):
    """Vectorized least-squares gradient descent, compiled by Cython without types.

    X is a 2-D array (objects x features) and y a single-column 2-D array.
    Runs ``n_iter`` steps of size ``alpha`` from a ``np.random.rand`` start and
    returns the weights as an (n_features, 1) array.
    """
    sample_count, feature_count = X.shape[0], X.shape[1]
    weights = np.random.rand(feature_count, 1)
    for _ in range(n_iter):
        residual = X.dot(weights) - y
        column_sums = (residual * X).sum(axis=0).reshape(-1, 1)
        weights -= alpha * (column_sums * 2 / sample_count)
    return weights

In [None]:
w = gradient_decent_numpy_faster_cython(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_numpy_faster_cython(X, y, n_iter=N_ITER)

In [None]:
# Cython annotations (the -a flag of %%cython)

In [None]:
%%cython -a
import numpy as np
cimport numpy as np

def gradient_decent_numpy_faster_cython(X, y, n_iter=100, alpha=0.1):
    """Vectorized least-squares gradient descent, compiled by Cython without types.

    X is a 2-D array (objects x features) and y a single-column 2-D array.
    Runs ``n_iter`` steps of size ``alpha`` from a ``np.random.rand`` start and
    returns the weights as an (n_features, 1) array.
    """
    sample_count, feature_count = X.shape[0], X.shape[1]
    weights = np.random.rand(feature_count, 1)
    for _ in range(n_iter):
        residual = X.dot(weights) - y
        column_sums = (residual * X).sum(axis=0).reshape(-1, 1)
        weights -= alpha * (column_sums * 2 / sample_count)
    return weights

In [None]:
# добавим типы

In [None]:
%%cython -a
import numpy as np
cimport numpy as np

cpdef gradient_decent_numpy_faster_cython(np.ndarray[np.float64_t, ndim=2] X,
                                          np.ndarray[np.float64_t, ndim=2] y,
                                          int n_iter=100,
                                          np.float64_t alpha=0.1):
    """Vectorized least-squares gradient descent with typed arguments and locals.

    X and y are 2-D float64 arrays (y a single column). Runs ``n_iter`` steps
    of size ``alpha`` from a ``np.random.rand`` start and returns the weights
    as an (n_features, 1) array. All array arithmetic still goes through
    NumPy calls; only the declarations are typed.
    """
    cdef int n_objects = X.shape[0]
    cdef int n_features = X.shape[1]
    cdef np.ndarray[np.float64_t, ndim=2] w = np.random.rand(n_features, 1)
    cdef np.ndarray[np.float64_t, ndim=2] gradient
    for step in range(n_iter):
        gradient = ((X.dot(w) - y) * X).sum(axis=0).reshape(-1, 1)
        gradient = gradient * 2 / n_objects
        w -= alpha * gradient
    return w

In [None]:
w = gradient_decent_numpy_faster_cython(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
gradient_decent_numpy_faster_cython(X, y, n_iter=N_ITER)

In [None]:
#  вернемся к нашему хорошему коду на питоне и добавим типы

In [None]:
%%cython -a
import numpy as np
cimport numpy as np


cpdef gradient_decent_python_faster_cython_v0(np.ndarray[np.float64_t, ndim=2] X,
                                              np.ndarray[np.float64_t, ndim=2] y,
                                              int n_iter=100,
                                              np.float64_t alpha=0.1):
    """Loop-based gradient descent with typed declarations only.

    X and y are 2-D float64 arrays; only column 0 of y is read. Runs
    ``n_iter`` steps of size ``alpha`` from a ``np.random.rand`` start and
    returns the weights reshaped to (n_features, 1). The body still calls
    NumPy for the copy, the zero-fill, the row product and the final update.
    """
    cdef int n_objects = X.shape[0]
    cdef int n_features = X.shape[1]
    cdef np.float64_t diff
    cdef np.ndarray[np.float64_t, ndim=1] w = np.random.rand(n_features)
    cdef np.ndarray[np.float64_t, ndim=1] w_old = np.random.rand(n_features)
    cdef np.ndarray[np.float64_t, ndim=1] gradient
    for step in range(n_iter):
        np.copyto(w_old, w)
        gradient = np.zeros(n_features)
        for row in range(n_objects):
            # Residual of this object, shared by all of its feature terms.
            diff = (X[row, :] * w).sum() - y[row, 0]
            for col in range(n_features):
                gradient[col] += diff * X[row, col]
        gradient = gradient * 2 / n_objects
        w = w_old - alpha * gradient
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python_faster_cython_v0(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
res = gradient_decent_python_faster_cython_v0(X, y, n_iter=N_ITER)

In [None]:
# заменим код, где много вызовов к python (отмечены желтым)

In [None]:
%%cython -a
import numpy as np
cimport numpy as np


cpdef gradient_decent_python_faster_cython_v1(np.ndarray[np.float64_t, ndim=2] X,
                                              np.ndarray[np.float64_t, ndim=2] y,
                                              int n_iter=100,
                                              np.float64_t alpha=0.1):
    """Loop-based gradient descent with every NumPy call in the loop replaced.

    Parameters
    ----------
    X : 2-D float64 array, indexed as X[object, feature]
    y : 2-D float64 array; only column 0 is read
    n_iter : int
        Number of gradient steps.
    alpha : float64
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    cdef int n_objects, n_features
    # Loop indices are declared explicitly so that buffer indexing below is
    # guaranteed to use C integers rather than relying on type inference.
    cdef Py_ssize_t iteration, obj, f
    cdef np.float64_t diff
    cdef np.ndarray[np.float64_t, ndim=1] w
    cdef np.ndarray[np.float64_t, ndim=1] w_old
    cdef np.ndarray[np.float64_t, ndim=1] gradient
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    w_old = np.random.rand(n_features)  # overwritten element-wise before use
    gradient = np.zeros(n_features)
    for iteration in range(n_iter):
        # Reset the accumulator and snapshot the weights without NumPy calls.
        for f in range(n_features):
            gradient[f] = 0
            w_old[f] = w[f]
        for obj in range(n_objects):
            # Residual <w, x_obj> - y_obj accumulated as a scalar.
            diff = - y[obj, 0]
            for f in range(n_features):
                diff += X[obj, f] * w[f]
            for f in range(n_features):
                gradient[f] += diff * X[obj, f]
        for f in range(n_features):
            gradient[f] = gradient[f] * 2 / n_objects
            w[f] = w_old[f] - alpha * gradient[f]
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python_faster_cython_v1(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
res = gradient_decent_python_faster_cython_v1(X, y, n_iter=N_ITER)

In [None]:
# добавим декораторы

In [None]:
%%cython -a
import numpy as np
cimport numpy as np
import cython


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef gradient_decent_python_faster_cython_v2(np.ndarray[np.float64_t, ndim=2] X,
                                              np.ndarray[np.float64_t, ndim=2] y,
                                              int n_iter=100,
                                              np.float64_t alpha=0.1):
    """Loop-based gradient descent with Cython's index safety checks disabled.

    Bounds checking and negative-index wraparound are switched off, so every
    index used below must stay within ``range(n_objects)`` / ``range(n_features)``.

    Parameters
    ----------
    X : 2-D float64 array, indexed as X[object, feature]
    y : 2-D float64 array; only column 0 is read
    n_iter : int
        Number of gradient steps.
    alpha : float64
        Step size.

    Returns
    -------
    ndarray of shape (n_features, 1)
        Weights after ``n_iter`` steps, starting from ``np.random.rand``.
    """
    cdef int n_objects, n_features
    # Loop indices are declared explicitly so that buffer indexing below is
    # guaranteed to use C integers rather than relying on type inference.
    cdef Py_ssize_t iteration, obj, f
    cdef np.float64_t diff
    cdef np.ndarray[np.float64_t, ndim=1] w
    cdef np.ndarray[np.float64_t, ndim=1] w_old
    cdef np.ndarray[np.float64_t, ndim=1] gradient
    n_objects, n_features = X.shape[0], X.shape[1]
    w = np.random.rand(n_features)
    w_old = np.random.rand(n_features)  # overwritten element-wise before use
    gradient = np.zeros(n_features)
    for iteration in range(n_iter):
        # Reset the accumulator and snapshot the weights without NumPy calls.
        for f in range(n_features):
            gradient[f] = 0
            w_old[f] = w[f]
        for obj in range(n_objects):
            # Residual <w, x_obj> - y_obj accumulated as a scalar.
            diff = - y[obj, 0]
            for f in range(n_features):
                diff += X[obj, f] * w[f]
            for f in range(n_features):
                gradient[f] += diff * X[obj, f]
        for f in range(n_features):
            gradient[f] = gradient[f] * 2 / n_objects
            w[f] = w_old[f] - alpha * gradient[f]
    return w.reshape(-1, 1)

In [None]:
w = gradient_decent_python_faster_cython_v2(X, y, n_iter=N_ITER)
# Training MSE; print(...) wraps the whole expression so it also works on Python 3.
print(((y - X.dot(w)) ** 2).mean())

In [None]:
%%timeit
res = gradient_decent_python_faster_cython_v2(X, y, n_iter=N_ITER)

In [None]:
def m_mult(X, Y, cs):
    if cs == 'std':
        return X.dot(Y)
    else:
        Z = np.arra[]
        for x in range 