In [1]:
import numpy as np
import pandas as pd

## Linear Regression

$$ \mathbf{C(w)}= \frac{1}{2} \sum\limits_{i=1}^{m}  (y^{i}- \mathbf{w^{T}x^{i}})^2 $$

Where: 
$\mathbf{x^{i}} = [1, x_1^{i}, x_2^{i}, \dots, x_n^{i}]$, 
$\mathbf{w} = [w_0, w_1, w_2, \dots, w_n]$

The gradient is:
$$  \mathbf{\nabla C(w)} = -\mathbf{X^{T}}(y-\mathbf{Xw})  $$



In [None]:

class linear_regression():
    """Linear regression fitted with batch gradient descent.

    An intercept column of ones is prepended to the features, so the learned
    weight vector is ``[w_0, w_1, ..., w_k]`` with ``w_0`` the bias term.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient at every iteration.
    iterations : int
        Number of gradient-descent steps performed by ``fit``.
    """

    def __init__(self, learning_rate=0.01, iterations=100):
        self.learning_rate = learning_rate
        self.iterations = iterations

    @staticmethod
    def _add_intercept(X):
        """Return ``X`` as a float matrix with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Estimate the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n, k)
            Feature matrix without the intercept column.
        y : array-like of shape (n,) or (n, 1)
            Target values.

        Returns
        -------
        numpy.ndarray of shape (k + 1, 1)
            The fitted weights, intercept first.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not have one value per row of ``X``.
        """
        # add constant to X: n * (k+1)
        X = self._add_intercept(X)
        self.observations, self.dimentions = X.shape

        # Force a column vector so that y - Xw never broadcasts to n * n.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if y.shape[0] != self.observations:
            raise ValueError("X and y must contain the same number of observations")

        # initialize weights: (k+1) * 1
        weights = np.ones((self.dimentions, 1))

        for _ in range(self.iterations):
            # compute errors: n * 1
            errors = y - X.dot(weights)
            # Gradient of half the summed squared error: -X^T (y - Xw), (k+1) * 1.
            # This must be a matrix product, not an elementwise one.
            gradient = -X.T.dot(errors)
            # update weights
            weights -= self.learning_rate * gradient

        self.weights = weights
        return self.weights

    def predict(self, X):
        """Predict targets for ``X`` (shape (n, k), no intercept column).

        Returns a column vector of shape (n, 1). ``fit`` must be called first.
        """
        return self._add_intercept(X).dot(self.weights)



## Logistic Regression

$$ J(\theta)=-\frac{1}{m} \sum\limits_{i=1}^{m} [y^i log( h_{\theta}(x^i)) + (1-y^i) log(1-h_{\theta}(x^i))]   $$

Where: $h(x) = \dfrac{1}{1+e^{-\theta^T x}}$, note $h^{'}(x) = h(x)(1-h(x))$

The gradient is:

$$  \nabla J(\theta) =  -\frac{1}{m} X^T(y - h_{\theta}(X)) $$

In [5]:

class logistic_regression():
    """Binary logistic regression fitted with batch gradient descent.

    No intercept column is added: include a column of ones in ``X`` if a bias
    term is wanted.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient at every iteration.
    iteration : int
        Number of gradient-descent steps performed by ``fit``.
    """

    def __init__(self, learning_rate, iteration):
        self.learning_rate = learning_rate
        self.iteration = iteration

    def _sigmoid(self, x):
        """Logistic function 1 / (1 + e^-x), applied elementwise."""
        # Clip the argument so np.exp cannot overflow for large negative inputs.
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def _entropy(self, theta, X, y):
        """Mean binary cross-entropy of the model ``theta`` on ``(X, y)``."""
        y = np.asarray(y, dtype=float).ravel()
        h = self._sigmoid(np.dot(X, theta)).ravel()
        # Keep probabilities strictly inside (0, 1) so the logs stay finite.
        eps = 1e-12
        h = np.clip(h, eps, 1 - eps)
        return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

    def _scale(self, X):
        """Return a min-max scaled copy of ``X`` (each column mapped to [0, 1]).

        Constant columns are mapped to 0 instead of dividing by zero, so apply
        this before adding an intercept column.
        """
        X = np.array(X, dtype=float)
        lowest = X.min(axis=0)
        span = np.ptp(X, axis=0)
        span[span == 0] = 1.0
        return (X - lowest) / span

    def fit(self, X, y):
        """Estimate ``theta`` from training data.

        Parameters
        ----------
        X : array-like of shape (n, d)
            Feature matrix (already containing the intercept column, if any).
        y : array-like of shape (n,) or (n, 1)
            Binary labels in {0, 1}.

        Returns
        -------
        numpy.ndarray of shape (d, 1)
            The fitted parameters. The loss recorded before each update is
            stored in ``self.losses``.
        """
        X = np.asarray(X, dtype=float)
        observations, dimensions = X.shape
        # Force a column vector so that y - h never broadcasts to n * n.
        y = np.asarray(y, dtype=float).reshape(-1, 1)

        # initialize theta: d * 1
        theta = np.zeros((dimensions, 1))
        self.losses = []

        for _ in range(self.iteration):
            # compute loss
            self.losses.append(self._entropy(theta, X, y))
            # Gradient of the mean cross-entropy: -(1/m) X^T (y - h), as a matrix product.
            gradient = -X.T.dot(y - self._sigmoid(np.dot(X, theta))) / observations
            # update theta
            theta -= self.learning_rate * gradient

        self.theta = theta
        return self.theta

    def predict(self, X):
        """Return 0/1 class labels for ``X``.

        ``X`` may be one sample of shape (d,) or a matrix of shape (n, d); the
        result is always a 1-D integer array with one label per sample.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y_hat = self._sigmoid(np.dot(X, self.theta)).ravel()
        return (y_hat >= 1 / 2).astype(int)


    

2.718281828459045

## KNN

Time complexity: O(N\*K)+O(Nlog(N))

Space complexity: O(N)

How to find the optimal K:
- simple approach: $K \approx \sqrt{N}$, where $N$ is the number of training samples
- cross-validation (CV)

In [None]:
from collections import Counter
class KNN():
    """k-nearest-neighbours classifier using squared Euclidean distance."""

    def __init__(self, X=None, y=None):
        """Optionally store training points ``X`` and their labels ``y``."""
        self.X = X
        self.y = y

    def _dist(self, x1, x2):
        """Squared Euclidean distance between two points."""
        diff = np.asarray(x1) - np.asarray(x2)
        return np.dot(diff.T, diff)

    def fit(self, X, y):
        """Memorise the training points ``X`` and their labels ``y``."""
        self.X = X
        self.y = y

    def predict(self, x, k):
        """Return the majority label among the ``k`` training points nearest to ``x``.

        Raises
        ------
        ValueError
            If the model has no training data or ``k`` is smaller than 1.
        """
        if self.X is None or self.y is None:
            raise ValueError("fit must be called before predict")
        if k < 1:
            raise ValueError("k must be at least 1")

        distance_label = [(self._dist(x, point), label)
                          for (point, label) in zip(self.X, self.y)]
        neighbors = sorted(distance_label, key=lambda tup: tup[0])[:k]

        return Counter(label for _, label in neighbors).most_common(1)[0][0]

    # Backward-compatible alias for the original, misspelled method name.
    preict = predict
            

## K-Means

Pseudocode:
1. initialize centroids

2. repeat until centroids remain same

     - compute distance of points to every centroid
     
     - label the point to nearest one
     
     - compute new centroids


Time complexity: O(N\*K\*I)

Space complexity: O(N+K)/ O(N)

In [7]:
import random
# Initial centroids could be any points in the space, not only points from the dataset.
def initial_centroids(data, k, seed=None):
    """Pick ``k`` distinct data points to use as starting centroids.

    Parameters
    ----------
    data : iterable of points
        The dataset to sample from.
    k : int
        Number of centroids to draw; must not exceed ``len(data)``.
    seed : int, optional
        Seeds the global ``random`` generator for reproducible draws.

    Returns
    -------
    list
        ``k`` points sampled without replacement from ``data``.
    """
    # Compare against None so that seed=0 is honoured as a real seed.
    if seed is not None:
        random.seed(seed)

    # random.sample needs a sequence; list() also accepts arrays and other iterables.
    return random.sample(list(data), k)


In [14]:
import math
def dist(point1, point2):
    """Squared Euclidean distance between two 2-D points."""
    # `**` is exponentiation; `^` is bitwise XOR and gives wrong results.
    return (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2

def compute_and_label(data, centroids):
    """Assign every point the index of its nearest centroid.

    Distances come from ``dist``. Ties go to the centroid with the lowest
    index. Returns a list with one label per point; a label stays -1 only
    when ``centroids`` is empty.
    """
    labels = [-1] * len(data)

    for i, point in enumerate(data):
        min_dist = math.inf

        for j, centr in enumerate(centroids):
            current = dist(point, centr)
            if current < min_dist:
                # Track the best distance so far; otherwise every centroid
                # would pass the comparison and the last one would always win.
                min_dist = current
                labels[i] = j

    return labels


In [None]:
# A centroid does not have to be one of the data points!
def update_centroid(data, labels, k=None):
    """Recompute each centroid as the mean of the 2-D points assigned to it.

    Parameters
    ----------
    data : sequence of (x, y) points
    labels : sequence of int
        Cluster index of every point, aligned with ``data``.
    k : int, optional
        Number of clusters. When omitted it is inferred as ``max(labels) + 1``.

    Returns
    -------
    list of (float, float)
        One centroid per cluster. A cluster with no points keeps the
        placeholder (0.0, 0.0) instead of triggering a division by zero.
    """
    if k is None:
        k = max(labels) + 1 if len(labels) else 0

    # Independent accumulators per cluster; `[(0, 0)] * k` would hold
    # immutable tuples that all alias one object.
    sum_x = [0.0] * k
    sum_y = [0.0] * k
    count = [0] * k

    for (point, label) in zip(data, labels):
        count[label] += 1
        sum_x[label] += point[0]
        sum_y[label] += point[1]

    new_centroid = []
    for idx in range(k):
        if count[idx]:
            new_centroid.append((sum_x[idx] / count[idx], sum_y[idx] / count[idx]))
        else:
            new_centroid.append((0.0, 0.0))

    return new_centroid

In [15]:
import math
def should_stop(old, new, threshold=1e-5):
    """Report whether the centroids have effectively stopped moving.

    Sums the Euclidean distance between each old 2-D centroid and its
    replacement, and returns True when that total is at most ``threshold``.
    """
    total_movement = 0

    for (p1, p2) in zip(old, new):
        # math.sqrt takes one argument; hypot gives the distance between p1 and p2.
        total_movement += math.hypot(p1[0] - p2[0], p1[1] - p2[1])

    return total_movement <= threshold
    

In [16]:
def main(data, k, seed=None, threshold=1e-5, max_iterations=300):
    """Cluster ``data`` into ``k`` groups with k-means and return the labels.

    Parameters
    ----------
    data : sequence of (x, y) points
    k : int
        Number of clusters.
    seed : int, optional
        Seed forwarded to ``initial_centroids`` for reproducible starts.
    threshold : float
        Total centroid movement at or below which iteration stops.
    max_iterations : int
        Upper bound on assignment/update rounds, so the loop always ends even
        if the centroids keep moving.

    Returns
    -------
    list of int
        The cluster index assigned to every point in the last round.
    """
    centroids = initial_centroids(data, k, seed=seed)
    labels = []

    for _ in range(max_iterations):
        labels = compute_and_label(data, centroids)
        previous_centroids = centroids
        centroids = update_centroid(data, labels, k)

        if should_stop(previous_centroids, centroids, threshold):
            break

    return labels


## [Decision Tree](https://towardsdatascience.com/implementing-a-decision-tree-from-scratch-f5358ff9c4bb)

In [None]:
class Node:
    """A decision-tree node.

    Internal nodes carry a ``feature`` index, a ``threshold`` and two children;
    leaves carry only a predicted ``value``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        # Must be stored: is_leaf() reads it to tell leaves from internal nodes.
        self.value = value

    def is_leaf(self):
        """Return True when this node holds a prediction."""
        return self.value is not None

In [None]:
class DecisionTree:
    """Classification tree grown greedily by maximising information gain.

    Labels must be non-negative integers because ``np.bincount`` is used to
    count classes.

    Parameters
    ----------
    max_depth : int
        Maximum depth of the tree; nodes at this depth become leaves.
    min_samples_split : int
        Minimum number of samples a node needs before it may be split.
    """

    def __init__(self, max_depth=100, min_samples_split=2):
        self.max_depth = max_depth
        self.min_sample_split = min_samples_split
        self.root = None

    def _is_finished(self, depth):
        """Return True when the node currently being built must become a leaf."""
        # `>=` so that no node is ever split at max_depth (root has depth 0).
        return (depth >= self.max_depth
                or self.n_class_labels == 1
                or self.n_samples < self.min_sample_split)

    def _entropy(self, y):
        """Shannon entropy, in bits, of the integer label array ``y``."""
        proportions = np.bincount(y) / len(y)
        # Entropy is the negated sum; zero-probability classes contribute nothing.
        return -np.sum([p * np.log2(p) for p in proportions if p > 0])

    def _create_split(self, X, thresh):
        """Return the indices of the values in ``X`` that are <= / > ``thresh``."""
        left_idx = np.argwhere(X <= thresh).flatten()
        right_idx = np.argwhere(X > thresh).flatten()
        return left_idx, right_idx

    def _information_gain(self, X, y, thresh):
        """Entropy reduction from splitting the feature column ``X`` at ``thresh``.

        Returns 0 when the split would leave one side empty.
        """
        parent_loss = self._entropy(y)
        left_idx, right_idx = self._create_split(X, thresh)
        n, n_left, n_right = len(y), len(left_idx), len(right_idx)

        if n_left == 0 or n_right == 0:
            return 0

        child_loss = ((n_left / n) * self._entropy(y[left_idx])
                      + (n_right / n) * self._entropy(y[right_idx]))
        return parent_loss - child_loss

    def _best_split(self, X, y, features):
        """Find the (feature, threshold) pair with the highest information gain.

        Returns ``(None, None)`` when every candidate feature is constant, in
        which case no split can separate the samples.
        """
        split = {"score": -1, "feat": None, "thresh": None}

        for feat in features:
            X_feat = X[:, feat]
            # np.unique is sorted; the largest value would send every sample
            # to the left child, so it is never a usable threshold.
            thresholds = np.unique(X_feat)[:-1]
            for thresh in thresholds:
                score = self._information_gain(X_feat, y, thresh)

                if score > split["score"]:
                    split["score"] = score
                    split["feat"] = feat
                    split["thresh"] = thresh

        return split["feat"], split["thresh"]

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the subtree for the samples ``(X, y)``."""
        self.n_samples, self.n_features = X.shape
        self.n_class_labels = len(np.unique(y))

        # stopping criteria
        if self._is_finished(depth):
            most_common_label = np.argmax(np.bincount(y))
            return Node(value=most_common_label)

        # get best split
        rnd_feats = np.random.choice(self.n_features, self.n_features, replace=False)
        best_feat, best_thresh = self._best_split(X, y, rnd_feats)

        # No feature can separate the samples: stop here rather than recurse
        # into an empty child.
        if best_feat is None:
            most_common_label = np.argmax(np.bincount(y))
            return Node(value=most_common_label)

        # grow children recursively
        left_idx, right_idx = self._create_split(X[:, best_feat], best_thresh)
        left_child = self._build_tree(X[left_idx, :], y[left_idx], depth + 1)
        right_child = self._build_tree(X[right_idx, :], y[right_idx], depth + 1)

        return Node(best_feat, best_thresh, left_child, right_child)

    def _traverse_tree(self, x, node):
        """Follow the split rules from ``node`` down to a leaf for sample ``x``."""
        if node.is_leaf():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)

    def fit(self, X, y):
        """Grow the tree from ``X`` (n_samples, n_features) and integer labels ``y``."""
        self.root = self._build_tree(np.asarray(X), np.asarray(y))

    def predict(self, X):
        """Return the predicted label for every row of ``X`` as a NumPy array."""
        predictions = [self._traverse_tree(x, self.root) for x in X]
        return np.array(predictions)