# 线性回归代码实现

In [11]:
import sys
# Add the parent directory to the module search path; presumably this is what
# lets the project-local `utils` package below be imported — verify against
# the repository layout.
sys.path.append('..')

import numpy as np
from utils.features import prepare_for_training

In [10]:
class LinearRegression:
    """Linear regression fitted with batch gradient descent.

    The training data is preprocessed once in ``__init__`` through
    ``prepare_for_training``; the same preprocessing settings are stored on
    the instance and reused by ``predict`` and ``get_cost``.

    ``theta`` is kept as a column vector of shape ``(num_features, 1)``, so
    ``labels`` are expected to be a 2-D column of shape ``(num_samples, 1)``.
    """

    def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
        """Preprocess the training data and initialise the parameters.

        Steps:
            1. Preprocess the data.
            2. Read the number of feature columns.
            3. Initialise the parameter vector with zeros.

        Args:
            data: Raw training features, passed to ``prepare_for_training``.
            labels: Training targets, expected as a ``(num_samples, 1)`` column.
            polynomial_degree: Forwarded to ``prepare_for_training``.
            sinusoid_degree: Forwarded to ``prepare_for_training``.
            normalize_data: Forwarded to ``prepare_for_training``.
        """
        # Forward the caller's settings. Hard-coding them here would make
        # training use different preprocessing than predict()/get_cost(),
        # which rely on the values stored on the instance below.
        (data_processed,
         features_mean,
         features_deviation) = prepare_for_training(
            data,
            polynomial_degree=polynomial_degree,
            sinusoid_degree=sinusoid_degree,
            normalize_data=normalize_data,
        )

        self.data = data_processed
        self.labels = labels
        self.features_mean = features_mean
        self.features_deviation = features_deviation
        self.polynomial_degree = polynomial_degree
        self.sinusoid_degree = sinusoid_degree
        self.normalize_data = normalize_data

        # One parameter per column of the processed data, stored as a column
        # vector so that np.dot(data, theta) needs no transpose.
        num_features = self.data.shape[1]
        self.theta = np.zeros((num_features, 1))

    def train(self, alpha, num_interations=500):
        """Run gradient descent and return the fitted parameters.

        Args:
            alpha: Learning rate (step size).
            num_interations: Number of gradient-descent iterations.

        Returns:
            A tuple ``(theta, cost_history)`` where ``cost_history`` holds the
            training cost recorded after every iteration.
        """
        # Must be called on the instance: calling it through the class would
        # bind ``alpha`` to ``self`` and fail.
        cost_history = self.gradient_descent(alpha, num_interations)
        return self.theta, cost_history

    def gradient_descent(self, alpha, num_interations):
        """Iterate ``gradient_step`` and record the cost after each step.

        Args:
            alpha: Learning rate (step size).
            num_interations: Number of iterations to perform.

        Returns:
            List with one training-cost value per iteration.
        """
        cost_history = []
        for _ in range(num_interations):
            self.gradient_step(alpha)
            cost_history.append(self.cost_function(self.data, self.labels))
        return cost_history

    def gradient_step(self, alpha):
        """Apply one batch gradient-descent update to ``self.theta``.

        Args:
            alpha: Learning rate (step size).
        """
        num_samples = self.data.shape[0]
        prediction = LinearRegression.hypothesis(self.data, self.theta)
        delta = prediction - self.labels
        # X^T . delta has shape (num_features, 1), matching theta. It equals
        # the transposed form (delta^T . X)^T.
        gradient = np.dot(self.data.T, delta) / num_samples
        self.theta = self.theta - alpha * gradient

    def cost_function(self, data, labels):
        """Return the halved mean squared error of the model on ``data``.

        Args:
            data: Already preprocessed feature matrix.
            labels: Targets for ``data``, as a ``(num_samples, 1)`` column.

        Returns:
            Scalar cost ``(1 / (2 * m)) * sum((data . theta - labels) ** 2)``.
        """
        num_examples = data.shape[0]
        # Evaluate on the data passed in, not on the stored training set, so
        # that get_cost() works for arbitrary data.
        delta = LinearRegression.hypothesis(data, self.theta) - labels
        cost = (1 / 2) * np.dot(delta.T, delta) / num_examples
        # np.dot of two column vectors yields a 1x1 matrix; unwrap the scalar.
        return cost[0][0]

    @staticmethod
    def hypothesis(data, theta):
        """Return the linear predictions ``data . theta``."""
        predictions = np.dot(data, theta)
        return predictions

    def get_cost(self, data, labels):
        """Preprocess ``data`` with the stored settings and return its cost.

        Args:
            data: Raw feature matrix.
            labels: Targets for ``data``, as a ``(num_samples, 1)`` column.

        Returns:
            Scalar cost of the current parameters on the given data.
        """
        data_processed = prepare_for_training(
            data,
            polynomial_degree=self.polynomial_degree,
            sinusoid_degree=self.sinusoid_degree,
            normalize_data=self.normalize_data,
        )[0]

        return self.cost_function(data_processed, labels)

    def predict(self, data):
        """Preprocess ``data`` with the stored settings and predict targets.

        Args:
            data: Raw feature matrix.

        Returns:
            Predictions ``data_processed . theta``.
        """
        # NOTE(review): the stored features_mean / features_deviation are not
        # passed here, so any normalisation is presumably recomputed from
        # ``data`` itself — confirm against prepare_for_training.
        data_processed = prepare_for_training(
            data,
            polynomial_degree=self.polynomial_degree,
            sinusoid_degree=self.sinusoid_degree,
            normalize_data=self.normalize_data,
        )[0]

        predictions = LinearRegression.hypothesis(data_processed, self.theta)

        return predictions