### Soft Regression

主要用于处理多分类问题，其中任意两个类是线性可分的。

假设有$N$个训练样本$\{(X_1,y_1),(X_2,y_2),\cdots,(X_N,y_N)\}$，对于soft regression算法，输入特征为$X_i\in\mathcal{R}^{n+1}$，类标记为:$y_i\in\{0,1,\cdots,k\}$.

假设每一个样本估计所属的类概率为$P(y=j|X)$，假设函数为:
\begin{equation}
h_\theta(X_i)=\begin{bmatrix}P(y_i=1|X_i;\theta)\\P(y_i=2|X_i;\theta)\\\vdots\\P(y_i=k|X_i;\theta)\end{bmatrix}=\frac{1}{\sum_{j=1}^{k}e^{\theta_j^T X_i}}\begin{bmatrix}e^{\theta_1^T X_i}\\e^{\theta_2^T X_i}\\\vdots\\e^{\theta_k^T X_i}\end{bmatrix}
\end{equation}

其中$\theta$表示的向量，且$\theta_i\in\mathcal{R}^{n+1}$。每一个样本所属类的概率为$P(y_i=j|X_i;\theta)=\frac{e^{\theta_j^T X_i}}{\sum_{l=1}^{k}e^{\theta_l^T X_i}}$

损失函数用交叉熵$l(\theta)=-\frac{1}{N}[\sum_{i=1}^N\sum_{j=1}^kI(y_i=j)\ln\frac{e^{\theta_j^T X_i}}{\sum_{l=1}^{k}e^{\theta_l^T X_i}}]$，$I(y_i=j)$表示指示函数

损失函数的梯度表达式为:$\nabla_{\theta_j} l(\theta)=-\frac{1}{N}\sum_{i=1}^{N}[X_i\cdot (I(y_i=j)-P(y_i=j|X_i;\theta))]$

梯度下降法更新公式为:$\theta_j=\theta_j-\alpha\nabla_{\theta_j}l(\theta)$

In [52]:
# coding:UTF-8
import numpy as np
import os
cwd=os.getcwd()

In [53]:
#首先实现对于一个np.mat矩阵，不同元素值计数
def count_diff(mats):
    '''实现矩阵不同元素的计数
    input:  mats(mat)输入矩阵
    output: elements(int)不同元素的个数
    '''
    element,count=np.unique(np.array(mats.T),return_counts=True)
    return len(element)

In [54]:
#计算损失函数
def loss(expo,labels):
    '''计算损失函数值
    input:  expo(mat):概率exp因子
            labels(mat):标签值
    '''
    num=np.shape(expo)[0]
    loss_total=0
    for i in range(num):
        loss_total+=np.log(expo[i,labels[i,0]]/np.sum(expo[i,:]))
    return -(1/num)*loss_total

In [55]:
#梯度下降求解模型参数
def gradascent(features,labels,epochs,alpha):
    '''利用梯度下降法训练softmax模型
    input:  features(mat):特征
            labels(mat):标签
            epochs(int):迭代次数
            alpha(float):学习率
    output: weights(mat):权重
    '''
    num,n=np.shape(features)    #m样本个数，n特征个数
    k=count_diff(labels)        #labels里有k个类
    weights=np.mat(np.ones((n,k)))#初始权重
    i=0
    while i<=epochs:
        expo=np.exp(features*weights)
        if i%500==0:
            print("\t-----iter:",i,",loss:",loss(expo,labels))
        expo_sum=expo.sum(axis=1).repeat(k,axis=1)    #求和求完后变成(num,1)的列矩阵，所以需要复制k次
        expo=expo/expo_sum
        for j in range(num):
            expo[j,labels[j,0]]=1-expo[j,labels[j,0]]
        weights=weights-(alpha/num)*features.T*expo
        i+=1
    return weights

In [56]:
#导入数据
def load_data(filepath):
    '''
    input:  filepath(str)训练集文件路径
    output: features(mat)特征
            labels(mat)标签
    '''
    file=open(filepath)
    features=[]
    labels=[]
    for row in file.readlines():
        features_temp=[]
        features_temp.append(1)
        row=row.strip().split('\t')
        for i in range(len(row)-1):
            features_temp.append(float(row[i]))
        labels.append(int(row[-1]))
        features.append(features_temp)
    file.close()
    return np.mat(features),np.mat(labels).T

#保存数据
def save_model(filepath,weights):
    '''保存最后的模型
    input:  filepath(str):保存的路径
            weights(mat):保存的权重
    '''
    file=open(filepath,'w')
    n,k=np.shape(weights)
    for i in range(n):
        weights_temp=[]
        for j in range(k):
            weights_temp.append(str(weights))   #实际上这里就是单纯做了一个格式转换，把矩阵里的float转化成str，方便用open()方法存储
        file.write('\t'.join(weights_temp)+'\n')
    file.close()

#训练数据主函数
if __name__=='__main__':
    filename=os.path.join(cwd,'SoftInput.txt')
    print('1.loading')
    features,labels=load_data(filename)
    print('2.training')
    weights=gradascent(features=features,labels=labels,epochs=10000,alpha=0.4)
    print('3.saving')
    savename=os.path.join(cwd,'model')
    save_model(savename,weights=weights)

1.loading
2.training
	-----iter: 0 ,loss: 1.3862943611198926
	-----iter: 500 ,loss: nan
	-----iter: 1000 ,loss: nan


  expo=expo/expo_sum


	-----iter: 1500 ,loss: nan
	-----iter: 2000 ,loss: nan
	-----iter: 2500 ,loss: nan
	-----iter: 3000 ,loss: nan
	-----iter: 3500 ,loss: nan
	-----iter: 4000 ,loss: nan
	-----iter: 4500 ,loss: nan
	-----iter: 5000 ,loss: nan
	-----iter: 5500 ,loss: nan
	-----iter: 6000 ,loss: nan
	-----iter: 6500 ,loss: nan
	-----iter: 7000 ,loss: nan
	-----iter: 7500 ,loss: nan
	-----iter: 8000 ,loss: nan
	-----iter: 8500 ,loss: nan
	-----iter: 9000 ,loss: nan
	-----iter: 9500 ,loss: nan
	-----iter: 10000 ,loss: nan
3.saving
