# Softmax Regression Example

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
class SoftmaxRegressionModel:
    """Multinomial (softmax) regression trained with batch gradient descent.

    Conventions shared by the methods below:
      * ``X``     -- (m, n) design matrix, one row per sample.
      * ``theta`` -- (n, k) weight matrix, one column per class.
      * one-hot label matrices are (m, k), as produced by :meth:`Y`.
    """

    def Y(self,y):
        """One-hot encode a 1-D label array.

        Column ``j`` corresponds to the ``j``-th value of ``np.unique(y)``
        (i.e. classes in sorted order).
        """
        y = np.asarray(y)
        classes = np.unique(y)
        Y = np.zeros((len(y), len(classes)))
        for j, label in enumerate(classes):
            Y[y == label, j] = 1
        return Y

    def y(self,Y):
        """Collapse an (m, k) score/one-hot matrix to the per-row argmax index."""
        return np.argmax(Y, axis=1)

    def _shifted_scores(self, X, theta):
        """Return ``X @ theta`` with each row's maximum subtracted.

        Softmax is invariant to a per-row constant shift; after the shift every
        entry is <= 0, so ``np.exp`` can no longer overflow.
        """
        scores = X @ theta
        return scores - np.max(scores, axis=1, keepdims=True)

    def _log_h(self, X, theta):
        """Row-wise log-softmax, computed without taking the log of a tiny probability."""
        shifted = self._shifted_scores(X, theta)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    #1. hypothesis function
    def h(self, X, theta):
        """Return the (m, k) matrix of class probabilities; every row sums to 1."""
        exp_scores = np.exp(self._shifted_scores(X, theta))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    #2. cost function
    def cost(self, X, y, theta, average = False):
        """Cross-entropy cost.

        Parameters
        ----------
        X : (m, n) design matrix.
        y : (m, k) one-hot label matrix (see :meth:`Y`).
        theta : (n, k) weight matrix.
        average : if true, divide the summed cost by the number of rows of X.
        """
        J = -np.sum(y * self._log_h(X, theta))
        if average:
            J = J / X.shape[0]
        return J

    #3. gradient function
    def gradient(self, X, y, theta, average = False):
        """Gradient of :meth:`cost` with respect to ``theta``; same shape as ``theta``."""
        dJ = -X.T @ (y - self.h(X, theta))
        if average:
            dJ = dJ / X.shape[0]
        return dJ

    #4. batch gradient descent
    def batch_gd(self, X, y, initial_theta, max_iteration, alpha, tolerance = 0,average = False):
        """Run batch gradient descent.

        Parameters
        ----------
        initial_theta : starting weights; not modified.
        max_iteration : upper bound on the number of update steps.
        alpha : learning rate.
        tolerance : when > 0, stop as soon as the absolute change in cost
            between two consecutive steps drops below this value. The default
            of 0 disables early stopping, so exactly ``max_iteration`` steps run.
        average : forwarded to :meth:`cost` and :meth:`gradient`.

        Returns
        -------
        (theta, cost, iteration) where ``cost`` is an array holding the initial
        cost followed by the cost after each step (length ``iteration + 1``).
        """
        theta = initial_theta
        cost = [self.cost(X, y, theta, average)]
        iteration = 0
        for _ in range(max_iteration):
            theta = theta - alpha * self.gradient(X, y, theta, average)
            cost.append(self.cost(X, y, theta, average))
            iteration += 1
            if tolerance > 0 and abs(cost[-2] - cost[-1]) < tolerance:
                break
        return theta, np.array(cost), iteration

    #5. predict
    def predict_Y(self,X,theta):
        """Return an (m, k) matrix with 1 at each row's most probable class, 0 elsewhere.

        Exact ties within a row yield more than one 1 in that row.
        """
        prediction = self.h(X, theta)
        # After dividing by the row maximum only the winning entries equal 1.
        prediction = prediction / np.max(prediction, axis=1, keepdims=True)
        prediction[prediction < 1] = 0
        return prediction

    def predict_y(self,X,theta):
        """Return the index of the most probable class for every row of X."""
        return self.y(self.h(X, theta))

    #6. score/error calculation
    def accuracy(self,y,y_pred):
        """Fraction of positions where ``y`` and ``y_pred`` are equal."""
        return np.sum(y == y_pred) / y.size

    #7. plotting cost
    def plot_cost(self,cost, iteration_no):
        """Plot the cost history against the iteration index 0..iteration_no."""
        iteration_series = np.arange(0, iteration_no + 1)
        ax = plt.axes()
        ax.plot(iteration_series, cost)

In [3]:
def standardScaler(X):
    """Standardise each column of X to zero mean and unit spread.

    Returns ``(X_norm, mean, std)`` where ``mean`` and ``std`` are the
    per-column statistics computed with ``np.mean`` / ``np.std`` along axis 0.
    Columns whose spread is exactly 0 (constant columns) are divided by 1
    instead, so they come out as all zeros rather than NaN/inf; the returned
    ``std`` carries that 1 so it can be reused to scale other data or to undo
    the scaling.
    """
    mean = np.mean(X,axis=0)
    std = np.std(X,axis=0)
    # Adding the boolean mask turns every 0 into 1 and leaves other entries
    # untouched, while keeping the container type (ndarray or pandas Series).
    std = std + (std == 0)
    X_norm = (X-mean)/std
    return X_norm, mean, std

def standardUnScaler(X, mean, std):
    """Undo a standardisation: multiply by ``std`` and then add ``mean``."""
    rescaled = X * std
    return rescaled + mean

In [None]:
def replaceKeys(series):
    """Replace each distinct value of ``series`` with an integer code.

    Codes are assigned 0, 1, 2, ... in the order in which
    ``series.unique()`` yields the values. Returns the re-coded series
    together with the ``{value: code}`` mapping that was applied.
    """
    series_dict = {value: code for code, value in enumerate(series.unique())}
    encoded = series.replace(series_dict)
    return encoded, series_dict

# Load the raw data set from the working directory.
data = pd.read_csv("gender_height_weight.csv") 

# Encode the Gender column as integer codes; gender_dict keeps the
# {original value: code} mapping so the codes can be interpreted later.
data['Gender'], gender_dict = replaceKeys(data['Gender'])

In [None]:
#Split Data

# Drop incomplete rows from the features and the target together, so the rows
# of X and y stay aligned after dropna().
feature_names = ['Gender','Height','Weight']
complete = data[feature_names + ['Index']].dropna().astype(float)
X_all = complete[feature_names]
y = complete['Index']
columns = X_all.columns

# Random permutation of the row positions. It is applied to both X and y below,
# so the split is a random partition rather than "the first 70% of the file".
ix = np.arange(0,X_all.shape[0])
np.random.shuffle(ix)
percentage = 0.7
n_train = int(percentage*X_all.shape[0])
train_ix, test_ix = ix[:n_train], ix[n_train:]

X_train = X_all.iloc[train_ix]
X_test = X_all.iloc[test_ix]

# Standardise with statistics taken from the training split only and reuse
# them for the test split, so no test information leaks into the scaling.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
X_norm = (X_train-mean)/std
X_norm_test = (X_test-mean)/std
# Same bias-column name in both frames.
X_norm.insert(0,"intercept",1)
X_norm_test.insert(0,"intercept",1)

print(X_norm.head())
print()
print(y.head())
y_train = y.iloc[train_ix]
y_test = y.iloc[test_ix]

# m samples, n columns (features + intercept) in the training design matrix
m,n = X_norm.shape

# number of distinct target classes
k = y.unique().size

In [None]:
# Number of distinct classes in the target
k = len(y.unique())

# Guard on the column name so that re-running this cell cannot insert the
# bias column a second time.
if "intercept" not in X_train.columns:
    X_train.insert(0, "intercept", 1)

# Reset m and n for training data

m, n = X_train.shape

# Initialize theta for each class  

In [None]:
# Model instance reused by the training, prediction and scoring cells.
SR = SoftmaxRegressionModel()

# One-hot encode the training labels; one column per distinct label in y_train.
yy = SR.Y(y=y_train.values)

In [None]:
# Start from an all-ones weight matrix: one row per column of the design
# matrix, one column per class.
theta_initial = np.ones((n,k))

# Gradient-descent hyper-parameters
alpha = 5
iterations = 5000

# Fit on the standardised training matrix using the averaged cost/gradient,
# then plot the recorded cost history.
theta,cost,iteration_no = SR.batch_gd(
    X_norm.values,
    yy,
    theta_initial,
    max_iteration=iterations,
    alpha=alpha,
    average=True,
)
SR.plot_cost(cost,iteration_no)

In [None]:
# Predicted class index (argmax of the class probabilities) for every test row.
y_pred = SR.predict_y(X=X_norm_test.values, theta=theta)

In [None]:
# Fraction of test rows whose predicted class equals the true label.
# Arguments follow the accuracy(y, y_pred) signature: truth first, prediction second.
SR.accuracy(y_test.values, y_pred)