In [64]:
import numpy as np
from scipy.special import expit
from scipy.optimize import fmin_bfgs
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [65]:
df = pd.read_csv('star_classification.csv')

In [66]:
df

Unnamed: 0,obj_ID,alpha,delta,u,g,r,i,z,run_ID,rerun_ID,cam_col,field_ID,spec_obj_ID,class,redshift,plate,MJD,fiber_ID
0,1.237661e+18,135.689107,32.494632,23.87882,22.27530,20.39501,19.16573,18.79371,3606,301,2,79,6.543777e+18,GALAXY,0.634794,5812,56354,171
1,1.237665e+18,144.826101,31.274185,24.77759,22.83188,22.58444,21.16812,21.61427,4518,301,5,119,1.176014e+19,GALAXY,0.779136,10445,58158,427
2,1.237661e+18,142.188790,35.582444,25.26307,22.66389,20.60976,19.34857,18.94827,3606,301,2,120,5.152200e+18,GALAXY,0.644195,4576,55592,299
3,1.237663e+18,338.741038,-0.402828,22.13682,23.77656,21.61162,20.50454,19.25010,4192,301,3,214,1.030107e+19,GALAXY,0.932346,9149,58039,775
4,1.237680e+18,345.282593,21.183866,19.43718,17.58028,16.49747,15.97711,15.54461,8102,301,3,137,6.891865e+18,GALAXY,0.116123,6121,56187,842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.237679e+18,39.620709,-2.594074,22.16759,22.97586,21.90404,21.30548,20.73569,7778,301,2,581,1.055431e+19,GALAXY,0.000000,9374,57749,438
99996,1.237679e+18,29.493819,19.798874,22.69118,22.38628,20.45003,19.75759,19.41526,7917,301,1,289,8.586351e+18,GALAXY,0.404895,7626,56934,866
99997,1.237668e+18,224.587407,15.700707,21.16916,19.26997,18.20428,17.69034,17.35221,5314,301,4,308,3.112008e+18,GALAXY,0.143366,2764,54535,74
99998,1.237661e+18,212.268621,46.660365,25.35039,21.63757,19.91386,19.07254,18.62482,3650,301,4,131,7.601080e+18,GALAXY,0.455040,6751,56368,470


In [67]:
# Get rid of all rows which have class as GALAXY
# df = df[df['class'] != 'GALAXY']

In [68]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('class', axis=1), df['class'], test_size=0.2, random_state=42)

In [70]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# encode the labels:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [71]:
class LogisticRegressionBase:
    def __init__(self, eta=0.1, iterations=20, C1=0.0001, C2 = 0.0001):
        self.eta = eta
        self.iters = iterations
        self.C1 = C1
        self.C2 = C2
    def __str__(self):
        if(hasattr(self, 'w_')):
            return 'Binary Logistic Regression Object with coefficients:\n'+ str(self.w_) # is we have trained the object
        else:
            return 'Untrained Binary Logistic Regression Object'
    @staticmethod
    def _add_bias(X):   
        return np.hstack((np.ones((X.shape[0], 1)), X))

    @staticmethod
    def _sigmoid(theta):
        return expit(theta)
    def _get_gradient(self, X, y):
        ydiff = y - self.predict_proba(X, add_bias = False).ravel()
        gradient = np.mean(X * ydiff[:,np.newaxis], axis=0)
        gradient = gradient.reshape(self.w_.shape)
        return gradient
    def _get_gradient_L2(self, X, y):
        gradient = self._get_gradient(X, y)
        gradient[1:] += -2 * self.w_[1:] * self.C2
        return gradient
    def _get_gradient_L1(self, X, y):
        gradient = self._get_gradient(X, y)
        l1_der = self.w_[1:] / np.abs(self.w_[1:])
        l1_der[self.w_[1:] == 0] = 0
        gradient[1:] +=  -1 * l1_der * self.C1
        return gradient
    def _get_gradient_elastic(self, X, y):
        gradient = self._get_gradient(X, y)
        l1_der = self.w_[1:] / np.abs(self.w_[1:])
        l1_der[self.w_[1:] == 0] = 0
        gradient[1:] +=  -1 * l1_der * self.C1
        gradient[1:] += -2 * self.w_[1:] * self.C2
        return gradient
    def predict_proba(self, X, add_bias=True):
        Xb = self._add_bias(X) if add_bias else X
        return self._sigmoid(Xb @ self.w_)

    def predict(self, X):
        return self.predict_proba(X) > 0.5
    def fit(self, X, y, regularization=None):
        Xb = self._add_bias(X)
        num_samples, num_features = Xb.shape
        # self.w_ = np.zeros((num_features, 1))
        self.w_ = np.random.uniform(-1, 1, (num_features, 1))
        for i in range(self.iters):
            if(regularization == 'L1'):
                grad = self._get_gradient_L1(Xb, y)
            elif(regularization == 'L2'):
                grad = self._get_gradient_L2(Xb, y)
            elif(regularization == 'elastic'):
                grad = self._get_gradient_elastic(Xb, y)
            else:
                grad = self._get_gradient(Xb, y)
            self.w_ += grad*self.eta

In [72]:
model = LogisticRegressionBase(eta=0.1, iterations=1000)
model.fit(X_train, y_train, regularization='elastic')
y_pred = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))

Accuracy:  0.33585


In [None]:
class LogisticRegressionSGD(LogisticRegressionBase):
    def _get_gradient(self, X, y):
        sample = int(np.random.rand()  * len(y))
        ydiff = y[sample] - self.predict_proba(X[sample],add_bias=False)
        gradient = X[sample] * ydiff[:, np.newaxis]
        gradient = gradient.reshape(self.w_.shape)

        return gradient

In [None]:
class LogisticRegressionNewtons(LogisticRegressionBase):
    def _get_gradient(self, X, y):
        g = self.predict_proba(X, add_bias=False).ravel()
        hessian = X.T @ np.diag(g * (1-g)) @ X
        ydiff = y - g
        gradient = np.sum(X * ydiff[:, np.newaxis], axis=0)
        gradient = gradient.reshape(self.w_.shape)
        return np.linalg.pinv(hessian) @ gradient
    def _get_gradient_L1(self, X, y):
        g = self.predict_proba(X, add_bias=False).ravel()
        hessian = X.T @ np.diag(g * (1-g)) @ X # the second derivative of abs(x) evaluates to 0 so our hessian will simply be the one for the ordinary log likelihood
        ydiff = y - g
        gradient = np.sum(X @ ydiff[:, np.newaxis], axis=0)
        gradient = gradient.reshape(self.w_.shape)
        l1_der = self.w_[1:] / np.abs(self.w_[1:])
        l1_der[self.w_[1:] == 0] = 0
        gradient[1:] += -1 * l1_der[1:] * self.C1
        return np.linalg.pinv(hessian) @ gradient
    def _get_gradient_L2(self, X, y):
        g = self.predict_proba(X, add_bias=False).ravel()
        hessian = X.T @ np.diag(g * (1-g)) @ X - 2 * self.C2
        ydiff = y - g
        gradient = np.sum(X * ydiff[:, np.newaxis], axis=0)
        gradient = gradient.reshape(self.w_.shape)
        gradient[1:] += -2 * self.w_[1:] * self.C2
        return np.linalg.pinv(hessian) @ gradient
    def _get_gradient_elastic(self, X, y):
        g = self.predict_proba(X, add_bias=False).ravel()
        hessian = X.T @ np.diag(g * (1-g)) @ X - 2 * self.C2
        ydiff = y - g
        gradient = np.sum(X @ ydiff[:, np.newaxis], axis=0)
        gradient = gradient.reshape(self.w_)
        l1_der = self.w_ / np.abs(self.w_)
        gradient[1:] += -1 * l1_der[1:] * self.C1
        gradient[1:] += -2 * self.w_[1:] * self.C2
        return np.linalg.pinv(hessian) @ gradient


In [None]:
model = LogisticRegressionNewtons(eta=0.1, iterations=10)
model.fit(X_train, y_train, regularization='L2')
y_pred = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))

In [87]:
class LogisticRegression:
    def __init__(self, eta=.1, iters=10, C1=.001, C2=.0001, solver="default", regularization=None):
        self.eta = eta
        self.iters = iters
        self.C1 = C1
        self.C2 = C2
        self.solver = solver
        self.classifiers = []
        self.regularization = regularization

    def __str__(self):
        if(hasattr(self,'w_')):
            return 'MultiClass Logistic Regression Object with coefficients:\n'+ str(self.w_) # is we have trained the object
        else:
            return 'Untrained MultiClass Logistic Regression Object'

    def fit(self, X, y):
        # Get number of unique values of y
        unique_classes = np.unique(y)
        unique_classes.sort()
        for target in unique_classes:
            # Transform the data into binary classification, the taget class vs the rest
            y_binary = np.where(y == target, 1, 0)
            if self.solver == "default":
                model = LogisticRegressionBase(iterations=self.iters, eta=self.eta, C1=self.C1, C2=self.C2)
            elif self.solver == "sgd":
                model = LogisticRegressionSGD(iterations=self.iters, eta=self.eta, C1=self.C1, C2=self.C2)
            elif self.solver == "newton":
                model = LogisticRegressionNewtons(iterations=self.iters, eta=self.eta, C1=self.C1, C2=self.C2)
            model.fit(X, y_binary, regularization=self.regularization)
            self.classifiers.append(model)
        self.w_ = np.hstack([x.w_ for x in self.classifiers]).T
    def predict_proba(self, X):
        probs = []
        for model in self.classifiers:
            probs.append(model.predict_proba(X).reshape(len(X), 1))
        return np.hstack(probs)

    def predict(self, X):
        probs = self.predict_proba(X)
        return np.argmax(probs, axis=1)


In [145]:
model = LogisticRegression(iters=150000, eta=1, solver="sgd", regularization="l2")
model.fit(X_train, y_train)
y_hat = model.predict(X_test)

In [146]:
# Calculate accuracy of y_hat vs y_test with sklearn
print('Accuracy: ', accuracy_score(y_test, y_hat))

Accuracy:  0.9948218468746147


In [134]:
from sklearn.linear_model import LogisticRegression as SKLogisticRegression
SKmodel = SKLogisticRegression(solver='lbfgs', penalty='l2')
SKmodel.fit(X_train, y_train)
print(accuracy_score(y_true=y_test, y_pred=SKmodel.predict(X_test)))

0.9944519787942301
