In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine

# Data Prep

In [None]:
dat = load_wine()
X_temp = pd.DataFrame(dat['data'], columns=dat['feature_names'])
y_temp = pd.Series(dat['target'])
y_temp = y_temp.rename('target')
X_temp.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [3]:
full_dat = pd.concat([X_temp,y_temp], axis=1)
full_dat = full_dat[full_dat['target'].isin([0,1])]
full_dat.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [4]:
full_dat.shape

(130, 14)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
X = full_dat.drop(columns='target')
y = full_dat['target']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
standardize = StandardScaler()
X_train = standardize.fit_transform(X_train)
X_test = standardize.transform(X_test)

In [8]:
y_train.value_counts()

target
1    58
0    46
Name: count, dtype: int64

In [9]:
y_test.value_counts()

target
0    13
1    13
Name: count, dtype: int64

# Base Logistic Regression

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

In [11]:
clf = LogisticRegression(penalty=None, max_iter=100_000, verbose=5, n_jobs=-1)
clf.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


In [12]:
from sklearn.metrics import log_loss

y_train_preds_proba = clf.predict_proba(X_train)
loss = log_loss(y_train, y_train_preds_proba[:,1])
round(loss, 5)

0.00025

# Maximum Gradient Coordinate Descent

$$
\nabla{f(w)} = \sum_{i=1}^n \frac{-y^{(i)}x^{(i)}}{1+\exp(y^{(i)}w^T x^{(i)})}
$$

In [25]:
w_0 = np.zeros(X_train.shape[1])

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [29]:
numerator = (-y_train @ X_train)
denominator = 1 + np.exp((y_train * w_0.T) * X_train)

ValueError: operands could not be broadcast together with shapes (104,) (13,) 

In [15]:
numerator / denominator

array([ 20.88678612,   0.13063692,   8.27721288, -12.65425015,
        11.51454032,  13.38346666,  15.84164915,  -9.08597012,
         8.27757156,  18.93856729,   2.81122556,  11.1203019 ,
        22.35311923])

In [35]:
n = X_train.shape[0]
exp_term = np.exp(y_train * (X_train @ w_0))
grad = np.sum((-np.array(y_train)[:, np.newaxis] * X_train) / (1 + exp_term), axis=0)
grad

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 104 is different from 1)

In [17]:
grad[np.argmax(np.abs(grad))]

22.35311922866902

In [18]:
grad

array([ 20.88678612,   0.13063692,   8.27721288, -12.65425015,
        11.51454032,  13.38346666,  15.84164915,  -9.08597012,
         8.27757156,  18.93856729,   2.81122556,  11.1203019 ,
        22.35311923])

In [22]:
len(w_0)

104

In [20]:
def max_grad_logistic(w, X, y):
    numerator = (-y @ X)
    denominator = 1 + np.exp((y * w.T) @ X)
    grad = numerator / denominator
    max_grad = np.zeros(len(w))
    grad_val, idx = np.max(np.abs(grad)), np.argmax(np.abs(grad))
    max_grad[idx] = grad_val
    return max_grad

max_grad_logistic(w_0, X_train, y_train)



array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , 22.35311923,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  