# handwritten digits verification

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
data_tes = pd.read_csv('/content/drive/MyDrive/ee6180_project/optdigits.tes', header=None)
data_tra = pd.read_csv('/content/drive/MyDrive/ee6180_project/optdigits.tra', header=None)
data = pd.concat([data_tra, data_tes])
data = data.sample(frac=1)
data.head()

In [5]:
def get_cor(y, yhat, zeros=True, normalize=False):
  if zeros:
    yhat = 2*yhat - 1
    y = 2*y - 1
    return np.inner(y, yhat)/yhat.shape[0] if normalize else np.inner(y, yhat)
  return np.inner(y, yhat)/yhat.shape[0] if normalize else np.inner(y, yhat)

def normalize(X, eps=1e-10):
  """
  X is n X d matrix. Make each coordinate in range [0,1].
  """
  n, d = X.shape
  Y = np.zeros_like(X, dtype=np.float32)
  for i in range(d):
    Y[: ,i] = (X[:, i] - X[:, i].min())/(X[:, i].max() - X[:, i].min() + eps)
  
  return Y

def get_gamma(wl_cor, h_cor, regret):
  e_wl = np.mean(np.asarray(wl_cor))
  e_h = np.mean(np.asarray(h_cor))
  return (e_wl+regret)/(e_h)

In [None]:
data_sample = data.sample(frac = 1)
X, y = data_sample.to_numpy()[:,:-1], data_sample.to_numpy()[:,-1]
print(X.shape, y.shape)

(5620, 64) (5620,)


In [None]:
X = normalize(X) # simple normalization
# y = np.logical_or(np.logical_or(y==3, y==6), np.logical_or(y==8, y==9) ) # multiclass to binary
y = y % 2 
clf = LogisticRegression(random_state=0 ,fit_intercept=True).fit(X, y) 
yhat = clf.predict(X)
get_cor(y, yhat, normalize=True)

0.8622775800711744

Plan : We will have 64 online weak learners corresponding to each coordinate of X. These weak learners are Hedge algorithms. Each instance of Hedge has access to 17 experts corresponding to the fact that each coordinate of X can take 17 values (0-16). Now, we will observe an data point (x,y) and we are interested to choose a threshold above which the weak learner would predict x.

In [6]:
import math
import random
import matplotlib.pyplot as plt

def categorical_draw(probs):
  z = random.random()
  cum_prob = 0.0
  for i in range(len(probs)):
    prob = probs[i]
    cum_prob += prob
    if cum_prob > z:
      return i

  return len(probs) - 1

class Hedge():
  def __init__(self, n_expts, weights, eta=1):
    self.eta = eta
    self.n_expts = n_expts
    self.weights = weights
    self.bounds = (np.arange(self.n_expts) + 0.5)/(self.n_expts - 1)

  def initialize(self):
    self.weights = np.ones(self.n_expts) * (1./self.n_expts)

  def get_normalized_weights(self):
    probs = self.weights / np.sum(self.weights)
    return probs

  def choose_expt(self):
    probs = self.weights / np.sum(self.weights)
    return categorical_draw(probs)
  
  def play(self, x, y):
    # bounds = np.arange(17) + 0.5
    # bounds = bounds / 16.0 # normalizing bounds
    preds = np.asarray(x < self.bounds, dtype=np.float32)
    score = preds == y
    costs = 1 - score
    update = np.exp( -self.eta * costs)
    self.weights = self.weights * update
    self.weights = self.weights / np.sum(self.weights)
  
  def predict(self, x):
    expt = self.choose_expt()
    y_pred = int(x < self.bounds[expt])
    return y_pred
    # return np.asarray([x < (expt + 0.5)/16.0]).astype(np.float32)

In [None]:
n_expts = 17
T = 5000
eta = np.sqrt(np.log(n_expts)/T)
hedge = Hedge(n_expts=n_expts, weights=None, eta=eta)
hedge.initialize()

In [None]:
threshold = 1.0 # this is the fraction of data points whose labels are intact
X_ = np.random.randint(0,17,size=T)/16.0
y_ = np.asarray(X_ < (5.5/16.0), dtype=np.float32)
mask = np.random.random(size=T)
mask = mask < threshold
y_ = (y_*mask).astype(np.float32)

In [None]:
hedge.initialize()
preds, chosen_expts = [], []
for i in range(X_.shape[0]):
  chosen_expts.append(hedge.choose_expt())
  preds.append(hedge.predict(X_[i])) # predict y_hat
  hedge.play(X_[i], y_[i]) # update weights
  if (i+1) % 1000 == 0:
    print(i+1, hedge.weights)

In [None]:
plt.plot(hedge.weights)

In [None]:
best_expt, best_score = 0, 0
for i in range(17):
  j = (i+0.5)/16.0
  score = np.sum((X_ < j) == y_)/T
  if score > best_score:
    best_expt = i
    best_score = score

print(best_score, best_expt)

In [None]:
best_expt_preds = X_ < hedge.bounds[best_expt]
best_expt_acc = best_expt_preds.flatten() == y_

In [None]:
preds = np.asarray(preds)
acc = preds.flatten() == y_
cum_acc = np.cumsum(acc)
cum_best_expt_acc = np.cumsum(best_expt_acc)
regret = cum_best_expt_acc - cum_acc # regret
plt.xlabel(r't$\rightarrow$')
plt.ylabel(r'regret$\rightarrow$')
plt.grid(which='both')
plt.plot(regret, label='actual regret')
plt.plot(np.sqrt(2*np.arange(T)*np.log(n_expts)), label='theoretical regret')
plt.legend()

In [None]:
trend_of_expts = np.zeros((n_expts, T))
trend_of_expts[chosen_expts, np.arange(T)] = 1
trend_of_expts = np.cumsum(trend_of_expts, axis=1)
trend_of_expts = trend_of_expts / np.arange(1, T+1)
plt.figure(figsize=(12, 9))
plt.grid(which='both')
plt.xlabel(r't$\rightarrow$')
plt.ylabel(r'fraction$\rightarrow$')
for i in range(trend_of_expts.shape[0]):
  plt.plot(trend_of_expts[i], label='expt_'+str(i))
  plt.legend()

In [None]:
trend_of_expts[5,-1]

In [None]:
hedge.predict(4/16.0)

Estimating gamma

Sample T = 5000 and pass them in arbitrary order in get empirical estimation of expectation.  We use $R_W(T)=\sqrt{T\log N}$. Then calculate $\gamma$ as:
$$\gamma\leq\frac{E[<W(x_t),y_t>]+R_W}{E[<h(x_t),y_t>]}$$

In [None]:
wl = Hedge(n_expts=n_expts, weights=None, eta=eta)
wl.initialize()
wl.weights

In [None]:
X.shape

(5620, 64)

In [7]:
from tqdm import tqdm
n_iterations = 100

def estimate_gamma(X, y, T, n_expts, wl, n_iterations):
  n, d = X.shape
  wl_cor, h_cor, gammas = [], [], []
  theoretical_regret = np.sqrt(2*T*np.log(n_expts))
  final_weights = np.zeros((d, n_expts)) # a 64*17 dim matrix to store the final weights of 64 weak learners
  for wl_no in tqdm(range(d)): # choose the dimension along which you require the weak learner
    assert X.max() <= 1
    assert X.min() >= 0
    cum_weights = np.zeros(n_expts)
    for _ in range(n_iterations):
      start = np.random.choice(n - T - 1)
      X_wl, y_sampled = X[start:start + T, wl_no], y[start:start + T]
      X_lr = X[start:start+T]
      wl.initialize()
      preds = []
      for i in range(T):
        preds.append(wl.predict(X_wl[i])) # predict y_hat
        wl.play(X_wl[i], y_sampled[i]) # update weights

      preds = np.asarray(preds).flatten()
      wl_cor.append(get_cor(y_sampled, preds))
      # print(X_lr.shape)
      # import pdb; pdb.set_trace()
      clf = LogisticRegression(random_state=0 ,fit_intercept=True).fit(X_lr, y_sampled) 
      yhat = clf.predict(X_lr)
      h_cor.append(get_cor(y_sampled, yhat))
      cum_weights += wl.weights
    final_weights[wl_no] = cum_weights / n_iterations
    gammas.append(get_gamma(wl_cor, h_cor, theoretical_regret))
  return final_weights, gammas

In [None]:
# from sklearn.utils.testing import ignore_warnings
# from sklearn.exceptions import ConvergenceWarning
# from warnings import simplefilter
# simplefilter(action='ignore')
final_weights, gammas = estimate_gamma(X = X, y = y, T = T, n_expts = n_expts, wl = wl, n_iterations = n_iterations)
gammas = np.asarray(gammas)
np.save('/content/drive/MyDrive/ee6180_project/final_weights_digits', final_weights)
np.save('/content/drive/MyDrive/ee6180_project/gammas_digits', gammas)
print(gammas.max(), gammas.min(), gammas.mean())

In [None]:
# gammas = np.asarray(gammas)
# np.save('final_weights', final_weights)
# np.save('gammas', gammas)
# print(gammas.max(), gammas.min(), gammas.mean())

In [8]:
class OnlineConvexOptimizer():
  """
  Online Gradient Descent according to 
  Elad Hazan. Introduction to online convex optimization. Foundations and Trends in Optimization, 2(3-4):157–325, 2016., Chapter 3.1
  """
  def __init__(self, D=2, gamma=0.02):
    self.D = D
    self.gamma = gamma
    self.G = 2./self.gamma
    self.iter = 0
  
  def initialize(self):
    self.iter = 0

  def project(self, x):
    """
    Project to [-1,1] i.e. the convex set K upon which are projecting.
    """
    if x < -1:
      return -1
    if x > 1:
      return 1
    return x
  
  def step(self, x, f):
    self.iter = self.iter + 1
    stepsize = self.D/(self.G*np.sqrt(self.iter))
    y = x - stepsize * f(1)
    x = self.project(y)
    return x

In [None]:
def f(x):
  return x * np.random.random()
  
oco = OnlineConvexOptimizer()
x = 0
for _ in range(100):
  x_ = oco.step(x, f)
  print(oco.iter, x, f(x), x_)
  x = x_

In [9]:
class OnlineBooster():
  """
  weak_learners: A dictionary of N weak learners.
  T: time horizon
  gamma: AWOL parameter, in our case, it is set as 0.02
  oco: online convex optimizer (we use OGD)
  """
  def __init__(self, T=5000, gamma=0.02, weak_learners=None, oco=None, best_wl=-1):
    self.weak_learners = weak_learners
    self.T = T
    self.N = len(weak_learners)
    self.gamma = gamma
    self.oco = oco
    self.grads = []
    self.best_wl = best_wl
  
  def weak_learners_initialize(self):
    for algo in self.weak_learners:
      self.weak_learners[algo].initialize()
  
  def randomized_project(self, x):
    if np.abs(x) >= 1:
      return np.sign(x)
    p1 = 0.5 * (1 + x)
    p2 = 0.5 * (1 - x)
    z = np.random.choice(np.asarray([1, -1]), p=[p1, p2])
    return z

  def randomized_label(self, y, p):
    # y in {0,1}
    z = random.random()
    if z < 0.5 * (1 + p):
      return y
    return 1 - y

  def booster_predict(self, x):
    preds = []
    # assert x.shape[0] == self.N
    x_tilda = x[i] if self.best_wl == -1 else x[self.best_wl]
    for i in range(self.N):
      p = self.weak_learners[i].predict(x_tilda)
      p = np.sign(p - 0.5) # To make predictions in {-1,+1}.
      preds.append(p)
    
    yhat = np.asarray(preds).mean()/self.gamma
    yhat = self.randomized_project(yhat)
    return yhat

  def update(self, x, y):
    """
    x: 64-dim input, y in {0,1}
    """
    for i in range(self.N):
      if i == 0:
        p_ti = 0.0
      else:
        p_ti = self.oco.step(p_ti, l_ti) # Not sure here
      
      x_tilda = x[i] if self.best_wl == -1 else x[self.best_wl]
      W_xt = np.sign(self.weak_learners[i].predict(x_tilda) - 0.5) # To make W(x_t) in {-1,+1}
      l_ti = lambda t: t * ((( W_xt * y) / self.gamma) - 1) # Not sure here also
      self.grads.append(np.abs(l_ti(1))) # To compute G = max |grad(f(x))|
      y_random = self.randomized_label(y, p_ti)
      self.weak_learners[i].play(x_tilda, y_random)
  
  def run(self, X, y):
    yhat_list = []
    for t in tqdm(range(self.T)):
      self.oco.initialize()
      xt, yt = X[t], y[t]
      yhat = self.booster_predict(xt)
      yhat_list.append(yhat)
      self.update(xt, yt)
      # print(self.weak_learners[10].weights)
    
    return np.asarray(yhat_list)

In [None]:
gammas = np.load('/content/drive/MyDrive/ee6180_project/gammas_digits.npy')
final_weights = np.load('/content/drive/MyDrive/ee6180_project/final_weights_digits.npy')

In [None]:
n, d = X.shape
n

5620

In [None]:
best_expt = gammas.argmax()
best_expt

62

In [11]:
def load_weights(final_weights, best_expt=-1, n_wl=100):
  weak_learners = dict()
  if best_expt == -1:
    n_wl = final_weights.shape[0]
    for i in range(n_wl): # change back to final_weights.shape[0] for warm starts
      weak_learners[i] = Hedge(n_expts=n_expts, weights=final_weights[i], eta=eta)
  else:
    for i in range(n_wl): # change back to final_weights.shape[0] for warm starts
      weak_learners[i] = Hedge(n_expts=n_expts, weights=final_weights[best_expt], eta=eta)

  return weak_learners
# weak_learners = load_weights(final_weights, best_expt=best_expt, n_wl = 500)
# weak_learners[10].weights

In [None]:
# gamma = gammas.min()
gamma = gammas[best_expt]
gamma

0.11266051416452608

In [None]:
K = 50
n_wl = 500
results = np.zeros((K, 2))
for k in range(K):
  start = np.random.choice(n - T - 1)
  X_sampled, y_sampled = X[start:start + T], y[start:start + T]
  # Keep multiple instance of best weak learner instead of keeping one wrt each dim else best_expt = -1
  oco = OnlineConvexOptimizer(gamma = gamma)
  weak_learners = load_weights(final_weights, best_expt=best_expt, n_wl = n_wl) 
  oco.initialize()
  booster = OnlineBooster(weak_learners = weak_learners, oco = oco, gamma = gamma, T = T, best_wl=best_expt)
  yhat_list = booster.run(X_sampled, y_sampled)

  clf = LogisticRegression(random_state=0 ,fit_intercept=True).fit(X_sampled, y_sampled)
  yhat_lr = clf.predict(X_sampled)
  h_star_cor = get_cor(y_sampled, yhat_lr, normalize=True)
  y_temp = np.sign(y_sampled - 0.5)
  pred_cor = get_cor(y_temp, yhat_list, zeros=False, normalize=True)
  results[k, 0] = h_star_cor
  results[k, 1] = pred_cor
  print(h_star_cor, pred_cor)

In [None]:
avg = np.mean(results[:,0] - results[:,1])
avg

0.6893520000000001

In [None]:
G = 2./gamma
D = 2
N = booster.N # change back to X.shape[1]

In [None]:
G = min(G, max(booster.grads))

In [None]:
theoretical_regret = np.sqrt(2*T*np.log(n_expts))
t1 = (theoretical_regret)/(gamma * T)
t2 = (1.5 * G * np.sqrt(N))/N
t1 + t2

0.9613291824633509

In [None]:
t1

0.2988119117046618

In [None]:
t2

0.662517270758689

In [None]:
1/gamma-1-G

In [None]:
booster.weak_learners[0].weights-booster.weak_learners[9].weights

## **New dataset: ISOLET**

In [23]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
data_tes = pd.read_csv('/content/drive/MyDrive/ee6180_project/isolet1+2+3+4.data', header=None)
data_tra = pd.read_csv('/content/drive/MyDrive/ee6180_project/isolet5.data', header=None)
data = pd.concat([data_tra, data_tes])
data = data.sample(frac=1)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617
225,-0.7872,-0.4604,-0.2128,-0.1218,0.1142,0.4738,0.7544,0.8974,0.8994,0.998,1.0,0.737,0.4778,0.2882,0.1508,0.1702,0.1528,0.118,0.12,0.3094,0.3578,0.5996,0.7794,0.7912,0.737,0.6034,0.5764,0.4564,0.3636,0.3404,0.2438,0.1606,-0.5944,-0.2826,-0.0488,0.0062,0.1266,0.5944,0.7024,1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.496,-0.1382,-0.3008,-0.2358,-0.2846,-0.2846,-0.1382,0.0082,0.4472,0.7236,1.0,0.6098,0.7886,0.8048,0.4634,0.5122,0.756,0.7724,0.2196,0.1708,0.3496,0.3008,0.3658,0.3658,0.2358,-0.0406,-0.2682,-0.0244,0.3496,0.4146,-0.0244,-0.3658,9.0
169,-0.3928,-0.0532,0.2796,0.3414,-0.2144,-0.3756,-0.5472,-0.554,-0.5608,-0.6604,-0.65,-0.482,-0.4476,-0.4134,-0.3962,-0.2624,-0.1458,-0.0772,0.0258,0.3208,0.5438,0.6706,0.9142,0.9074,1.0,0.9656,0.9862,0.7976,0.6124,0.657,0.6638,0.6192,-0.4518,-0.1506,0.271,0.3222,-0.1596,-0.3704,-0.4278,-0.512,...,-1.0,1.0,-0.6,-0.4,-1.0,0.2334,1.0,-0.1126,0.2536,0.493,0.6338,0.3098,0.1268,0.2676,0.2676,0.5212,0.6338,0.5774,0.6478,1.0,0.9436,0.5634,0.507,0.2394,0.3944,0.4508,0.6198,0.3944,0.4788,0.3662,0.338,0.2112,0.5352,0.6056,0.2816,0.3662,0.1268,0.169,-0.2394,7.0
2838,-0.3626,0.1838,0.5376,0.352,0.0438,-0.387,-0.5306,-0.7058,-0.6778,-0.6952,-0.6148,-0.5762,-0.2714,-0.0542,0.31,0.7268,0.937,0.867,0.6498,0.6918,0.916,0.9194,0.6812,0.5866,0.4046,0.5342,0.6392,0.7934,0.979,1.0,0.9474,0.5832,-0.3414,0.3622,0.6812,0.3102,0.0918,-0.4108,-0.435,-0.6118,...,-1.0,1.0,-1.0,-1.0,-1.0,0.0334,-1.0,-0.3704,-0.2962,-0.3888,-0.074,-0.074,-0.2038,-0.0926,0.037,0.7408,0.7962,1.0,0.8888,0.6482,0.7408,0.6666,0.2408,0.037,0.1482,0.1112,0.1112,-0.2592,0.2038,0.2408,0.2778,0.2778,0.1852,0.2592,0.2592,0.1482,-0.2038,-0.1482,-0.5,16.0
674,-0.097,0.4132,0.8878,1.0,0.3062,-0.2704,-0.2296,-0.4286,-0.449,-0.3878,-0.4286,-0.1836,-0.1888,-0.0358,0.3572,0.4846,0.7346,0.6888,0.8316,0.6224,0.5102,0.602,0.796,0.9184,0.903,0.5052,0.3674,0.1734,0.0918,0.0918,0.0612,0.2654,-0.2442,0.2314,0.7538,0.8854,0.4522,-0.0574,-0.1848,-0.3334,...,1.0,-1.0,1.0,-1.0,0.2,0.6,-1.0,0.0736,0.3088,0.0736,0.4558,0.2942,0.3236,0.2352,0.1176,0.3236,0.3824,0.5294,0.5294,0.9706,1.0,0.5588,0.6764,0.7058,0.6176,0.25,0.5736,0.7352,0.6176,0.6324,0.6324,0.5148,0.1324,0.1618,0.0736,0.0,0.4412,0.1618,0.0294,26.0
2398,-0.3422,0.4112,0.3184,0.0848,-0.5624,-0.7586,-0.687,-0.7188,-0.984,-0.9576,-0.756,-0.5438,-0.4006,-0.2388,0.0026,0.3872,0.9258,1.0,0.7746,0.6074,0.6552,0.7188,0.7108,0.4536,0.236,0.1168,0.2042,0.4376,0.6578,0.6312,0.2838,0.0982,-0.3768,0.3116,0.2996,0.1474,-0.5434,-0.7682,-0.7632,-0.7078,...,-1.0,1.0,-0.6,-1.0,-0.8,-0.6666,-1.0,0.123,0.3538,0.5076,0.5384,0.1538,0.2462,0.123,0.3692,1.0,0.6,0.5538,0.9384,0.723,0.8,0.4462,0.5692,0.477,0.0924,0.123,0.0308,-0.1692,0.0,0.277,0.323,0.4462,0.3846,0.4154,0.1538,0.1692,0.123,0.2308,0.1076,4.0


In [24]:
data_sample = data.sample(frac = 1)
X, y = data_sample.to_numpy()[:,:-1], data_sample.to_numpy()[:,-1]
print(X.shape, y.shape)

(7797, 617) (7797,)


In [25]:
X = normalize(X)
y = y % 2

In [26]:
clf = LogisticRegression(random_state=0 ,fit_intercept=True).fit(X, y) 
yhat = clf.predict(X)
get_cor(y, yhat, normalize=True)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.7273310247531102

In [27]:
n_expts = 17
T = 7500
eta = np.sqrt(np.log(n_expts)/T)

In [28]:
wl = Hedge(n_expts=n_expts, weights=None, eta=eta)
wl.initialize()
wl.weights

array([0.05882353, 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353])

In [None]:
n_iterations = 10
final_weights, gammas = estimate_gamma(X = X, y = y, T = T, n_expts = n_expts, wl = wl, n_iterations = n_iterations)
gammas = np.asarray(gammas)
np.save('/content/drive/MyDrive/ee6180_project/final_weights_isolet', final_weights)
np.save('/content/drive/MyDrive/ee6180_project/gammas_isolet', gammas)
print(gammas.max(), gammas.min(), gammas.mean())

In [29]:
gammas = np.load('/content/drive/MyDrive/ee6180_project/gammas_isolet.npy')
final_weights = np.load('/content/drive/MyDrive/ee6180_project/final_weights_isolet.npy')

In [32]:
gamma = gammas.max()
best_expt = gammas.argmax()
best_expt

27

In [33]:
gamma

0.11160698916778682

In [34]:
n, d = X.shape
d

617

In [None]:
K = 4
n_wl = d # using 617 instances of weak learners
results = np.zeros((K, 2))
for k in range(K):
  start = np.random.choice(n - T - 1)
  X_sampled, y_sampled = X[start:start + T], y[start:start + T]
  # Keep multiple instance of best weak learner instead of keeping one wrt each dim else best_expt = -1
  oco = OnlineConvexOptimizer(gamma = gamma)
  weak_learners = load_weights(final_weights, best_expt=best_expt, n_wl = n_wl) 
  oco.initialize()
  booster = OnlineBooster(weak_learners = weak_learners, oco = oco, gamma = gamma, T = T, best_wl=best_expt)
  yhat_list = booster.run(X_sampled, y_sampled)

  clf = LogisticRegression(random_state=0 ,fit_intercept=True).fit(X_sampled, y_sampled)
  yhat_lr = clf.predict(X_sampled)
  h_star_cor = get_cor(y_sampled, yhat_lr, normalize=True)
  y_temp = np.sign(y_sampled - 0.5)
  pred_cor = get_cor(y_temp, yhat_list, zeros=False, normalize=True)
  results[k, 0] = h_star_cor
  results[k, 1] = pred_cor
  print(h_star_cor, pred_cor)


  0%|          | 0/7500 [00:00<?, ?it/s][A
  0%|          | 2/7500 [00:00<06:58, 17.91it/s][A
  0%|          | 4/7500 [00:00<07:06, 17.56it/s][A
  0%|          | 6/7500 [00:00<06:58, 17.90it/s][A
  0%|          | 8/7500 [00:00<07:15, 17.20it/s][A
  0%|          | 10/7500 [00:00<07:12, 17.33it/s][A
  0%|          | 12/7500 [00:00<07:23, 16.88it/s][A
  0%|          | 14/7500 [00:00<07:18, 17.09it/s][A
  0%|          | 16/7500 [00:00<07:12, 17.30it/s][A
  0%|          | 18/7500 [00:01<07:01, 17.73it/s][A
  0%|          | 20/7500 [00:01<07:07, 17.52it/s][A
  0%|          | 22/7500 [00:01<07:06, 17.52it/s][A
  0%|          | 24/7500 [00:01<07:35, 16.42it/s][A
  0%|          | 26/7500 [00:01<07:19, 17.02it/s][A
  0%|          | 28/7500 [00:01<07:13, 17.23it/s][A
  0%|          | 30/7500 [00:01<07:05, 17.55it/s][A
  0%|          | 32/7500 [00:01<07:13, 17.22it/s][A
  0%|          | 34/7500 [00:01<07:16, 17.10it/s][A
  0%|          | 36/7500 [00:02<07:17, 17.06it/s][A
  1%|

0.7328 0.0496



  0%|          | 0/7500 [00:00<?, ?it/s][A
  0%|          | 2/7500 [00:00<07:10, 17.42it/s][A
  0%|          | 4/7500 [00:00<07:07, 17.55it/s][A
  0%|          | 6/7500 [00:00<07:20, 17.00it/s][A
  0%|          | 8/7500 [00:00<07:11, 17.36it/s][A
  0%|          | 10/7500 [00:00<06:59, 17.85it/s][A
  0%|          | 12/7500 [00:00<07:01, 17.76it/s][A
  0%|          | 14/7500 [00:00<07:03, 17.68it/s][A
  0%|          | 16/7500 [00:00<06:58, 17.89it/s][A
  0%|          | 18/7500 [00:01<07:00, 17.78it/s][A
  0%|          | 20/7500 [00:01<06:57, 17.90it/s][A
  0%|          | 22/7500 [00:01<06:49, 18.28it/s][A
  0%|          | 24/7500 [00:01<07:01, 17.75it/s][A
  0%|          | 26/7500 [00:01<06:59, 17.83it/s][A
  0%|          | 28/7500 [00:01<07:14, 17.20it/s][A
  0%|          | 30/7500 [00:01<07:07, 17.49it/s][A
  0%|          | 32/7500 [00:01<06:58, 17.86it/s][A
  0%|          | 34/7500 [00:01<06:49, 18.23it/s][A
  0%|          | 36/7500 [00:02<06:52, 18.07it/s][A
  1%|

0.7269333333333333 0.04853333333333333



  0%|          | 0/7500 [00:00<?, ?it/s][A
  0%|          | 2/7500 [00:00<07:15, 17.21it/s][A
  0%|          | 4/7500 [00:00<07:36, 16.40it/s][A
  0%|          | 6/7500 [00:00<07:25, 16.81it/s][A
  0%|          | 8/7500 [00:00<07:26, 16.77it/s][A
  0%|          | 10/7500 [00:00<07:45, 16.08it/s][A
  0%|          | 12/7500 [00:00<08:10, 15.26it/s][A
  0%|          | 14/7500 [00:00<07:52, 15.86it/s][A
  0%|          | 16/7500 [00:01<07:57, 15.66it/s][A
  0%|          | 18/7500 [00:01<07:44, 16.09it/s][A
  0%|          | 20/7500 [00:01<08:39, 14.41it/s][A
  0%|          | 22/7500 [00:01<08:18, 15.00it/s][A
  0%|          | 24/7500 [00:01<07:59, 15.60it/s][A
  0%|          | 26/7500 [00:01<07:48, 15.96it/s][A
  0%|          | 28/7500 [00:01<08:20, 14.92it/s][A
  0%|          | 30/7500 [00:01<08:13, 15.15it/s][A
  0%|          | 32/7500 [00:02<07:52, 15.81it/s][A
  0%|          | 34/7500 [00:02<07:56, 15.65it/s][A
  0%|          | 36/7500 [00:02<07:40, 16.22it/s][A
  1%|

0.7293333333333333 0.04853333333333333



  0%|          | 0/7500 [00:00<?, ?it/s][A
  0%|          | 2/7500 [00:00<07:41, 16.25it/s][A
  0%|          | 4/7500 [00:00<07:30, 16.65it/s][A
  0%|          | 6/7500 [00:00<07:50, 15.92it/s][A
  0%|          | 8/7500 [00:00<07:32, 16.55it/s][A
  0%|          | 10/7500 [00:00<07:20, 17.00it/s][A
  0%|          | 12/7500 [00:00<07:08, 17.50it/s][A
  0%|          | 14/7500 [00:00<07:28, 16.67it/s][A
  0%|          | 16/7500 [00:00<07:10, 17.37it/s][A
  0%|          | 18/7500 [00:01<07:06, 17.55it/s][A
  0%|          | 20/7500 [00:01<07:16, 17.15it/s][A
  0%|          | 22/7500 [00:01<07:00, 17.79it/s][A
  0%|          | 24/7500 [00:01<07:06, 17.54it/s][A
  0%|          | 26/7500 [00:01<06:56, 17.93it/s][A
  0%|          | 28/7500 [00:01<07:21, 16.93it/s][A
  0%|          | 30/7500 [00:01<07:09, 17.38it/s][A
  0%|          | 32/7500 [00:01<07:01, 17.73it/s][A
  0%|          | 34/7500 [00:01<07:00, 17.76it/s][A
  0%|          | 36/7500 [00:02<07:24, 16.81it/s][A
  1%|

0.7285333333333334 0.047733333333333336


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
G = 2./gamma
D = 2
N = X.shape[1] # change back to X.shape[1]

In [None]:
G = min(G, max(booster.grads))

In [None]:
t1 = (theoretical_regret)/(gamma * T)
t2 = (1.5 * G * np.sqrt(N))/N
t1 + t2

In [None]:
t1

In [None]:
t2