In [1]:
import numpy as np
from utils.data_utils import load_simulation_data
from sklearn.linear_model import LogisticRegression as LR
from numpy.linalg import norm

In [2]:
def grad_logis(X,y,model):
    """find gradients of cost function w.r.t. the coefficients in logistic regression"""
    prob = model.predict_proba(X)
    prob1 = prob[:,1]
    X1 = np.hstack([np.ones((X.shape[0],1)), X])
    grad = np.matmul(np.diag(prob1-y),X1)
    return grad

def grad_logis_prob(X,model):
    """find gradients of predicted probability w.r.t. the coefficients in logistic regression"""
    prob = model.predict_proba(X)
    prob01 = prob[:,0]*prob[:,1]
    X1 = np.hstack([np.ones((X.shape[0],1)), X])
    grad = -np.matmul(np.diag(prob01),X1)
    return grad

def grad_logis_ent(X, model):
    """find gradients of entropy w.r.t the coefficients in logistic regression"""
    logprob = model.predict_log_proba(X)
    logprob1m0 = -(logprob[:,1]-logprob[:,0])
    pgrad = grad_logis_prob(X,model)
    grad = np.matmul(np.diag(logprob1m0), pgrad)
    return grad

In [3]:
L0 = 200
B = 20
n_iter = 50
rep = 10
alpha = 0.5

In [4]:
acc_tr = np.zeros((rep,n_iter))
acc_tr0 = np.zeros((rep,n_iter))
acc_tr1 = np.zeros((rep,n_iter))
acc_L = np.zeros((rep,n_iter))
acc_L0 = np.zeros((rep,n_iter))
acc_L1 = np.zeros((rep,n_iter))
acc_U = np.zeros((rep,n_iter))
acc_U0 = np.zeros((rep,n_iter))
acc_U1 = np.zeros((rep,n_iter))
acc_te = np.zeros((rep,n_iter))
acc_te0 = np.zeros((rep,n_iter))
acc_te1 = np.zeros((rep,n_iter))

In [5]:
for state in range(rep):
    print(state)
    Xtr,Xte,ytr,yte,ztr,zte = load_simulation_data(simulation_params = {'p':100,'q':40, 'r':10, 'b':0, 't':0}, 
                                               n1=2000, n2=1000, svm=False, random_state=state, intercept=False, 
                                               train_frac = 0.7)
    # data preprocessing
    yytr = ytr.ravel()
    yyte = yte.ravel()
    zztr = ztr.ravel()
    zzte = zte.ravel()
    Xtr0 = Xtr[zztr==0]
    Xtr1 = Xtr[zztr==1]
    Xte0 = Xte[zzte==0]
    Xte1 = Xte[zzte==1]
    yytr0 = yytr[zztr==0]
    yytr1 = yytr[zztr==1]
    yyte0 = yyte[zzte==0]
    yyte1 = yyte[zzte==1]
    
    # initialization
    N = zztr.shape[0]
    perm = np.random.permutation(N)
    XL = Xtr[perm[0:L0],:]
    XU = Xtr[perm[L0:],:]
    yL = yytr[perm[0:L0]]
    yU = yytr[perm[L0:]]
    zL = zztr[perm[0:L0]]
    zU = zztr[perm[L0:]]
    XL0 = XL[zL==0]
    XL1 = XL[zL==1]
    yL0 = yL[zL==0]
    yL1 = yL[zL==1]
    XU0 = XU[zU==0]
    XU1 = XU[zU==1]
    yU0 = yU[zU==0]
    yU1 = yU[zU==1]
    
    for i in range(n_iter):
        # model fitting and prediction
        clf=LR(random_state=state).fit(XL, yL)
        acc_tr[state, i] = clf.score(Xtr, yytr)
        acc_tr0[state, i] = clf.score(Xtr0, yytr0)
        acc_tr1[state, i] = clf.score(Xtr1, yytr1)
        acc_L[state, i] = clf.score(XL, yL)
        acc_L0[state, i] = clf.score(XL0, yL0)
        acc_L1[state, i] = clf.score(XL1, yL1)
        acc_U[state, i] = clf.score(XU, yU)
        acc_U0[state, i] = clf.score(XU0, yU0)
        acc_U1[state, i] = clf.score(XU1, yU1)
        acc_te[state, i] = clf.score(Xte, yyte)
        acc_te0[state, i] = clf.score(Xte0, yyte0)
        acc_te1[state, i] = clf.score(Xte1, yyte1)
        
        # modify next labeled and ulabeled sets
        
        # worst group selection
        if acc_L0[state, i]<acc_L1[state, i]:
            wg = 0
            Xw = XL0
            yw = yL0
        else:
            wg = 1
            Xw = XL1
            yw = yL1
        
        # find gradients
        
        grad_w = np.mean(grad_logis(Xw, yw, clf), axis=0)
        # print(grad_w.shape)
        grad1 = grad_logis_prob(XU,clf)
        grad0 = -grad1
        # prob = clf.predict_proba(XU)[:,1]
        prob = clf.predict_proba(XU)
        prob2 = 2*prob[:,1]-1
        grad_U = np.multiply(grad1, prob2[:,None]) # expected grad
        # print(grad_U.shape)
        cossim = np.matmul(grad_U, grad_w)
        # cossim = np.abs(cossim)
        # print(cossim.shape)
        ent = -prob[:,0]*np.log(prob[:,0])-prob[:,1]*np.log(prob[:,1])
        score = alpha*ent+(1-alpha)*cossim
        
        # selection
        arg = np.argpartition(score, -B)
        lind = arg[-B:]
        uind = arg[:-B]
        XL = np.vstack([XL, XU[lind,:]])
        yL = np.append(yL, yU[lind])
        zL = np.append(zL, yU[lind])
        XU = XU[uind,:]
        yU = yU[uind]
        zU = zU[uind]
        XL0 = XL[zL==0]
        XL1 = XL[zL==1]
        yL0 = yL[zL==0]
        yL1 = yL[zL==1]
        XU0 = XU[zU==0]
        XU1 = XU[zU==1]
        yU0 = yU[zU==0]
        yU1 = yU[zU==1]
        

0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


2
3
4


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

5
6
7


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


8


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


9


In [6]:
print(np.mean(acc_te, axis=0))
print(np.mean(acc_te0, axis=0))
print(np.mean(acc_te1, axis=0))


[0.75566667 0.77188889 0.77666667 0.78311111 0.78722222 0.79333333
 0.79711111 0.79822222 0.80088889 0.80433333 0.80977778 0.811
 0.81211111 0.815      0.81777778 0.81766667 0.82044444 0.82388889
 0.82755556 0.82833333 0.82788889 0.83044444 0.83377778 0.83522222
 0.83733333 0.83844444 0.841      0.84277778 0.84422222 0.84333333
 0.84544444 0.84377778 0.84722222 0.85066667 0.85111111 0.85088889
 0.85244444 0.85422222 0.85533333 0.85511111 0.85666667 0.85988889
 0.86011111 0.86111111 0.86133333 0.86155556 0.86266667 0.86333333
 0.86444444 0.86455556]
[0.77135783 0.78625913 0.78995791 0.80091396 0.80380853 0.81204705
 0.816678   0.81742605 0.81894531 0.82078539 0.82739963 0.82793713
 0.82880853 0.83147865 0.83730385 0.83941355 0.84167957 0.84476994
 0.84823258 0.85085097 0.84884439 0.85134927 0.85751015 0.85945953
 0.86247282 0.86349438 0.86666745 0.8684289  0.86950505 0.86776332
 0.87097993 0.86976889 0.87211527 0.87685391 0.87639932 0.87591346
 0.8779547  0.87880776 0.88066713 0.8811544

In [8]:
results = {'acc_te':acc_te, 'acc_te0':acc_te0, 'acc_te1':acc_te1}

In [9]:
import pickle
with open('fairAL_alpha_agg.res','wb') as f:
    pickle.dump(results, f)

In [7]:
a=np.array([[3,4],[5,6],[7,8]])
b=np.array([1,2,3])
c=np.multiply(a,b[:,None])
print(c)
d= 2*b-1
print(d)

[[ 3  4]
 [10 12]
 [21 24]]
[1 3 5]
