In [1]:
import coba as cb
import torch
from pathlib import Path
from collections import defaultdict, Counter
from itertools import islice, chain, count, product, repeat
from contextlib import nullcontext

data_dir = "../data"

import coba as cb

import warnings

import time
import csv
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator
from sklearn.preprocessing import RobustScaler, QuantileTransformer, MinMaxScaler, StandardScaler, Binarizer
from sklearn.feature_selection import  mutual_info_classif, f_classif, GenericUnivariateSelect
from sklearn.model_selection import cross_validate, cross_val_predict, StratifiedKFold, GridSearchCV, LeaveOneGroupOut, StratifiedShuffleSplit
from sklearn.metrics import balanced_accuracy_score, mean_squared_error, mean_absolute_error
from sklearn.pipeline import Pipeline

import torch
from parameterfree import COCOB

from sklearn.dummy import DummyRegressor, DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from xgboost import XGBClassifier

from concurrent.futures import ProcessPoolExecutor

import torch
import torch.utils
import torch.utils.data
import coba as cb
from typing import Tuple, Optional

# Limit torch to three intra-op and three inter-op threads. A RuntimeError
# from either call is deliberately ignored so re-running this cell is harmless.
try:
    torch.set_num_threads(3)
    torch.set_num_interop_threads(3)
except RuntimeError:
    pass

# Named plot colours (hex strings) referenced as c0..c9.
c0, c1, c2, c3, c4 = "#444", "#0072B2", "#E69F00", "#009E73", "#56B4E9"
c5, c6, c7, c8, c9 = "#D55E00", "#F0E442", "#CC79A7", "#000000", "#332288"

# All tensors are created on the CPU; figures use a 20pt font.
torch.set_default_device('cpu')
plt.rc('font', size=20)

# ---------------------------------------------------------------------------
# Load the hourly feature table and build the label / feature matrices.
#   G  : participant id per row
#   X1 : accelerometer features only
#   X2 : accelerometer + gps + motion features
#   X3 : X2 + one-hot phone platform
#   X4 : X3 + one-hot time-of-day tag
#   Y1 : ER_desire, binarized per participant around that participant's mean
#   Y2 : 1.0 when INT_availability == "yes", else 0.0
# ---------------------------------------------------------------------------
df = pd.read_csv(f"{data_dir}/all_features_1h_v3.csv")

acc_cols    = [c for c in df.columns if c.startswith("acc_")]
sensor_cols = [c for c in df.columns if c.startswith(("acc_", "gps_", "motion_"))]

G  = df["id_participant"].to_numpy()
# Force float so the in-place assignment of scaled values below can never be
# truncated by an integer dtype.
X1 = df[acc_cols].to_numpy(dtype=float)
X2 = df[sensor_cols].to_numpy(dtype=float)
Y1 = df["ER_desire"].astype(float).to_numpy()

# A plain `== "yes"` comparison maps missing answers to 0.0, which made the
# np.isnan(Y2) filter below a no-op. Keep missing answers as NaN so those rows
# are actually dropped instead of being treated as "not available".
availability = df["INT_availability"]
Y2 = (availability == "yes").astype(float).where(availability.notna()).to_numpy()

# Keep only rows with complete sensor features and both labels.
no_na = ~(np.isnan(X2).any(axis=1) | np.isnan(Y1) | np.isnan(Y2))

G  = G [no_na]
X1 = X1[no_na]
X2 = X2[no_na]
Y1 = np.expand_dims(Y1[no_na],axis=1)
Y2 = np.expand_dims(Y2[no_na],axis=1)

# Normalize within each participant: features are standardized and ER_desire
# becomes 1.0 where it is above that participant's own mean.
for g in np.unique(G):
    rows = G == g
    Y1[rows] = Binarizer(threshold=np.mean(Y1[rows].squeeze())).fit_transform(Y1[rows])
    X1[rows] = StandardScaler().fit_transform(X1[rows])
    X2[rows] = StandardScaler().fit_transform(X2[rows])

def _indicator(column, value):
    """Return an (n_kept_rows, 1) float column that is 1.0 where df[column] == value."""
    return np.expand_dims((df[column] == value).astype(float).to_numpy(),1)[no_na]

platform_onehot = [_indicator("Platform", v) for v in ("Android", "iPhone")]
tag_onehot      = [_indicator("tag", v) for v in ("evening", "morning", "afternoon")]

# X3/X4 extend the already-standardized X2 (scaling copies of X2 separately
# would produce exactly the same numbers). np.concatenate returns new arrays,
# so X2 itself is left untouched.
X3 = np.concatenate([X2, *platform_onehot], axis=1)
X4 = np.concatenate([X2, *platform_onehot, *tag_onehot], axis=1)
In [31]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: the output is the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec,rng=1):
        """Translate one spec entry into a (layer, output_width) pair.

        Args:
            curr_dim: Width produced by the previous layer, or None before the
                input width has been declared.
            spec: A single layer specification (see `__init__`).
            rng: Seed forwarded to nested skip networks.

        Returns:
            (layer, width). `layer` is None when `spec` only declares the
            input width and therefore adds no module.

        Raises:
            ValueError: If `spec` is not a recognized specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if isinstance(spec,int):
            # The first integer only declares the input width.
            if curr_dim is None:
                return None, spec
            return torch.nn.Linear(curr_dim,spec),spec
        if isinstance(spec,list):
            # Previously the nested network always reseeded with 1, so a
            # non-default rng was silently ignored for skip blocks.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec, rng)), curr_dim
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                first <int> -- the input feature width (adds no layer)
                <int> -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [...] -- a skip (residual) block built from the nested specs
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before the layers are
                created. Note that this reseeds torch's global generator.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec,rng)
            # Explicit None check: module truthiness is not a reliable signal.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,i):
        from itertools import compress, repeat, chain
        from operator import eq

        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.train_X)))

        X = self.train_X.tolist()    
        Y = self.train_Y[:,i]
        Y = list(map(tuple,Y.tolist()))

        X = list(map(X.__getitem__,rng_indexes))
        Y = list(map(Y.__getitem__,rng_indexes))

        eq_class  = {y: list(compress(X,map(eq,Y,repeat(y)))) for y in set(Y)}
        ne_class  = {y: list(chain(*[v for k,v in eq_class.items() if k != y ])) for y in set(Y)}

        rng = cb.CobaRandom(self.params['rng'])

        def choose_unique(items,item):
            while items[i:=rng.randint(0,len(items)-1)]==item:
                pass
            return items[i]

        def choose_n(items,n):
            indexes = set()
            while True:
                indexes.add(rng.randint(0,len(items)-1))
                if len(indexes)==n: return [items[i] for i in indexes]

        if sr < 1:
            anchor, positive, negative = [], [], []
            for i in range(int(len(X)*sr)):
                x,y = X[i],Y[i]
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],x))
                negative.append(choose_n     (ne_class[y],neg))
            yield torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        else:
            for _ in range(sr):
                anchor, positive, negative = [], [], []
                for x,y in zip(X,Y):
                    anchor.append(x)
                    positive.append(choose_unique(eq_class[y],x))
                    negative.append(choose_n     (ne_class[y],neg))

                yield torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

    def train(self):
        return self.train_X, self.train_Y

    def test(self):
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then
    supervised training) on an environment and reports test AUC."""

    SSL_BATCH_SIZE = 4  # mini-batch size for contrastive pre-training
    WS_BATCH_SIZE  = 4  # mini-batch size for supervised training

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """
        Args:
            s1: FeedForward specs for the encoder (used by SSL and supervised training).
            s2: FeedForward specs for the supervised head.
            ssl_samps: Sampling rate handed to `env.ssl`; falsy disables pre-training.
            ssl_neg: Number of negatives per anchor.
            ssl_dropn: Number of trailing encoder layers used only during
                pre-training and dropped afterwards.
            ssl_tau: Temperature of the contrastive loss.
            ws_steps: Number of supervised passes over the training data.
            y: List of label column indexes to train on and score.
            n_models: Number of ensemble members whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    @staticmethod
    def _split_encoder(net, dropn):
        """Split `net`'s child layers into (kept encoder, last `dropn` layers).

        The cut is taken from the actual number of child layers. Counting spec
        entries instead is off by one whenever the spec starts with an input
        width, because that entry creates no layer.
        """
        layers = list(net.children())
        cut = max(len(layers) - dropn, 0)
        return torch.nn.Sequential(*layers[:cut]), torch.nn.Sequential(*layers[cut:])

    @staticmethod
    def _ssl_loss(A, P, N, tau):
        """Contrastive loss on cosine similarities (cf. https://arxiv.org/pdf/2002.05709).

        A and P are (batch, d) embeddings, N is (batch, neg, d).
        """
        a_norm = torch.linalg.norm(A,dim=1)

        p = torch.einsum("bi,bi->b",A,P)   / (a_norm*torch.linalg.norm(P,dim=1))
        n = torch.einsum("bi,bji->bj",A,N) / (a_norm.unsqueeze(1)*torch.linalg.norm(N,dim=2))

        p = torch.exp(p/tau)
        n = torch.exp(n/tau)

        return (-torch.log(p/(p+n.sum(dim=1)))).mean()

    def evaluate(self, env, lrn):
        """Train on `env` and yield a dict of AUC scores (one key per label in `y`).

        `lrn` is accepted for interface compatibility and is not used. The
        same scores are yielded twice, as before.
        NOTE(review): presumably downstream tooling wants two interactions -- confirm.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        ensemble = []
        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds with its default rng on every
            # construction, so ensemble members start from identical weights
            # and differ only through mini-batch order -- confirm this is intended.
            s1, sb = self._split_encoder(FeedForward(self.s1), self.ssl_dropn)
            s2 = FeedForward(self.s2)

            mods = [s1,sb,s2]
            # Parameter-free modules get no optimizer.
            opts = [COCOB(m.parameters()) if list(m.parameters()) else None for m in mods]
            ensemble.append((mods,opts))

        for mods,_ in ensemble:
            for l in mods: l.train()

        for (s1,sb,s2),(s1opt,sbopt,s2opt) in ensemble:

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.SSL_BATCH_SIZE,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        if s1opt is not None: s1opt.zero_grad()
                        if sbopt is not None: sbopt.zero_grad()
                        self._ssl_loss(_A,_P,_N,self.ssl_tau).backward()
                        if s1opt is not None: s1opt.step()
                        if sbopt is not None: sbopt.step()

            if self.ws_steps:

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.WS_BATCH_SIZE,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt is not None: s1opt.zero_grad()
                        if s2opt is not None: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt is not None: s2opt.step()
                        if s1opt is not None: s1opt.step()

        for mods,_ in ensemble:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        with torch.no_grad():
            # Average the members' sigmoid outputs; the SSL-only layers are unused here.
            preds = 0
            for (s1,_,s2),_ in ensemble:
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            preds = preds/len(ensemble)

            scores = {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        yield scores
        yield dict(scores)

def make_envs(X, Y, G, N):
    """Generate the experiment environments.

    For every seed in range(N) this yields ten pooled stratified splits
    ('fold') and, per eligible participant, a 'self' environment (train and
    test on that participant) and a 'rest' environment (train on all other
    participants, test on the same held-out rows).

    Args:
        X: Feature matrix (rows x features).
        Y: Label matrix (rows x labels).
        G: Participant id per row.
        N: Number of seeds / repetitions.

    Participants with fewer than 50 rows, or with any constant label column,
    are skipped; a participant whose split fails because a class has a single
    member is skipped as well.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = sorted(set(G.tolist()))

    too_short = {g for g in groups if (g==G).sum() < 50}
    all_equal = {g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T)}

    # Test for a non-empty set; any() would look at the ids themselves and
    # stay silent when the only flagged participant has id 0.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    eligible = [g for g in groups if g not in all_equal and g not in too_short]

    for rng in range(N):

        for trn,tst in StratifiedShuffleSplit(10,random_state=rng).split(X,Y):
            yield MyEnvironment(X[trn], Y[trn], X[tst], Y[tst], 'fold', None, rng)

        for g in eligible:
            Xg, Yg = X[g==G], Y[g==G]

            try:
                trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(Xg),random_state=rng).split(Xg,Yg))
            except ValueError as e:
                if 'The least populated class in y has only 1 member' in str(e): continue
                raise

            yield MyEnvironment(Xg[trn], Yg[trn], Xg[tst], Yg[tst], 'self', g, rng)
            yield MyEnvironment(X[g!=G], Y[g!=G], Xg[tst], Yg[tst], 'rest', g, rng)

def vals(x):
    """Build the evaluators for one feature row `x` (only its length is used)."""
    return [
        MyEvaluator((), (len(x),30,'l','r',30,1), 0, 0, 0, 0, 3, [1], 1),
    ]

# The evaluators never use a learner, so a single placeholder is supplied.
lrns = [None]
envs = list(make_envs(X3, np.hstack([Y1,Y2]), G, 1))

cb.Experiment(envs, lrns, vals(X3[0])).run('../logs/6/1b.log', processes=35, quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 1, 'Interactions': 0}


In [None]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: the output is the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec,rng=1):
        """Translate one spec entry into a (layer, output_width) pair.

        Args:
            curr_dim: Width produced by the previous layer, or None before the
                input width has been declared.
            spec: A single layer specification (see `__init__`).
            rng: Seed forwarded to nested skip networks.

        Returns:
            (layer, width). `layer` is None when `spec` only declares the
            input width and therefore adds no module.

        Raises:
            ValueError: If `spec` is not a recognized specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if isinstance(spec,int):
            # The first integer only declares the input width.
            if curr_dim is None:
                return None, spec
            return torch.nn.Linear(curr_dim,spec),spec
        if isinstance(spec,list):
            # Previously the nested network always reseeded with 1, so a
            # non-default rng was silently ignored for skip blocks.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec, rng)), curr_dim
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                first <int> -- the input feature width (adds no layer)
                <int> -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [...] -- a skip (residual) block built from the nested specs
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before the layers are
                created. Note that this reseeds torch's global generator.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec,rng)
            # Explicit None check: module truthiness is not a reliable signal.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """A fixed train/test split plus a sampler of contrastive training triples."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split; `trn`, `g` and `rng` are recorded in `params`.

        Labels are converted to float tensors; `rng` also seeds all sampling
        and shuffling done by this environment.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield (anchor, positive, negatives) float tensors for contrastive learning.

        For every anchor row a positive is drawn from the other rows with the
        same label and `neg` negatives from rows with a different label.

        Args:
            neg: Number of negatives per anchor.
            sr: If < 1, a single batch over the first int(len(train)*sr)
                shuffled rows is produced; otherwise `sr` full passes.
            yi: Label column index, or list of column indexes, defining the classes.

        Yields:
            Tensors shaped (anchors, d), (anchors, d) and (anchors, neg, d).
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        labels = self.train_Y[:,yi]
        if labels.dim() == 1:
            # A plain int index yields a vector; keep one label tuple per row.
            labels = labels.unsqueeze(1)

        rows   = self.train_X.tolist()
        labels = list(map(tuple,labels.tolist()))

        X = [rows[k] for k in rng_order]
        Y = [labels[k] for k in rng_order]

        classes  = set(Y)
        eq_class = {c: [x for x,y in zip(X,Y) if y == c] for c in classes}
        ne_class = {c: [x for k,v in eq_class.items() if k != c for x in v] for c in classes}

        # Position of every row inside its own class list. eq_class keeps the
        # order of X, so this is the count of earlier rows with the same label.
        # Comparing against the row's index in X (as before) mixed two
        # different index spaces and could return the anchor as its own positive.
        seen, pos_in_class = {}, []
        for y in Y:
            pos_in_class.append(seen.get(y,0))
            seen[y] = pos_in_class[-1] + 1

        def choose_unique(items,own_pos):
            # A single-member class has no other candidate; fall back to the
            # anchor itself rather than looping forever.
            if len(items) == 1: return items[0]
            for k in rng.randints(None,0,len(items)-1):
                if k != own_pos:
                    return items[k]

        def choose_n(items,n):
            # Distinct picks are only possible with at least n candidates;
            # otherwise sample with replacement instead of looping forever.
            distinct = len(items) >= n
            picked = set() if distinct else []
            for k in rng.randints(None,0,len(items)-1):
                (picked.add if distinct else picked.append)(k)
                if len(picked)==n:
                    return [items[j] for j in picked]

        if sr < 1:
            passes, n_anchors = 1, int(len(X)*sr)
        else:
            passes, n_anchors = sr, len(X)

        for _ in range(passes):
            anchor, positive, negative = [], [], []
            for k in range(n_anchors):
                x,y = X[k],Y[k]
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],pos_in_class[k]))
                negative.append(choose_n     (ne_class[y],neg))

            yield torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

    def train(self):
        """Return the training features and labels unchanged."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in a seeded, shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then
    supervised training) on an environment and reports test AUC."""

    SSL_BATCH_SIZE = 64  # mini-batch size for contrastive pre-training
    WS_BATCH_SIZE  = 16  # mini-batch size for supervised training

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """
        Args:
            s1: FeedForward specs for the encoder (used by SSL and supervised training).
            s2: FeedForward specs for the supervised head.
            ssl_samps: Sampling rate handed to `env.ssl`; falsy disables pre-training.
            ssl_neg: Number of negatives per anchor.
            ssl_dropn: Number of trailing encoder layers used only during
                pre-training and dropped afterwards.
            ssl_tau: Temperature of the contrastive loss.
            ws_steps: Number of supervised passes over the training data.
            y: List of label column indexes to train on and score.
            n_models: Number of ensemble members whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    @staticmethod
    def _split_encoder(net, dropn):
        """Split `net`'s child layers into (kept encoder, last `dropn` layers).

        The cut is taken from the actual number of child layers. Counting spec
        entries instead is off by one whenever the spec starts with an input
        width, because that entry creates no layer.
        """
        layers = list(net.children())
        cut = max(len(layers) - dropn, 0)
        return torch.nn.Sequential(*layers[:cut]), torch.nn.Sequential(*layers[cut:])

    @staticmethod
    def _ssl_loss(A, P, N, tau):
        """Contrastive loss on cosine similarities (cf. https://arxiv.org/pdf/2002.05709).

        A and P are (batch, d) embeddings, N is (batch, neg, d).
        """
        a_norm = torch.linalg.norm(A,dim=1)

        p = torch.einsum("bi,bi->b",A,P)   / (a_norm*torch.linalg.norm(P,dim=1))
        n = torch.einsum("bi,bji->bj",A,N) / (a_norm.unsqueeze(1)*torch.linalg.norm(N,dim=2))

        p = torch.exp(p/tau)
        n = torch.exp(n/tau)

        return (-torch.log(p/(p+n.sum(dim=1)))).mean()

    def evaluate(self, env, lrn):
        """Train on `env` and yield a dict of AUC scores (one key per label in `y`).

        `lrn` is accepted for interface compatibility and is not used. The
        same scores are yielded twice, as before.
        NOTE(review): presumably downstream tooling wants two interactions -- confirm.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        ensemble = []
        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds with its default rng on every
            # construction, so ensemble members start from identical weights
            # and differ only through mini-batch order -- confirm this is intended.
            s1, sb = self._split_encoder(FeedForward(self.s1), self.ssl_dropn)
            s2 = FeedForward(self.s2)

            mods = [s1,sb,s2]
            # Parameter-free modules get no optimizer.
            opts = [COCOB(m.parameters()) if list(m.parameters()) else None for m in mods]
            ensemble.append((mods,opts))

        for mods,_ in ensemble:
            for l in mods: l.train()

        for (s1,sb,s2),(s1opt,sbopt,s2opt) in ensemble:

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.SSL_BATCH_SIZE,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        if s1opt is not None: s1opt.zero_grad()
                        if sbopt is not None: sbopt.zero_grad()
                        self._ssl_loss(_A,_P,_N,self.ssl_tau).backward()
                        if s1opt is not None: s1opt.step()
                        if sbopt is not None: sbopt.step()

            if self.ws_steps:

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.WS_BATCH_SIZE,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt is not None: s1opt.zero_grad()
                        if s2opt is not None: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt is not None: s2opt.step()
                        if s1opt is not None: s1opt.step()

        for mods,_ in ensemble:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        with torch.no_grad():
            # Average the members' sigmoid outputs; the SSL-only layers are unused here.
            preds = 0
            for (s1,_,s2),_ in ensemble:
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            preds = preds/len(ensemble)

            scores = {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        yield scores
        yield dict(scores)

def make_envs(X, Y, G, R):
    """Generate one within-participant ('self') environment per seed and participant.

    Args:
        X: Feature matrix (rows x features).
        Y: Label matrix (rows x labels).
        G: Participant id per row.
        R: Number of seeds / repetitions.

    Participants with fewer than 50 rows, or with any constant label column,
    are skipped; a participant whose split fails because a class has a single
    member is skipped as well.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = sorted(set(G.tolist()))

    too_short = {g for g in groups if (g==G).sum() < 50}
    all_equal = {g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T)}

    # Test for a non-empty set; any() would look at the ids themselves and
    # stay silent when the only flagged participant has id 0.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    eligible = [g for g in groups if g not in all_equal and g not in too_short]

    for rng,g in product(range(R),eligible):
        # Select the participant's rows once and reuse them for the split.
        _X, _Y = X[g==G], Y[g==G]

        try:
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

def vals(x):
    """Build supervised-only evaluators over a grid of hidden widths and
    training steps for one feature row `x` (only its length is used)."""
    width_steps = [(30,2), (30,3), (30,4), (45,2), (45,3), (45,4), (30,5), (30,6)]
    return [
        MyEvaluator((), (len(x),width,'l','r',width,1), 0, 0, 0, 0, steps, [1], 2)
        for width,steps in width_steps
    ]

# The evaluators never use a learner, so a single placeholder is supplied.
lrns = [None]
envs = list(make_envs(X3, np.hstack([Y1,Y2]), G, 1))

cb.Experiment(envs, lrns, vals(X3[0])).run('../logs/6/2.log', processes=35, quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 150, 'Interactions': 2400}


In [14]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: the output is the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec,rng=1):
        """Translate one spec entry into a (layer, output_width) pair.

        Args:
            curr_dim: Width produced by the previous layer, or None before the
                input width has been declared.
            spec: A single layer specification (see `__init__`).
            rng: Seed forwarded to nested skip networks.

        Returns:
            (layer, width). `layer` is None when `spec` only declares the
            input width and therefore adds no module.

        Raises:
            ValueError: If `spec` is not a recognized specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if isinstance(spec,int):
            # The first integer only declares the input width.
            if curr_dim is None:
                return None, spec
            return torch.nn.Linear(curr_dim,spec),spec
        if isinstance(spec,list):
            # Previously the nested network always reseeded with 1, so a
            # non-default rng was silently ignored for skip blocks.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec, rng)), curr_dim
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                first <int> -- the input feature width (adds no layer)
                <int> -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [...] -- a skip (residual) block built from the nested specs
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before the layers are
                created. Note that this reseeds torch's global generator.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec,rng)
            # Explicit None check: module truthiness is not a reliable signal.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """A fixed train/test split plus a sampler of contrastive training triples."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split; `trn`, `g` and `rng` are recorded in `params`.

        Labels are converted to float tensors; `rng` also seeds all sampling
        and shuffling done by this environment.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield (anchor, positive, negatives) float tensors for contrastive learning.

        For every anchor row a positive is drawn from the other rows with the
        same label and `neg` negatives from rows with a different label.

        Args:
            neg: Number of negatives per anchor.
            sr: If < 1, a single batch over the first int(len(train)*sr)
                shuffled rows is produced; otherwise `sr` full passes.
            yi: Label column index, or list of column indexes, defining the classes.

        Yields:
            Tensors shaped (anchors, d), (anchors, d) and (anchors, neg, d).
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        labels = self.train_Y[:,yi]
        if labels.dim() == 1:
            # A plain int index yields a vector; keep one label tuple per row.
            labels = labels.unsqueeze(1)

        rows   = self.train_X.tolist()
        labels = list(map(tuple,labels.tolist()))

        X = [rows[k] for k in rng_order]
        Y = [labels[k] for k in rng_order]

        classes  = set(Y)
        eq_class = {c: [x for x,y in zip(X,Y) if y == c] for c in classes}
        ne_class = {c: [x for k,v in eq_class.items() if k != c for x in v] for c in classes}

        # Position of every row inside its own class list. eq_class keeps the
        # order of X, so this is the count of earlier rows with the same label.
        # Comparing against the row's index in X (as before) mixed two
        # different index spaces and could return the anchor as its own positive.
        seen, pos_in_class = {}, []
        for y in Y:
            pos_in_class.append(seen.get(y,0))
            seen[y] = pos_in_class[-1] + 1

        def choose_unique(items,own_pos):
            # A single-member class has no other candidate; use the anchor itself.
            if len(items) == 1:  return items[0]
            for k in rng.randints(None,0,len(items)-1):
                if k != own_pos:
                    return items[k]

        def choose_n(items,n):
            # With more than n candidates the picks are distinct (set);
            # otherwise indexes are collected with replacement (list).
            distinct = len(items) > n
            picked = set() if distinct else []
            for k in rng.randints(None,0,len(items)-1):
                (picked.add if distinct else picked.append)(k)
                if len(picked)==n:
                    return [items[j] for j in picked]

        if sr < 1:
            passes, n_anchors = 1, int(len(X)*sr)
        else:
            passes, n_anchors = sr, len(X)

        for _ in range(passes):
            anchor, positive, negative = [], [], []
            for k in range(n_anchors):
                x,y = X[k],Y[k]
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],pos_in_class[k]))
                negative.append(choose_n     (ne_class[y],neg))

            yield torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

    def train(self):
        """Return the training features and labels unchanged."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in a seeded, shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then
    supervised training) on an environment and reports test AUC."""

    SSL_BATCH_SIZE = 2  # mini-batch size for contrastive pre-training
    WS_BATCH_SIZE  = 2  # mini-batch size for supervised training

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """
        Args:
            s1: FeedForward specs for the encoder (used by SSL and supervised training).
            s2: FeedForward specs for the supervised head.
            ssl_samps: Sampling rate handed to `env.ssl`; falsy disables pre-training.
            ssl_neg: Number of negatives per anchor.
            ssl_dropn: Number of trailing encoder layers used only during
                pre-training and dropped afterwards.
            ssl_tau: Temperature of the contrastive loss.
            ws_steps: Number of supervised passes over the training data.
            y: List of label column indexes to train on and score.
            n_models: Number of ensemble members whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    @staticmethod
    def _split_encoder(net, dropn):
        """Split `net`'s child layers into (kept encoder, last `dropn` layers).

        The cut is taken from the actual number of child layers. Counting spec
        entries instead is off by one whenever the spec starts with an input
        width, because that entry creates no layer.
        """
        layers = list(net.children())
        cut = max(len(layers) - dropn, 0)
        return torch.nn.Sequential(*layers[:cut]), torch.nn.Sequential(*layers[cut:])

    @staticmethod
    def _ssl_loss(A, P, N, tau):
        """Contrastive loss on cosine similarities (cf. https://arxiv.org/pdf/2002.05709).

        A and P are (batch, d) embeddings, N is (batch, neg, d).
        """
        a_norm = torch.linalg.norm(A,dim=1)

        p = torch.einsum("bi,bi->b",A,P)   / (a_norm*torch.linalg.norm(P,dim=1))
        n = torch.einsum("bi,bji->bj",A,N) / (a_norm.unsqueeze(1)*torch.linalg.norm(N,dim=2))

        p = torch.exp(p/tau)
        n = torch.exp(n/tau)

        return (-torch.log(p/(p+n.sum(dim=1)))).mean()

    def evaluate(self, env, lrn):
        """Train on `env` and yield a dict of AUC scores (one key per label in `y`).

        `lrn` is accepted for interface compatibility and is not used. The
        same scores are yielded twice, as before.
        NOTE(review): presumably downstream tooling wants two interactions -- confirm.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        ensemble = []
        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds with its default rng on every
            # construction, so ensemble members start from identical weights
            # and differ only through mini-batch order -- confirm this is intended.
            s1, sb = self._split_encoder(FeedForward(self.s1), self.ssl_dropn)
            s2 = FeedForward(self.s2)

            mods = [s1,sb,s2]
            # Parameter-free modules get no optimizer.
            opts = [COCOB(m.parameters()) if list(m.parameters()) else None for m in mods]
            ensemble.append((mods,opts))

        for mods,_ in ensemble:
            for l in mods: l.train()

        for (s1,sb,s2),(s1opt,sbopt,s2opt) in ensemble:

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.SSL_BATCH_SIZE,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        if s1opt is not None: s1opt.zero_grad()
                        if sbopt is not None: sbopt.zero_grad()
                        self._ssl_loss(_A,_P,_N,self.ssl_tau).backward()
                        if s1opt is not None: s1opt.step()
                        if sbopt is not None: sbopt.step()

            if self.ws_steps:

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=self.WS_BATCH_SIZE,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt is not None: s1opt.zero_grad()
                        if s2opt is not None: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt is not None: s2opt.step()
                        if s1opt is not None: s1opt.step()

        for mods,_ in ensemble:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        with torch.no_grad():
            # Average the members' sigmoid outputs; the SSL-only layers are unused here.
            preds = 0
            for (s1,_,s2),_ in ensemble:
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            preds = preds/len(ensemble)

            scores = {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        yield scores
        yield dict(scores)

def make_envs(X, Y, G, R):
    """Yield one within-participant train/test environment per (seed, participant).

    Args:
        X: Feature matrix, one row per sample (anything `torch.tensor` accepts).
        Y: Label matrix, one row per sample; its columns are inspected one by
            one, so it is expected to be 2-D (samples x targets).
        G: Participant (group) id of every sample.
        R: Number of random splits per participant; seeds are `0..R-1`.

    Yields:
        `MyEnvironment` objects whose train and test rows all belong to a single
        participant. Participants with fewer than 50 samples, or with a label
        column that takes a single value, produce no environment.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = set(G.tolist())

    too_short = set(g for g in groups if (g==G).sum() < 50)
    all_equal = set(g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set for emptiness. `any(all_equal)` looks at the truthiness of the
    # ids themselves and stays silent when the only excluded participant is id 0.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(groups-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # train_size is given as the fraction of this participant's rows that
            # corresponds to 35 samples.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            # Stratification fails when a label combination occurs only once; skip that split.
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Sweep logged to ../logs/6/3.log: width-30 networks, varying contrastive
# passes, negatives per anchor and temperature.
lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,10))

def vals(x):
    """Build the evaluators of this sweep; only `len(x)` (the input width) is used."""
    n_in = len(x)

    # Supervised-only baseline: empty encoder, the whole network sits in the head.
    supervised = MyEvaluator((), (n_in,30,'l','r',30,1), 0, 0, 0, 0, 4, [1], 2)

    # Pre-trained variants, ordered by temperature, then negatives, then passes.
    pretrained = [
        MyEvaluator((n_in,30,'l','r'), (30,1), samps, neg, 0, tau, 4, [1], 2)
        for tau in (1,.5)
        for neg in (1,2)
        for samps in (1,2)
    ]

    return [supervised, *pretrained]

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/3.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 1500, 'Interactions': 27000}


In [17]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the output of the wrapped layers."""
        def __init__(self, layers):
            super().__init__()
            self.layers = layers
        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec):
        """Translate one specification entry into a module.

        Args:
            curr_dim: Feature width entering the layer, or None when no width
                has been declared yet.
            spec: One entry of the `specs` sequence (see `__init__`).

        Returns:
            A `(layer, width)` pair. `layer` is None when `spec` only declares
            the input width; `width` is the feature width after the layer.

        Raises:
            ValueError: If `spec` is not a recognised specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if curr_dim is None and isinstance(spec,int):
            # The first int only declares the input width; it creates no layer.
            return None, spec
        if isinstance(spec,int):
            return torch.nn.Linear(curr_dim,spec),spec
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        if isinstance(spec,list):
            # The nested network starts at the current width and must end at it
            # too, because its output is added to its input.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec)), curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                <int> (first) -- the input feature width; creates no layer
                <int> (later) -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [] -- a skip (residual) block built from the nested specifications
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before any layer is created.
                This reseeds torch's global generator as a side effect.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec)
            # Compare with None explicitly instead of relying on module truthiness.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """One train/test split plus a seeded sampler of contrastive triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its identifying parameters.

        Args:
            train_X, train_Y: Training features and labels (tensors).
            test_X, test_Y: Held-out features and labels (tensors).
            trn: Tag stored as `params['trn']`.
            g: Id stored as `params['pid']`.
            rng: Seed for every shuffle and sample drawn by this environment.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield (anchor, positive, negatives) tensors built from the training rows.

        A positive shares the anchor's label tuple; negatives have a different one.

        Args:
            neg: Number of negatives drawn per anchor.
            sr: Sampling rate. Below 1, one batch over the first `int(n*sr)`
                shuffled rows; otherwise `sr` batches (an int) over all rows.
            yi: List of label-column indexes; the selected values of a row form
                its label tuple.

        Yields:
            Float tensors `(anchor, positive, negative)`; `negative` has one
            extra axis of length `neg` after the batch axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        X = self.train_X.tolist()
        Y = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [X[i] for i in rng_order]
        Y = [Y[i] for i in rng_order]

        # Row indexes (into the shuffled X) grouped by label. Indexes are kept,
        # not rows, so an anchor can be recognised inside its own class.
        eq_class  = {y: [i for i,label in enumerate(Y) if label == y] for y in set(Y)}
        ne_class  = {y: [i for k,v in eq_class.items() if k != y for i in v] for y in set(Y)}

        def choose_unique(members,given_i):
            # A singleton class has no other member: the anchor is its own positive.
            if len(members) == 1:  return X[members[0]]
            for j in rng.randints(None,0,len(members)-1):
                # Compare row indexes, so the anchor itself is what gets excluded.
                if members[j] != given_i:
                    return X[members[j]]

        def choose_n(members,n):
            # Without replacement when the pool is larger than n, otherwise with replacement.
            add_to_index = (indexes := set()).add if len(members) > n else (indexes := []).append
            for j in rng.randints(None,0,len(members)-1):
                add_to_index(j)
                if len(indexes)==n:
                    return [X[members[k]] for k in indexes]

        def triplets(n_anchors):
            # Use the first n_anchors shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for i in range(n_anchors):
                y = Y[i]
                anchor.append(X[i])
                positive.append(choose_unique(eq_class[y],i))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in a seed-determined shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive stage, then supervised) and reports test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the hyperparameters.

        Args:
            s1: FeedForward spec of the encoder, trained by both stages.
            s2: FeedForward spec of the head, trained by the supervised stage only.
            ssl_samps: Sampling rate forwarded to `env.ssl`; 0 disables the contrastive stage.
            ssl_neg: Negatives per anchor in the contrastive stage.
            ssl_dropn: Sets where the encoder is split into the part kept for
                prediction and the part used only by the contrastive stage.
            ssl_tau: Temperature dividing the cosine similarities.
            ws_steps: Supervised passes over the training data; 0 disables the stage.
            y: List of label-column indexes to predict and score.
            n_models: Number of models whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test-set AUC.

        Args:
            env: Object exposing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Not used by this evaluator.

        Yields:
            Two equal dicts mapping `'auc<column>'` to the ROC AUC of the
            averaged sigmoid predictions for each column in `self.y`.

        Raises:
            Exception: If the contrastive stage is enabled with `ssl_neg == 0`
                or `ssl_tau == 0`.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng on every
            # construction, so all ensemble members appear to start from identical
            # weights and differ only through batch order -- confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)

            # NOTE(review): len(self.s1) counts the leading input-width spec, which
            # creates no layer, so for ssl_dropn > 0 only the last ssl_dropn-1
            # layers end up in `sb` -- confirm the intended split.
            split  = len(self.s1)-self.ssl_dropn
            layers = list(s1.children())
            sb = torch.nn.Sequential(*layers[split:])
            s1 = torch.nn.Sequential(*layers[:split])

            # A module without parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both stages.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor with its positive, and with each negative.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Negative log of the positive's share of the total similarity mass.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        # `sb` is skipped here: the head consumes the encoder output directly.
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average of the members' sigmoid outputs.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            with torch.no_grad():
                # One ensemble forward pass shared by every target column.
                preds = predict(X)
                return {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same score is reported twice, as in the original;
        # presumably the consumer expects two results per evaluation -- confirm.
        result = score(X, Y)
        yield result
        yield dict(result)

def make_envs(X, Y, G, R):
    """Yield one within-participant train/test environment per (seed, participant).

    Args:
        X: Feature matrix, one row per sample (anything `torch.tensor` accepts).
        Y: Label matrix, one row per sample; its columns are inspected one by
            one, so it is expected to be 2-D (samples x targets).
        G: Participant (group) id of every sample.
        R: Number of random splits per participant; seeds are `0..R-1`.

    Yields:
        `MyEnvironment` objects whose train and test rows all belong to a single
        participant. Participants with fewer than 50 samples, or with a label
        column that takes a single value, produce no environment.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = set(G.tolist())

    too_short = set(g for g in groups if (g==G).sum() < 50)
    all_equal = set(g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set for emptiness. `any(all_equal)` looks at the truthiness of the
    # ids themselves and stays silent when the only excluded participant is id 0.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(groups-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # train_size is given as the fraction of this participant's rows that
            # corresponds to 35 samples.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            # Stratification fails when a label combination occurs only once; skip that split.
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Sweep logged to ../logs/6/4.log: hidden width 45, temperature fixed at .5.
w = 45

lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,5))

def vals(x):
    """Build the evaluators of this sweep; only `len(x)` (the input width) is used."""
    n_in = len(x)

    # Supervised-only baseline: empty encoder, the whole network sits in the head.
    supervised = MyEvaluator((), (n_in,w,'l','r',w,1), 0, 0, 0, 0, 4, [1], 2)

    # (contrastive passes, negatives per anchor) combinations, in run order.
    pretrained = [
        MyEvaluator((n_in,w,'l','r'), (w,1), samps, neg, 0, .5, 4, [1], 2)
        for samps,neg in ((1,1),(2,1),(1,2))
    ]

    return [supervised, *pretrained]

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/4.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 750, 'Interactions': 6000}


In [21]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the output of the wrapped layers."""
        def __init__(self, layers):
            super().__init__()
            self.layers = layers
        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec):
        """Translate one specification entry into a module.

        Args:
            curr_dim: Feature width entering the layer, or None when no width
                has been declared yet.
            spec: One entry of the `specs` sequence (see `__init__`).

        Returns:
            A `(layer, width)` pair. `layer` is None when `spec` only declares
            the input width; `width` is the feature width after the layer.

        Raises:
            ValueError: If `spec` is not a recognised specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if curr_dim is None and isinstance(spec,int):
            # The first int only declares the input width; it creates no layer.
            return None, spec
        if isinstance(spec,int):
            return torch.nn.Linear(curr_dim,spec),spec
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        if isinstance(spec,list):
            # The nested network starts at the current width and must end at it
            # too, because its output is added to its input.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec)), curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                <int> (first) -- the input feature width; creates no layer
                <int> (later) -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [] -- a skip (residual) block built from the nested specifications
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before any layer is created.
                This reseeds torch's global generator as a side effect.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec)
            # Compare with None explicitly instead of relying on module truthiness.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """One train/test split plus a seeded sampler of contrastive triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its identifying parameters.

        Args:
            train_X, train_Y: Training features and labels (tensors).
            test_X, test_Y: Held-out features and labels (tensors).
            trn: Tag stored as `params['trn']`.
            g: Id stored as `params['pid']`.
            rng: Seed for every shuffle and sample drawn by this environment.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield (anchor, positive, negatives) tensors built from the training rows.

        A positive shares the anchor's label tuple; negatives have a different one.

        Args:
            neg: Number of negatives drawn per anchor.
            sr: Sampling rate. Below 1, one batch over the first `int(n*sr)`
                shuffled rows; otherwise `sr` batches (an int) over all rows.
            yi: List of label-column indexes; the selected values of a row form
                its label tuple.

        Yields:
            Float tensors `(anchor, positive, negative)`; `negative` has one
            extra axis of length `neg` after the batch axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        X = self.train_X.tolist()
        Y = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [X[i] for i in rng_order]
        Y = [Y[i] for i in rng_order]

        # Row indexes (into the shuffled X) grouped by label. Indexes are kept,
        # not rows, so an anchor can be recognised inside its own class.
        eq_class  = {y: [i for i,label in enumerate(Y) if label == y] for y in set(Y)}
        ne_class  = {y: [i for k,v in eq_class.items() if k != y for i in v] for y in set(Y)}

        def choose_unique(members,given_i):
            # A singleton class has no other member: the anchor is its own positive.
            if len(members) == 1:  return X[members[0]]
            for j in rng.randints(None,0,len(members)-1):
                # Compare row indexes, so the anchor itself is what gets excluded.
                if members[j] != given_i:
                    return X[members[j]]

        def choose_n(members,n):
            # Without replacement when the pool is larger than n, otherwise with replacement.
            add_to_index = (indexes := set()).add if len(members) > n else (indexes := []).append
            for j in rng.randints(None,0,len(members)-1):
                add_to_index(j)
                if len(indexes)==n:
                    return [X[members[k]] for k in indexes]

        def triplets(n_anchors):
            # Use the first n_anchors shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for i in range(n_anchors):
                y = Y[i]
                anchor.append(X[i])
                positive.append(choose_unique(eq_class[y],i))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in a seed-determined shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive stage, then supervised) and reports test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the hyperparameters.

        Args:
            s1: FeedForward spec of the encoder, trained by both stages.
            s2: FeedForward spec of the head, trained by the supervised stage only.
            ssl_samps: Sampling rate forwarded to `env.ssl`; 0 disables the contrastive stage.
            ssl_neg: Negatives per anchor in the contrastive stage.
            ssl_dropn: Sets where the encoder is split into the part kept for
                prediction and the part used only by the contrastive stage.
            ssl_tau: Temperature dividing the cosine similarities.
            ws_steps: Supervised passes over the training data; 0 disables the stage.
            y: List of label-column indexes to predict and score.
            n_models: Number of models whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test-set AUC.

        Args:
            env: Object exposing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Not used by this evaluator.

        Yields:
            Two equal dicts mapping `'auc<column>'` to the ROC AUC of the
            averaged sigmoid predictions for each column in `self.y`.

        Raises:
            Exception: If the contrastive stage is enabled with `ssl_neg == 0`
                or `ssl_tau == 0`.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng on every
            # construction, so all ensemble members appear to start from identical
            # weights and differ only through batch order -- confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)

            # NOTE(review): len(self.s1) counts the leading input-width spec, which
            # creates no layer, so for ssl_dropn > 0 only the last ssl_dropn-1
            # layers end up in `sb` -- confirm the intended split.
            split  = len(self.s1)-self.ssl_dropn
            layers = list(s1.children())
            sb = torch.nn.Sequential(*layers[split:])
            s1 = torch.nn.Sequential(*layers[:split])

            # A module without parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both stages.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor with its positive, and with each negative.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Negative log of the positive's share of the total similarity mass.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        # `sb` is skipped here: the head consumes the encoder output directly.
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average of the members' sigmoid outputs.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            with torch.no_grad():
                # One ensemble forward pass shared by every target column.
                preds = predict(X)
                return {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same score is reported twice, as in the original;
        # presumably the consumer expects two results per evaluation -- confirm.
        result = score(X, Y)
        yield result
        yield dict(result)

def make_envs(X, Y, G, R):
    """Yield one within-participant train/test environment per (seed, participant).

    Args:
        X: Feature matrix, one row per sample (anything `torch.tensor` accepts).
        Y: Label matrix, one row per sample; its columns are inspected one by
            one, so it is expected to be 2-D (samples x targets).
        G: Participant (group) id of every sample.
        R: Number of random splits per participant; seeds are `0..R-1`.

    Yields:
        `MyEnvironment` objects whose train and test rows all belong to a single
        participant. Participants with fewer than 50 samples, or with a label
        column that takes a single value, produce no environment.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = set(G.tolist())

    too_short = set(g for g in groups if (g==G).sum() < 50)
    all_equal = set(g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set for emptiness. `any(all_equal)` looks at the truthiness of the
    # ids themselves and stays silent when the only excluded participant is id 0.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(groups-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # train_size is given as the fraction of this participant's rows that
            # corresponds to 35 samples.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            # Stratification fails when a label combination occurs only once; skip that split.
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Sweep logged to ../logs/6/5.log: two hidden blocks of width 45, varying
# the contrastive settings and the number of supervised passes.
w = 45

lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,5))

def vals(x):
    """Build the evaluators of this sweep; only `len(x)` (the input width) is used."""
    n_in    = len(x)
    encoder = (n_in,w,'l','r',w,'l','r')
    head    = (w,1)
    full    = (*encoder,w,1)

    def supervised(ws):
        # No contrastive stage: the whole network is the head.
        return MyEvaluator((), full, 0, 0, 0, 0, ws, [1], 2)

    def pretrained(samps,neg,dropn,ws):
        # Contrastive stage with temperature .5, then `ws` supervised passes.
        return MyEvaluator(encoder, head, samps, neg, dropn, .5, ws, [1], 2)

    return [
        supervised(4),
        pretrained(1,1,0,4),
        pretrained(2,1,0,4),
        pretrained(1,2,0,4),
        pretrained(1,2,3,4),
        pretrained(1,2,0,2),
        pretrained(1,2,3,2),
        supervised(3),
        supervised(2),
        supervised(1),
        pretrained(1,2,0,1),
    ]

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/5.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 750, 'Interactions': 16500}


In [23]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feedforward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the output of the wrapped layers."""
        def __init__(self, layers):
            super().__init__()
            self.layers = layers
        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec):
        """Translate one specification entry into a module.

        Args:
            curr_dim: Feature width entering the layer, or None when no width
                has been declared yet.
            spec: One entry of the `specs` sequence (see `__init__`).

        Returns:
            A `(layer, width)` pair. `layer` is None when `spec` only declares
            the input width; `width` is the feature width after the layer.

        Raises:
            ValueError: If `spec` is not a recognised specification.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if curr_dim is None and isinstance(spec,int):
            # The first int only declares the input width; it creates no layer.
            return None, spec
        if isinstance(spec,int):
            return torch.nn.Linear(curr_dim,spec),spec
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        if isinstance(spec,list):
            # The nested network starts at the current width and must end at it
            # too, because its output is added to its input.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec)), curr_dim
        raise ValueError(f"Bad Layer: {spec!r}")

    def __init__(self, specs, rng=1):
        """Instantiate a feedforward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                <int> (first) -- the input feature width; creates no layer
                <int> (later) -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [] -- a skip (residual) block built from the nested specifications
                An empty sequence produces a single Identity layer.
            rng: Seed passed to `torch.manual_seed` before any layer is created.
                This reseeds torch's global generator as a side effect.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec)
            # Compare with None explicitly instead of relying on module truthiness.
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """One train/test split plus a seeded sampler of contrastive triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its identifying parameters.

        Args:
            train_X, train_Y: Training features and labels (tensors).
            test_X, test_Y: Held-out features and labels (tensors).
            trn: Tag stored as `params['trn']`.
            g: Id stored as `params['pid']`.
            rng: Seed for every shuffle and sample drawn by this environment.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield (anchor, positive, negatives) tensors built from the training rows.

        A positive shares the anchor's label tuple; negatives have a different one.

        Args:
            neg: Number of negatives drawn per anchor.
            sr: Sampling rate. Below 1, one batch over the first `int(n*sr)`
                shuffled rows; otherwise `sr` batches (an int) over all rows.
            yi: List of label-column indexes; the selected values of a row form
                its label tuple.

        Yields:
            Float tensors `(anchor, positive, negative)`; `negative` has one
            extra axis of length `neg` after the batch axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        X = self.train_X.tolist()
        Y = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [X[i] for i in rng_order]
        Y = [Y[i] for i in rng_order]

        # Row indexes (into the shuffled X) grouped by label. Indexes are kept,
        # not rows, so an anchor can be recognised inside its own class.
        eq_class  = {y: [i for i,label in enumerate(Y) if label == y] for y in set(Y)}
        ne_class  = {y: [i for k,v in eq_class.items() if k != y for i in v] for y in set(Y)}

        def choose_unique(members,given_i):
            # A singleton class has no other member: the anchor is its own positive.
            if len(members) == 1:  return X[members[0]]
            for j in rng.randints(None,0,len(members)-1):
                # Compare row indexes, so the anchor itself is what gets excluded.
                if members[j] != given_i:
                    return X[members[j]]

        def choose_n(members,n):
            # Without replacement when the pool is larger than n, otherwise with replacement.
            add_to_index = (indexes := set()).add if len(members) > n else (indexes := []).append
            for j in rng.randints(None,0,len(members)-1):
                add_to_index(j)
                if len(indexes)==n:
                    return [X[members[k]] for k in indexes]

        def triplets(n_anchors):
            # Use the first n_anchors shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for i in range(n_anchors):
                y = Y[i]
                anchor.append(X[i])
                positive.append(choose_unique(eq_class[y],i))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in a seed-determined shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive stage, then supervised) and reports test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the hyperparameters.

        Args:
            s1: FeedForward spec of the encoder, trained by both stages.
            s2: FeedForward spec of the head, trained by the supervised stage only.
            ssl_samps: Sampling rate forwarded to `env.ssl`; 0 disables the contrastive stage.
            ssl_neg: Negatives per anchor in the contrastive stage.
            ssl_dropn: Sets where the encoder is split into the part kept for
                prediction and the part used only by the contrastive stage.
            ssl_tau: Temperature dividing the cosine similarities.
            ws_steps: Supervised passes over the training data; 0 disables the stage.
            y: List of label-column indexes to predict and score.
            n_models: Number of models whose predictions are averaged.
        """

        self.s1  = s1  #ssl + sl
        self.s2  = s2  #sl

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test-set AUC.

        Args:
            env: Object exposing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Not used by this evaluator.

        Yields:
            Two equal dicts mapping `'auc<column>'` to the ROC AUC of the
            averaged sigmoid predictions for each column in `self.y`.

        Raises:
            Exception: If the contrastive stage is enabled with `ssl_neg == 0`
                or `ssl_tau == 0`.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng on every
            # construction, so all ensemble members appear to start from identical
            # weights and differ only through batch order -- confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)

            # NOTE(review): len(self.s1) counts the leading input-width spec, which
            # creates no layer, so for ssl_dropn > 0 only the last ssl_dropn-1
            # layers end up in `sb` -- confirm the intended split.
            split  = len(self.s1)-self.ssl_dropn
            layers = list(s1.children())
            sb = torch.nn.Sequential(*layers[split:])
            s1 = torch.nn.Sequential(*layers[:split])

            # A module without parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both stages.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor with its positive, and with each negative.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Negative log of the positive's share of the total similarity mass.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        # `sb` is skipped here: the head consumes the encoder output directly.
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average of the members' sigmoid outputs.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            with torch.no_grad():
                # One ensemble forward pass shared by every target column.
                preds = predict(X)
                return {f'auc{y}': roc_auc_score(Y[:,i],preds[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same score is reported twice, as in the original;
        # presumably the consumer expects two results per evaluation -- confirm.
        result = score(X, Y)
        yield result
        yield dict(result)

def make_envs(X, Y, G, R):
    """Yield within-participant train/test environments for every (seed, participant) pair.

    Args:
        X: Feature rows (anything `torch.tensor` accepts); cast to float.
        Y: Label rows with one column per target; cast to float.
        G: Participant id of every row.
        R: Number of random splits per participant (seeds `0..R-1`).

    Yields:
        MyEnvironment: train and test rows both drawn from one participant, tagged 'self'.

    Raises:
        ValueError: Any split error other than the "least populated class" case, which
            simply skips that (seed, participant) pair.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    pids = set(G.tolist())

    # Participants with fewer than 50 rows are excluded outright.
    too_short = set(g for g in pids if (g==G).sum() < 50)
    # Participants for whom at least one label column holds a single distinct value.
    all_equal = set(g for g in pids if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set itself: `any(all_equal)` inspects the ids, so a lone participant
    # with id 0 would never be reported.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(pids-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # NOTE(review): train_size is the fraction 35/len(_X); if exactly 35 training
            # rows are required, confirm the fraction never rounds down to 34.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Hidden width used by every Linear layer in this sweep.
w = 45

lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,5))

def vals(x):
    """Build the evaluators for a trunk-depth sweep.

    Each evaluator stacks 2, 3, 4 or 5 `(w,'l','r')` blocks after the input width
    `len(x)`; every other MyEvaluator argument is held fixed.
    """
    block = (w,'l','r')
    return [
        MyEvaluator((len(x),) + block*depth, (w,1), 1, 2, 0, .5, 2, [1], 2)
        for depth in (2,3,4,5)
    ]

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/6.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 750, 'Interactions': 6000}


In [35]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """Sequential feed-forward network assembled from a compact list of layer specs."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            residual = self.layers(X)
            return X + residual

    def make_layer(self,curr_dim,spec):
        """Translate a single spec into a `(layer_or_None, output_width)` pair.

        Raises:
            Exception: If `spec` is not one of the supported kinds.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim

        if isinstance(spec,int):
            # The first int seen (no width known yet) only declares the input width.
            layer = None if curr_dim is None else torch.nn.Linear(curr_dim,spec)
            return layer, spec

        if isinstance(spec,list):
            # The nested network is built with the default rng, which reseeds torch.
            inner = FeedForward([curr_dim] + spec)
            return FeedForward.SkipModule(inner), curr_dim

        # Width-preserving layers; built lazily so only the requested one is created.
        builders = {
            'r': lambda: torch.nn.ReLU(),
            'l': lambda: torch.nn.LayerNorm(curr_dim),
            'b': lambda: torch.nn.BatchNorm1d(curr_dim),
            's': lambda: torch.nn.Sigmoid(),
        }
        if isinstance(spec,str) and spec in builders:
            return builders[spec](), curr_dim

        raise Exception("Bad Layer")

    def __init__(self, specs, rng=1):
        """Build the network described by `specs`, seeding torch with `rng` first.

        Args:
            specs: A sequence of layer specifications, read left to right:
                <int>   -- the first int is the input width (no layer is created);
                           every later int is a Linear layer with that output width
                <float> -- a Dropout layer with that probability
                'l'     -- a LayerNorm over the current width
                'b'     -- a BatchNorm1d over the current width
                'r'     -- a ReLU layer
                's'     -- a Sigmoid layer
                [...]   -- a skip (residual) block built from the nested specs
            rng: Seed passed to `torch.manual_seed` before any layer is created.

        An empty layer list results in a single Identity layer.
        """
        torch.manual_seed(rng)

        built, dim = [], None
        for item in specs:
            module, dim = self.make_layer(dim,item)
            if module is not None:
                built.append(module)

        if not built:
            built = [torch.nn.Identity()]

        super().__init__(*built)
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """A fixed train/test split plus a sampler of contrastive (anchor, positive, negatives) triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its descriptive parameters.

        Args:
            train_X, train_Y: Training features and labels (labels are cast to float).
            test_X, test_Y: Test features and labels (labels are cast to float).
            trn: Tag describing where the training rows came from (stored under 'trn').
            g: Participant id (stored under 'pid').
            rng: Seed used for triplet sampling and for shuffling the test rows.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield contrastive training triplets built from the training rows.

        Rows sharing the same values in label columns `yi` form a class. For every
        anchor a positive is drawn from the anchor's class and `neg` negatives from
        all other classes.

        Args:
            neg: Number of negatives per anchor.
            sr: Sampling rate. Below 1, a single batch over the first `int(len*sr)`
                shuffled rows is produced; otherwise `sr` full passes are produced.
            yi: Column index list into `train_Y` (must keep the column axis, e.g. `[1]`).

        Yields:
            (anchor, positive, negative) float tensors; `negative` carries an extra
            axis of length `neg` after the row axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        order = rng.shuffle(range(len(self.train_X)))

        rows   = self.train_X.tolist()
        labels = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [rows[i]   for i in order]
        Y = [labels[i] for i in order]

        eq_class = {y: [x for x,l in zip(X,Y) if l == y] for y in set(Y)}
        ne_class = {y: [x for k,v in eq_class.items() if k != y for x in v] for y in set(Y)}

        def choose_unique(items,anchor):
            # Exclude the anchor by identity: `items` holds the very same row objects
            # as X, whereas the anchor's position in X is not a valid index into `items`.
            if len(items) == 1:  return items[0]
            for i in rng.randints(None,0,len(items)-1):
                if items[i] is not anchor:
                    return items[i]

        def choose_n(items,n):
            # Distinct indexes when there are more than n items, otherwise repeats are allowed.
            add_to_index = (indexes := set()).add if len(items) > n else (indexes := []).append
            for i in rng.randints(None,0,len(items)-1):
                add_to_index(i)
                if len(indexes)==n:
                    return [items[i] for i in indexes]

        def triplets(n_anchors):
            # One batch using the first `n_anchors` shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for x,y in zip(X[:n_anchors],Y[:n_anchors]):
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],x))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in an order shuffled with this environment's seed."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then supervised) and scores test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the configuration.

        Args:
            s1: FeedForward spec of the trunk, trained in both the SSL and supervised phases.
            s2: FeedForward spec of the supervised head applied to the trunk output.
            ssl_samps: Passed to `env.ssl` as its sampling rate; a falsy value skips SSL.
            ssl_neg: Negatives per anchor; must not be 0 when SSL runs.
            ssl_dropn: Controls how many trailing trunk modules are used only during SSL.
            ssl_tau: Temperature dividing the cosine similarities; must not be 0 when SSL runs.
            ws_steps: Number of passes over the training data in the supervised phase;
                a falsy value skips it.
            y: Label column indexes to train on and score.
            n_models: Number of models whose sigmoid outputs are averaged at test time.
        """

        self.s1  = s1  # trunk spec: used by both the SSL and the supervised phase
        self.s2  = s2  # head spec: used by the supervised phase only

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test AUC.

        Args:
            env: Object providing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Unused here.

        Yields:
            A dict `{'auc<col>': value}` with one entry per column in `self.y`; the
            same score is yielded twice.

        Raises:
            Exception: If SSL is enabled and `ssl_neg` or `ssl_tau` is 0.
        """
        from sklearn.metrics import roc_auc_score

        # presumably limits torch to one thread because the experiment runs many processes — TODO confirm
        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []
        opts = []  # placeholder; rebound for every model in the loop below

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng=1 on every
            # construction, so every ensemble member appears to start from identical
            # weights — confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)
            # Split the trunk: `sb` is the tail used only in the SSL phase, `s1` keeps the rest.
            # NOTE(review): len(self.s1) counts spec entries, while children() has one
            # module fewer when the leading entry is the input width (it builds no
            # layer). ssl_dropn=k therefore appears to move k-1 modules into `sb` —
            # confirm this is intended.
            sb = torch.nn.Sequential(*list(s1.children())[len(self.s1)-self.ssl_dropn:])
            s1 = torch.nn.Sequential(*list(s1.children())[:len(self.s1)-self.ssl_dropn])

            # A part without trainable parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both phases.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        # Embed anchor, positive and negatives with trunk + SSL tail (NaNs replaced first).
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor·positive per row, anchor·each negative per row.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Contrastive loss: -log of the positive's share of all exponentiated similarities.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                # Supervised phase: trunk + head trained on logits (`sb` is not used here).
                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average the sigmoid outputs of all models.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            # One AUC per label column, keyed by the column index from self.y.
            with torch.no_grad():
                return {f'auc{y}': roc_auc_score(Y[:,i],predict(X)[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same test score is computed and yielded twice — presumably
        # the experiment runner needs two results per run; TODO confirm.
        yield score(X, Y)
        yield score(X, Y)

def make_envs(X, Y, G, R):
    """Yield within-participant train/test environments for every (seed, participant) pair.

    Args:
        X: Feature rows (anything `torch.tensor` accepts); cast to float.
        Y: Label rows with one column per target; cast to float.
        G: Participant id of every row.
        R: Number of random splits per participant (seeds `0..R-1`).

    Yields:
        MyEnvironment: train and test rows both drawn from one participant, tagged 'self'.

    Raises:
        ValueError: Any split error other than the "least populated class" case, which
            simply skips that (seed, participant) pair.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    pids = set(G.tolist())

    # Participants with fewer than 50 rows are excluded outright.
    too_short = set(g for g in pids if (g==G).sum() < 50)
    # Participants for whom at least one label column holds a single distinct value.
    all_equal = set(g for g in pids if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set itself: `any(all_equal)` inspects the ids, so a lone participant
    # with id 0 would never be reported.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(pids-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # NOTE(review): train_size is the fraction 35/len(_X); if exactly 35 training
            # rows are required, confirm the fraction never rounds down to 34.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Hidden width used by every Linear layer in this sweep.
w = 45

lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,5))

def vals(x):
    """Build the evaluators for the SSL-setting sweep on a three-block trunk.

    All runs use ws_steps=2, y=[1] and n_models=2; what varies is the
    (ssl_samps, ssl_neg, ssl_dropn, ssl_tau) tuple, input dropout, and whether
    the output layer lives in the trunk instead of a separate head.
    """
    trunk   = (len(x),)    + (w,'l','r')*3
    dropout = (len(x),.2)  + (w,'l','r')*3
    head    = (w,1)

    # NOTE(review): the 6th and 8th settings are identical, and the dropout
    # configuration below is added twice — presumably copy-paste leftovers; confirm.
    ssl_grid = [
        (1, 2, 0, .5),
        (2, 2, 0, .5),
        (2, 2, 0, .75),
        (2, 2, 3, .5),
        (1, 2, 3, .5),
        (1, 2, 6, .5),
        (1, 2, 0, .25),
        (1, 2, 6, .5),
    ]

    evaluators  = [MyEvaluator(trunk, head, *ssl, 2, [1], 2) for ssl in ssl_grid]
    evaluators += [MyEvaluator(dropout, head, 1, 2, 6, .5, 2, [1], 2) for _ in range(2)]
    # Output layer folded into the trunk, with an empty (Identity) head.
    evaluators += [
        MyEvaluator(trunk + (w,1), (), 1, 2, 0, .5, 2, [1], 2),
        MyEvaluator(trunk + (1,),  (), 1, 2, 0, .5, 2, [1], 2),
    ]
    return evaluators

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/7.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 750, 'Interactions': 18000}


In [39]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """Sequential feed-forward network assembled from a compact list of layer specs."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            residual = self.layers(X)
            return X + residual

    def make_layer(self,curr_dim,spec):
        """Translate a single spec into a `(layer_or_None, output_width)` pair.

        Raises:
            Exception: If `spec` is not one of the supported kinds.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim

        if isinstance(spec,int):
            # The first int seen (no width known yet) only declares the input width.
            layer = None if curr_dim is None else torch.nn.Linear(curr_dim,spec)
            return layer, spec

        if isinstance(spec,list):
            # The nested network is built with the default rng, which reseeds torch.
            inner = FeedForward([curr_dim] + spec)
            return FeedForward.SkipModule(inner), curr_dim

        # Width-preserving layers; built lazily so only the requested one is created.
        builders = {
            'r': lambda: torch.nn.ReLU(),
            'l': lambda: torch.nn.LayerNorm(curr_dim),
            'b': lambda: torch.nn.BatchNorm1d(curr_dim),
            's': lambda: torch.nn.Sigmoid(),
        }
        if isinstance(spec,str) and spec in builders:
            return builders[spec](), curr_dim

        raise Exception("Bad Layer")

    def __init__(self, specs, rng=1):
        """Build the network described by `specs`, seeding torch with `rng` first.

        Args:
            specs: A sequence of layer specifications, read left to right:
                <int>   -- the first int is the input width (no layer is created);
                           every later int is a Linear layer with that output width
                <float> -- a Dropout layer with that probability
                'l'     -- a LayerNorm over the current width
                'b'     -- a BatchNorm1d over the current width
                'r'     -- a ReLU layer
                's'     -- a Sigmoid layer
                [...]   -- a skip (residual) block built from the nested specs
            rng: Seed passed to `torch.manual_seed` before any layer is created.

        An empty layer list results in a single Identity layer.
        """
        torch.manual_seed(rng)

        built, dim = [], None
        for item in specs:
            module, dim = self.make_layer(dim,item)
            if module is not None:
                built.append(module)

        if not built:
            built = [torch.nn.Identity()]

        super().__init__(*built)
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """A fixed train/test split plus a sampler of contrastive (anchor, positive, negatives) triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its descriptive parameters.

        Args:
            train_X, train_Y: Training features and labels (labels are cast to float).
            test_X, test_Y: Test features and labels (labels are cast to float).
            trn: Tag describing where the training rows came from (stored under 'trn').
            g: Participant id (stored under 'pid').
            rng: Seed used for triplet sampling and for shuffling the test rows.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield contrastive training triplets built from the training rows.

        Rows sharing the same values in label columns `yi` form a class. For every
        anchor a positive is drawn from the anchor's class and `neg` negatives from
        all other classes.

        Args:
            neg: Number of negatives per anchor.
            sr: Sampling rate. Below 1, a single batch over the first `int(len*sr)`
                shuffled rows is produced; otherwise `sr` full passes are produced.
            yi: Column index list into `train_Y` (must keep the column axis, e.g. `[1]`).

        Yields:
            (anchor, positive, negative) float tensors; `negative` carries an extra
            axis of length `neg` after the row axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        order = rng.shuffle(range(len(self.train_X)))

        rows   = self.train_X.tolist()
        labels = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [rows[i]   for i in order]
        Y = [labels[i] for i in order]

        eq_class = {y: [x for x,l in zip(X,Y) if l == y] for y in set(Y)}
        ne_class = {y: [x for k,v in eq_class.items() if k != y for x in v] for y in set(Y)}

        def choose_unique(items,anchor):
            # Exclude the anchor by identity: `items` holds the very same row objects
            # as X, whereas the anchor's position in X is not a valid index into `items`.
            if len(items) == 1:  return items[0]
            for i in rng.randints(None,0,len(items)-1):
                if items[i] is not anchor:
                    return items[i]

        def choose_n(items,n):
            # Distinct indexes when there are more than n items, otherwise repeats are allowed.
            add_to_index = (indexes := set()).add if len(items) > n else (indexes := []).append
            for i in rng.randints(None,0,len(items)-1):
                add_to_index(i)
                if len(indexes)==n:
                    return [items[i] for i in indexes]

        def triplets(n_anchors):
            # One batch using the first `n_anchors` shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for x,y in zip(X[:n_anchors],Y[:n_anchors]):
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],x))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in an order shuffled with this environment's seed."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then supervised) and scores test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the configuration.

        Args:
            s1: FeedForward spec of the trunk, trained in both the SSL and supervised phases.
            s2: FeedForward spec of the supervised head applied to the trunk output.
            ssl_samps: Passed to `env.ssl` as its sampling rate; a falsy value skips SSL.
            ssl_neg: Negatives per anchor; must not be 0 when SSL runs.
            ssl_dropn: Controls how many trailing trunk modules are used only during SSL.
            ssl_tau: Temperature dividing the cosine similarities; must not be 0 when SSL runs.
            ws_steps: Number of passes over the training data in the supervised phase;
                a falsy value skips it.
            y: Label column indexes to train on and score.
            n_models: Number of models whose sigmoid outputs are averaged at test time.
        """

        self.s1  = s1  # trunk spec: used by both the SSL and the supervised phase
        self.s2  = s2  # head spec: used by the supervised phase only

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test AUC.

        Args:
            env: Object providing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Unused here.

        Yields:
            A dict `{'auc<col>': value}` with one entry per column in `self.y`; the
            same score is yielded twice.

        Raises:
            Exception: If SSL is enabled and `ssl_neg` or `ssl_tau` is 0.
        """
        from sklearn.metrics import roc_auc_score

        # presumably limits torch to one thread because the experiment runs many processes — TODO confirm
        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []
        opts = []  # placeholder; rebound for every model in the loop below

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng=1 on every
            # construction, so every ensemble member appears to start from identical
            # weights — confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)
            # Split the trunk: `sb` is the tail used only in the SSL phase, `s1` keeps the rest.
            # NOTE(review): len(self.s1) counts spec entries, while children() has one
            # module fewer when the leading entry is the input width (it builds no
            # layer). ssl_dropn=k therefore appears to move k-1 modules into `sb` —
            # confirm this is intended.
            sb = torch.nn.Sequential(*list(s1.children())[len(self.s1)-self.ssl_dropn:])
            s1 = torch.nn.Sequential(*list(s1.children())[:len(self.s1)-self.ssl_dropn])

            # A part without trainable parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both phases.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        # Embed anchor, positive and negatives with trunk + SSL tail (NaNs replaced first).
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor·positive per row, anchor·each negative per row.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Contrastive loss: -log of the positive's share of all exponentiated similarities.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                # Supervised phase: trunk + head trained on logits (`sb` is not used here).
                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average the sigmoid outputs of all models.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            # One AUC per label column, keyed by the column index from self.y.
            with torch.no_grad():
                return {f'auc{y}': roc_auc_score(Y[:,i],predict(X)[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same test score is computed and yielded twice — presumably
        # the experiment runner needs two results per run; TODO confirm.
        yield score(X, Y)
        yield score(X, Y)

def make_envs(X, Y, G, R):
    """Yield within-participant train/test environments for every (seed, participant) pair.

    Args:
        X: Feature rows (anything `torch.tensor` accepts); cast to float.
        Y: Label rows with one column per target; cast to float.
        G: Participant id of every row.
        R: Number of random splits per participant (seeds `0..R-1`).

    Yields:
        MyEnvironment: train and test rows both drawn from one participant, tagged 'self'.

    Raises:
        ValueError: Any split error other than the "least populated class" case, which
            simply skips that (seed, participant) pair.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    pids = set(G.tolist())

    # Participants with fewer than 50 rows are excluded outright.
    too_short = set(g for g in pids if (g==G).sum() < 50)
    # Participants for whom at least one label column holds a single distinct value.
    all_equal = set(g for g in pids if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set itself: `any(all_equal)` inspects the ids, so a lone participant
    # with id 0 would never be reported.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(pids-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # NOTE(review): train_size is the fraction 35/len(_X); if exactly 35 training
            # rows are required, confirm the fraction never rounds down to 34.
            trn,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

        yield MyEnvironment(_X[trn], _Y[trn], _X[tst], _Y[tst], 'self', g, rng)

# Hidden width used by every Linear layer in this sweep.
w = 45

def r(w):
    """Spec of one skip block: two ('l', 'r', w) stages, returned as a fresh list."""
    return ['l', 'r', w, 'l', 'r', w]

lrns = [ None ]
envs = list(make_envs(X3,np.hstack([Y1,Y2]),G,5))

def vals(x):
    """Build one plain three-block evaluator plus three skip-block evaluators.

    The skip-block runs share the trunk `(len(x), w, r(w), r(w))` and differ only
    in `ssl_samps` and `ssl_tau`.
    """
    plain = MyEvaluator((len(x),) + (w,'l','r')*3, (w,1), 1, 2, 6, .5, 2, [1], 2)
    residual = [
        MyEvaluator((len(x),w,r(w),r(w)), (w,1), samps, 2, 0, tau, 2, [1], 2)
        for samps,tau in ((1,.5),(10,.5),(10,1))
    ]
    return [plain] + residual

cb.Experiment(envs,lrns,vals(X3[0])).run('../logs/6/8.log',processes=35,quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 750, 'Interactions': 6000}


In [None]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """Sequential feed-forward network assembled from a compact list of layer specs."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: returns the input plus the wrapped layers' output."""

        def __init__(self, layers):
            super().__init__()
            self.layers = layers

        def forward(self,X):
            residual = self.layers(X)
            return X + residual

    def make_layer(self,curr_dim,spec):
        """Translate a single spec into a `(layer_or_None, output_width)` pair.

        Raises:
            Exception: If `spec` is not one of the supported kinds.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim

        if isinstance(spec,int):
            # The first int seen (no width known yet) only declares the input width.
            layer = None if curr_dim is None else torch.nn.Linear(curr_dim,spec)
            return layer, spec

        if isinstance(spec,list):
            # The nested network is built with the default rng, which reseeds torch.
            inner = FeedForward([curr_dim] + spec)
            return FeedForward.SkipModule(inner), curr_dim

        # Width-preserving layers; built lazily so only the requested one is created.
        builders = {
            'r': lambda: torch.nn.ReLU(),
            'l': lambda: torch.nn.LayerNorm(curr_dim),
            'b': lambda: torch.nn.BatchNorm1d(curr_dim),
            's': lambda: torch.nn.Sigmoid(),
        }
        if isinstance(spec,str) and spec in builders:
            return builders[spec](), curr_dim

        raise Exception("Bad Layer")

    def __init__(self, specs, rng=1):
        """Build the network described by `specs`, seeding torch with `rng` first.

        Args:
            specs: A sequence of layer specifications, read left to right:
                <int>   -- the first int is the input width (no layer is created);
                           every later int is a Linear layer with that output width
                <float> -- a Dropout layer with that probability
                'l'     -- a LayerNorm over the current width
                'b'     -- a BatchNorm1d over the current width
                'r'     -- a ReLU layer
                's'     -- a Sigmoid layer
                [...]   -- a skip (residual) block built from the nested specs
            rng: Seed passed to `torch.manual_seed` before any layer is created.

        An empty layer list results in a single Identity layer.
        """
        torch.manual_seed(rng)

        built, dim = [], None
        for item in specs:
            module, dim = self.make_layer(dim,item)
            if module is not None:
                built.append(module)

        if not built:
            built = [torch.nn.Identity()]

        super().__init__(*built)
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """A fixed train/test split plus a sampler of contrastive (anchor, positive, negatives) triplets."""

    def __init__(self, train_X, train_Y, test_X, test_Y, trn, g, rng):
        """Store the split and its descriptive parameters.

        Args:
            train_X, train_Y: Training features and labels (labels are cast to float).
            test_X, test_Y: Test features and labels (labels are cast to float).
            trn: Tag describing where the training rows came from (stored under 'trn').
            g: Participant id (stored under 'pid').
            rng: Seed used for triplet sampling and for shuffling the test rows.
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Yield contrastive training triplets built from the training rows.

        Rows sharing the same values in label columns `yi` form a class. For every
        anchor a positive is drawn from the anchor's class and `neg` negatives from
        all other classes.

        Args:
            neg: Number of negatives per anchor.
            sr: Sampling rate. Below 1, a single batch over the first `int(len*sr)`
                shuffled rows is produced; otherwise `sr` full passes are produced.
            yi: Column index list into `train_Y` (must keep the column axis, e.g. `[1]`).

        Yields:
            (anchor, positive, negative) float tensors; `negative` carries an extra
            axis of length `neg` after the row axis.
        """
        rng = cb.CobaRandom(self.params['rng'])
        order = rng.shuffle(range(len(self.train_X)))

        rows   = self.train_X.tolist()
        labels = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [rows[i]   for i in order]
        Y = [labels[i] for i in order]

        eq_class = {y: [x for x,l in zip(X,Y) if l == y] for y in set(Y)}
        ne_class = {y: [x for k,v in eq_class.items() if k != y for x in v] for y in set(Y)}

        def choose_unique(items,anchor):
            # Exclude the anchor by identity: `items` holds the very same row objects
            # as X, whereas the anchor's position in X is not a valid index into `items`.
            if len(items) == 1:  return items[0]
            for i in rng.randints(None,0,len(items)-1):
                if items[i] is not anchor:
                    return items[i]

        def choose_n(items,n):
            # Distinct indexes when there are more than n items, otherwise repeats are allowed.
            add_to_index = (indexes := set()).add if len(items) > n else (indexes := []).append
            for i in rng.randints(None,0,len(items)-1):
                add_to_index(i)
                if len(indexes)==n:
                    return [items[i] for i in indexes]

        def triplets(n_anchors):
            # One batch using the first `n_anchors` shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for x,y in zip(X[:n_anchors],Y[:n_anchors]):
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],x))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return the training features and labels as stored."""
        return self.train_X, self.train_Y

    def test(self):
        """Return the test features and labels in an order shuffled with this environment's seed."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains a small ensemble (optional contrastive pre-training, then supervised) and scores test AUC."""

    def __init__(self, s1, s2, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps, y, n_models):
        """Store the configuration.

        Args:
            s1: FeedForward spec of the trunk, trained in both the SSL and supervised phases.
            s2: FeedForward spec of the supervised head applied to the trunk output.
            ssl_samps: Passed to `env.ssl` as its sampling rate; a falsy value skips SSL.
            ssl_neg: Negatives per anchor; must not be 0 when SSL runs.
            ssl_dropn: Controls how many trailing trunk modules are used only during SSL.
            ssl_tau: Temperature dividing the cosine similarities; must not be 0 when SSL runs.
            ws_steps: Number of passes over the training data in the supervised phase;
                a falsy value skips it.
            y: Label column indexes to train on and score.
            n_models: Number of models whose sigmoid outputs are averaged at test time.
        """

        self.s1  = s1  # trunk spec: used by both the SSL and the supervised phase
        self.s2  = s2  # head spec: used by the supervised phase only

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps = ws_steps
        self.n_models = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': ws_steps, 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train the ensemble on `env` and yield its test AUC.

        Args:
            env: Object providing `ssl(neg, sr, yi)`, `train()` and `test()`.
            lrn: Unused here.

        Yields:
            A dict `{'auc<col>': value}` with one entry per column in `self.y`; the
            same score is yielded twice.

        Raises:
            Exception: If SSL is enabled and `ssl_neg` or `ssl_tau` is 0.
        """
        from sklearn.metrics import roc_auc_score

        # presumably limits torch to one thread because the experiment runs many processes — TODO confirm
        torch.set_num_threads(1)
        torch.manual_seed(1)

        mods_opts = []
        opts = []  # placeholder; rebound for every model in the loop below

        for _ in range(self.n_models):
            # NOTE(review): FeedForward reseeds torch with its default rng=1 on every
            # construction, so every ensemble member appears to start from identical
            # weights — confirm this is intended.
            s1 = FeedForward(self.s1)
            s2 = FeedForward(self.s2)
            # Split the trunk: `sb` is the tail used only in the SSL phase, `s1` keeps the rest.
            # NOTE(review): len(self.s1) counts spec entries, while children() has one
            # module fewer when the leading entry is the input width (it builds no
            # layer). ssl_dropn=k therefore appears to move k-1 modules into `sb` —
            # confirm this is intended.
            sb = torch.nn.Sequential(*list(s1.children())[len(self.s1)-self.ssl_dropn:])
            s1 = torch.nn.Sequential(*list(s1.children())[:len(self.s1)-self.ssl_dropn])

            # A part without trainable parameters gets no optimizer.
            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None

            mods = [s1,sb,s2]
            opts = [s1opt,sbopt,s2opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        # Models are trained one after another, each through both phases.
        for mods,opts in mods_opts:
            [s1,sb,s2] = mods
            [s1opt,sbopt,s2opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        # Embed anchor, positive and negatives with trunk + SSL tail (NaNs replaced first).
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Dot products: anchor·positive per row, anchor·each negative per row.
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        # Divide by the norms to obtain cosine similarities.
                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        # Contrastive loss: -log of the positive's share of all exponentiated similarities.
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()

                X, Y = env.train()
                Y = Y[:,self.y]

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                # Supervised phase: trunk + head trained on logits (`sb` is not used here).
                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps):
                    for _X,_y in torch_loader:

                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        loss(s2(s1(_X.nan_to_num())),_y).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average the sigmoid outputs of all models.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2] = mods
                preds += torch.sigmoid(s2(s1(X.nan_to_num())))
            return preds/len(mods_opts)

        def score(X,Y):
            # One AUC per label column, keyed by the column index from self.y.
            with torch.no_grad():
                return {f'auc{y}': roc_auc_score(Y[:,i],predict(X)[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same test score is computed and yielded twice — presumably
        # the experiment runner needs two results per run; TODO confirm.
        yield score(X, Y)
        yield score(X, Y)

def make_envs(X, Y, G, R):
    """Yield leave-one-participant-out environments for every (seed, participant) pair.

    Args:
        X: Feature matrix, one row per observation (anything torch.tensor accepts).
        Y: Label matrix with one column per label, row-aligned with X.
        G: Participant id of every row.
        R: Number of seeds; seeds 0..R-1 are each paired with every kept participant.

    Yields:
        One MyEnvironment per kept (seed, participant). The training data is every
        row of the *other* participants; the test data is the held-out
        participant's rows outside a stratified draw of roughly 35 rows.

    Participants are skipped when they have fewer than 50 rows, when any of their
    label columns is constant, or when the stratified split fails because a class
    has a single member.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = set(G.tolist())

    too_short = set(g for g in groups if (g==G).sum() < 50)
    all_equal = set(g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set itself: any(all_equal) would look at the truthiness of the ids,
    # so an excluded participant with id 0 alone would never be reported.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(groups-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # Only the test indices are used; the ~35 stratified "train" rows are discarded.
            _,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))

            yield MyEnvironment(X[g!=G], Y[g!=G], _X[tst], _Y[tst], 'rest', g, rng)

        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

# Width used for every hidden Linear layer in the specs below.
w = 45

def r(w):
    """Return the spec list of one skip block: two ('l', 'r', <w>) stages."""
    return ['l', 'r', w, 'l', 'r', w]

# Placeholder learner; presumably ignored by MyEvaluator.evaluate -- confirm.
lrns = [None]

# make_envs pairs seeds 0..4 with every participant that passes its filters.
envs = list(make_envs(X3, np.hstack([Y1, Y2]), G, 5))

def vals(x):
    """Build the evaluator grid for feature vectors of the same length as `x`."""
    n_in = len(x)
    head = (w, 1)

    def plain():
        # Same depth as `skips` but without the residual wrappers.
        return (n_in, w, 'l', 'r', w, 'l', 'r', w, 'l', 'r')

    def skips():
        # A fresh spec (with fresh inner lists) for every evaluator.
        return (n_in, w, r(w), r(w))

    # NOTE(review): the positional arguments follow the MyEvaluator signature of the
    # cell that defines it (not visible here) -- confirm their meaning there.
    return [
        MyEvaluator(plain(), head, 1, 2, 6, .5, 2, [1], 2),
        MyEvaluator(skips(), head, 1, 2, 0, .5, 2, [1], 2),
        MyEvaluator(skips(), head, 10, 2, 0, .5, 2, [1], 2),
        MyEvaluator(skips(), head, 10, 2, 0, 1, 2, [1], 2),
    ]

experiment = cb.Experiment(envs, lrns, vals(X3[0]))
experiment.run('../logs/6/8.log', processes=35, quiet=True)

In [68]:
import coba as cb

class FeedForward(torch.nn.Sequential):
    """A generic feed-forward network assembled from a compact layer specification."""

    class SkipModule(torch.nn.Module):
        """Residual wrapper: the output is the input plus the wrapped layers' output."""
        def __init__(self, layers):
            super().__init__()
            self.layers = layers
        def forward(self,X):
            return X + self.layers(X)

    def make_layer(self,curr_dim,spec,rng=1):
        """Translate one spec entry into a layer.

        Args:
            curr_dim: Width flowing into this layer, or None if no width is known yet.
            spec: A single specification entry (see __init__ for the vocabulary).
            rng: Seed handed to nested skip blocks so they honour the caller's seed.

        Returns:
            (layer, width): the constructed module (None when the entry only
            declares the input width) and the width flowing out of it.

        Raises:
            Exception: If the entry is not part of the vocabulary.
        """
        if isinstance(spec,float):
            return torch.nn.Dropout(spec), curr_dim
        if curr_dim is None and isinstance(spec,int):
            # The first integer only declares the input width; it creates no layer.
            return None, spec
        if isinstance(spec,int):
            return torch.nn.Linear(curr_dim,spec),spec
        if spec == 'r':
            return torch.nn.ReLU(),curr_dim
        if spec == 'l':
            return torch.nn.LayerNorm(curr_dim),curr_dim
        if spec == 'b':
            return torch.nn.BatchNorm1d(curr_dim), curr_dim
        if spec == 's':
            return torch.nn.Sigmoid(),curr_dim
        if isinstance(spec,list):
            # Forward the seed: the nested constructor reseeds the global generator,
            # and with its default (1) every network containing a skip block would
            # get the same weights from that block onwards regardless of `rng`.
            return FeedForward.SkipModule(FeedForward([curr_dim] + spec, rng)), curr_dim
        raise Exception("Bad Layer")

    def __init__(self, specs, rng=1):
        """Instantiate a feed-forward network according to specifications.

        Args:
            specs: A sequence of layer specifications as follows:
                <int> (first one) -- declares the input feature width, adds no layer
                <int> -- a Linear layer with output width equal to <int>
                <float> -- a Dropout layer with the given probability
                'l' -- a LayerNorm
                'b' -- a BatchNorm1d
                'r' -- a ReLU layer
                's' -- a Sigmoid layer
                [] -- a skip (residual) block built from the given specifications
                An empty sequence produces a single Identity layer.
                Placeholder widths such as -1 are NOT interpreted here; callers
                must substitute a concrete width before constructing the network.
            rng: Seed passed to torch.manual_seed before the layers are created.
                Note that this reseeds torch's global random generator.
        """

        torch.manual_seed(rng)
        layers,width = [],None
        for spec in specs:
            layer,width = self.make_layer(width,spec,rng)
            if layer is not None: layers.append(layer)
        super().__init__(*(layers or [torch.nn.Identity()]))
        self.params = {"specs": specs, "rng": rng }

class MyEnvironment:
    """Training and test tensors for one held-out participant under one seed."""

    def __init__(self, train_X, train_Y, train_G, test_X, test_Y, trn, g, rng):
        """Store the data and the parameters reported for this environment.

        Args:
            train_X: Training features (tensor, one row per observation).
            train_Y: Training labels (tensor, one column per label); stored as float.
            train_G: Participant id of every training row.
            test_X: Test features.
            test_Y: Test labels; stored as float.
            trn: Label describing the training set, reported as params['trn'].
            g: Id of the held-out participant, reported as params['pid'].
            rng: Seed used for every shuffle and draw, reported as params['rng'].
        """
        self.params = {'pid': g, 'rng': rng, 'trn':trn}
        self.train_X = train_X
        self.train_Y = train_Y.float()
        self.train_G = train_G
        self.test_X = test_X
        self.test_Y = test_Y.float()

    def ssl(self,neg,sr,yi):
        """Generate (anchor, positive, negatives) batches for contrastive training.

        Rows are grouped by the label columns selected with `yi`. For each anchor
        the positive is another row of the same class and the negatives are `neg`
        rows of the other classes.

        Args:
            neg: Number of negatives per anchor. Must be at least 1; the sampling
                loop never terminates for 0.
            sr: If below 1, the fraction of the shuffled training rows used as
                anchors in a single batch. Otherwise the number of full passes over
                the training rows (passed to range(), so it must be an int).
            yi: Column indexes into train_Y (a list) that define the classes.

        Yields:
            Float tensors (anchor, positive, negative) with shapes
            (n, d), (n, d) and (n, neg, d).
        """
        rng = cb.CobaRandom(self.params['rng'])
        rng_order = rng.shuffle(range(len(self.train_X)))

        X = self.train_X.tolist()
        Y = list(map(tuple,self.train_Y[:,yi].tolist()))

        X = [X[j] for j in rng_order]
        Y = [Y[j] for j in rng_order]

        # Rows of each class in shuffled order, plus every row's position inside its
        # own class list. choose_unique must be given that class-local position:
        # the global index refers to a different list and would let the anchor be
        # returned as its own positive.
        eq_class = {y: [] for y in set(Y)}
        eq_index = []
        for x,y in zip(X,Y):
            eq_index.append(len(eq_class[y]))
            eq_class[y].append(x)

        ne_class = {y: [x for k,v in eq_class.items() if k != y for x in v] for y in eq_class}

        # NOTE(review): randints(None, lo, hi) is used as an endless stream of ints
        # with hi inclusive -- confirm against the coba documentation.
        def choose_unique(items,given_i):
            # A class with a single member can only offer the anchor itself.
            if len(items) == 1:  return items[0]
            for i in rng.randints(None,0,len(items)-1):
                if i != given_i:
                    return items[i]

        def choose_n(items,n):
            # Draw without replacement when there are more candidates than requested,
            # otherwise fall back to drawing with replacement.
            distinct = len(items) > n
            indexes = set() if distinct else []
            for i in rng.randints(None,0,len(items)-1):
                if distinct: indexes.add(i)
                else: indexes.append(i)
                if len(indexes)==n:
                    return [items[i] for i in indexes]

        def triplets(n_anchors):
            # Use the first n_anchors shuffled rows as anchors.
            anchor, positive, negative = [], [], []
            for i in range(n_anchors):
                x,y = X[i],Y[i]
                anchor.append(x)
                positive.append(choose_unique(eq_class[y],eq_index[i]))
                negative.append(choose_n     (ne_class[y],neg))
            return torch.tensor(anchor).float(), torch.tensor(positive).float(), torch.tensor(negative).float()

        if sr < 1:
            yield triplets(int(len(X)*sr))
        else:
            for _ in range(sr):
                yield triplets(len(X))

    def train(self):
        """Return (X, Y, G) of the training set in a seed-determined shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.train_X)))
        return self.train_X[rng_indexes,:], self.train_Y[rng_indexes,:], self.train_G[rng_indexes]

    def test(self):
        """Return (X, Y) of the test set in a seed-determined shuffled order."""
        rng_indexes = cb.CobaRandom(self.params['rng']).shuffle(range(len(self.test_X)))
        return self.test_X[rng_indexes,:], self.test_Y[rng_indexes]

class MyEvaluator:
    """Trains an ensemble of s1 -> s2 -> s3 networks on an environment and scores test AUC."""

    def __init__(self, s1, s2, s3, ssl_samps, ssl_neg, ssl_dropn, ssl_tau, ws_steps0, ws_steps1, y, n_models):
        """Store the network specifications and the training schedule.

        Args:
            s1: FeedForward spec of the first stage (trained by the contrastive and
                the first supervised phase).
            s2: FeedForward spec of the second stage (first supervised phase).
            s3: FeedForward spec of the final stage (second supervised phase).
            ssl_samps: Passed to env.ssl as its sampling rate; 0 disables the
                contrastive phase.
            ssl_neg: Number of negatives per anchor in the contrastive phase.
            ssl_dropn: Used to split the tail of s1 off into a projection head that
                is only applied during the contrastive phase.
            ssl_tau: Temperature dividing the cosine similarities.
            ws_steps0: Epochs of the first supervised phase (one output per training
                participant and label). When non-zero, the last entry of s2 and the
                first entry of s3 are replaced by that number of outputs.
            ws_steps1: Epochs of the second supervised phase (only s3 is updated).
            y: List of label column indexes to predict.
            n_models: Number of independently trained networks that are averaged.
        """

        self.s1  = s1  #ssl sl
        self.s2  = s2  #    sl
        self.s3  = s3  #    sl pers

        self.ssl_samps = ssl_samps
        self.ssl_neg   = ssl_neg
        self.ssl_tau   = ssl_tau
        self.ssl_dropn = ssl_dropn

        self.ws_steps0 = ws_steps0
        self.ws_steps1 = ws_steps1
        self.n_models  = n_models

        self.y = y

        self.params = { 's1': s1, 's2':s2, 's3':s3, 'ssl': (ssl_samps,ssl_neg,ssl_dropn,ssl_tau), 'ws': (ws_steps0,ws_steps1), 'y': y, 'n_models': n_models}

    def evaluate(self, env, lrn):
        """Train on env.train() / env.ssl() and yield the test scores.

        Args:
            env: Object providing ssl(neg, sr, yi), train() -> (X, Y, G) and
                test() -> (X, Y).
            lrn: Unused.

        Yields:
            Twice the same kind of dict, {'auc<label>': ROC-AUC on the test set}
            with one entry per label in self.y.

        Raises:
            Exception: If the contrastive phase is enabled with ssl_neg == 0 or
                ssl_tau == 0.
        """
        from sklearn.metrics import roc_auc_score

        torch.set_num_threads(1)
        torch.manual_seed(1)

        n_y = len(self.y)

        # Resolve the placeholder widths into local specs instead of overwriting
        # self.s2/self.s3, so the evaluator is left untouched between environments.
        s2_spec, s3_spec = self.s2, self.s3
        if self.ws_steps0:
            n_heads = len(set(env.train()[2].tolist()))*n_y
            s2_spec = (*s2_spec[:-1], n_heads)
            s3_spec = (n_heads, *s3_spec[1:])

        mods_opts = []

        # NOTE(review): FeedForward reseeds the global generator with its default
        # seed on every construction, so all ensemble members appear to start from
        # identical weights -- confirm this is intended.
        for _ in range(self.n_models):
            s1 = FeedForward(self.s1)
            s2 = FeedForward(s2_spec)
            s3 = FeedForward(s3_spec)

            # The split position counts spec entries, not layers.
            # NOTE(review): a leading width entry creates no layer, so the head may
            # hold one layer fewer than ssl_dropn -- confirm the intent.
            split  = len(self.s1)-self.ssl_dropn
            layers = list(s1.children())
            sb = torch.nn.Sequential(*layers[split:])
            s1 = torch.nn.Sequential(*layers[:split])

            s1opt = COCOB(s1.parameters()) if list(s1.parameters()) else None
            sbopt = COCOB(sb.parameters()) if list(sb.parameters()) else None
            s2opt = COCOB(s2.parameters()) if list(s2.parameters()) else None
            s3opt = COCOB(s3.parameters()) if list(s3.parameters()) else None

            mods = [s1,sb,s2,s3]
            opts = [s1opt,sbopt,s2opt,s3opt]
            mods_opts.append([mods,opts])

        for mods,_ in mods_opts:
            for l in mods: l.train()

        for mods,opts in mods_opts:
            [s1,sb,s2,s3] = mods
            [s1opt,sbopt,s2opt,s3opt] = opts

            if self.ssl_samps:

                if self.ssl_neg == 0: raise Exception("neg can't be 0")
                if self.ssl_tau == 0: raise Exception("Tau can't be 0")

                for A, P, N in env.ssl(self.ssl_neg,self.ssl_samps,self.y):

                    torch_dataset = torch.utils.data.TensorDataset(A,P,N)
                    torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=2,drop_last=True,shuffle=True)

                    for _A, _P, _N in torch_loader:
                        #https://arxiv.org/pdf/2002.05709
                        _A = sb(s1(_A.nan_to_num()))
                        _P = sb(s1(_P.nan_to_num()))
                        _N = sb(s1(_N.nan_to_num()))

                        # Cosine similarity of the anchor with its positive (b,)
                        # and with each of its negatives (b, neg).
                        p = torch.einsum("bi,bi->b",_A,_P)
                        n = torch.einsum("bi,bji->bj",_A,_N)

                        p /= (torch.linalg.norm(_A,dim=1)*torch.linalg.norm(_P,dim=1))
                        n /= (torch.linalg.norm(_A,dim=1).unsqueeze(1)*torch.linalg.norm(_N,dim=2))

                        p = torch.exp(p/self.ssl_tau)
                        n = torch.exp(n/self.ssl_tau)

                        if s1opt: s1opt.zero_grad()
                        if sbopt: sbopt.zero_grad()
                        (-torch.log(p/(p+n.sum(dim=1)))).mean().backward()
                        if s1opt: s1opt.step()
                        if sbopt: sbopt.step()

            if self.ws_steps0 or self.ws_steps1:

                if s1opt: s1opt.zero_grad()
                if s2opt: s2opt.zero_grad()
                if s3opt: s3opt.zero_grad()

                X, Y, G = env.train()
                Y = Y[:,self.y]

                # Z holds every row's labels in the columns of its own participant
                # and NaN everywhere else. Participant k owns the n_y columns
                # starting at k*n_y, so blocks never overlap for multi-label runs.
                i = defaultdict(lambda c= count(0):next(c))
                I = torch.tensor([[i[g]*n_y] for g in G.tolist()]) + torch.arange(n_y).unsqueeze(0)
                R = torch.arange(len(Y)).unsqueeze(1)
                Z = torch.full((len(G),len(i)*n_y), float('nan'))
                Z[R,I] = Y

                torch_dataset = torch.utils.data.TensorDataset(X,Z)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=8,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps0):
                    for _X,_z in torch_loader:
                        if s1opt: s1opt.zero_grad()
                        if s2opt: s2opt.zero_grad()
                        # Only the outputs belonging to each row's participant are scored.
                        loss(s2(s1(_X.nan_to_num()))[~_z.isnan()],_z[~_z.isnan()]).backward()
                        if s2opt: s2opt.step()
                        if s1opt: s1opt.step()

                torch_dataset = torch.utils.data.TensorDataset(X,Y)
                torch_loader  = torch.utils.data.DataLoader(torch_dataset,batch_size=8,drop_last=True,shuffle=True)

                loss = torch.nn.BCEWithLogitsLoss()
                for _ in range(self.ws_steps1):
                    for _X,_y in torch_loader:
                        # Only s3 is stepped here; s1 and s2 keep their weights.
                        if s3opt: s3opt.zero_grad()
                        loss(s3(s2(s1(_X.nan_to_num()))),_y).backward()
                        if s3opt: s3opt.step()

        for mods,_ in mods_opts:
            for l in mods: l.eval()

        X, Y = env.test()
        Y = Y[:,self.y]

        def predict(X):
            # Average of the members' sigmoid outputs.
            preds = 0
            for mods, _ in mods_opts:
                [s1,_,s2,s3] = mods
                preds += torch.sigmoid(s3(s2(s1(X.nan_to_num()))))
            return preds/len(mods_opts)

        def score(X,Y):
            with torch.no_grad():
                # One forward pass shared by every label.
                P = predict(X)
                return {f'auc{y}': roc_auc_score(Y[:,i],P[:,i]) for i,y in enumerate(self.y) }

        # NOTE(review): the same score is emitted twice; presumably two interactions
        # per run are expected downstream -- confirm.
        yield score(X, Y)
        yield score(X, Y)

def make_envs(X, Y, G, R):
    """Yield leave-one-participant-out environments for every (seed, participant) pair.

    Args:
        X: Feature matrix, one row per observation (anything torch.tensor accepts).
        Y: Label matrix with one column per label, row-aligned with X.
        G: Participant id of every row.
        R: Number of seeds; seeds 0..R-1 are each paired with every kept participant.

    Yields:
        One MyEnvironment per kept (seed, participant). The training data is every
        row (features, labels, participant ids) of the *other* participants; the
        test data is the held-out participant's rows outside a stratified draw of
        roughly 35 rows.

    Participants are skipped when they have fewer than 50 rows, when any of their
    label columns is constant, or when the stratified split fails because a class
    has a single member.
    """
    X, Y, G = torch.tensor(X).float(), torch.tensor(Y).float(), torch.tensor(G)

    groups = set(G.tolist())

    too_short = set(g for g in groups if (g==G).sum() < 50)
    all_equal = set(g for g in groups if any(len(set(y.tolist()))==1 for y in Y[g==G].T))

    # Test the set itself: any(all_equal) would look at the truthiness of the ids,
    # so an excluded participant with id 0 alone would never be reported.
    if all_equal: print(f"All Equal, no environment added for {sorted(all_equal)}")

    for rng,g in product(range(R),sorted(groups-all_equal-too_short)):
        _X, _Y = X[g==G], Y[g==G]
        try:
            # Only the test indices are used; the ~35 stratified "train" rows are discarded.
            _,tst = next(StratifiedShuffleSplit(1,train_size=35/len(_X),random_state=rng).split(_X,_Y))
            yield MyEnvironment(X[g!=G], Y[g!=G], G[g!=G], _X[tst], _Y[tst], 'rest', g, rng)

        except ValueError as e:
            if 'The least populated class in y has only 1 member' in str(e): continue
            raise

w = 45

def r(w):
    """Return the spec list of one skip block: two ('l', 'r', <w>) stages."""
    return ['l', 'r', w, 'l', 'r', w]

# Placeholder learner; MyEvaluator.evaluate never reads it.
lrns = [None]

# A single seed (0) for every participant that passes make_envs' filters.
envs = list(make_envs(X3, np.hstack([Y1, Y2]), G, 1))

def vals(x):
    """Build the evaluator grid for feature vectors of the same length as `x`."""
    n_in = len(x)

    def per_participant(width, ws0, ws1):
        # The -1 entries are placeholders that MyEvaluator.evaluate replaces with the
        # number of per-participant outputs (only done when ws_steps0 is non-zero).
        return MyEvaluator((), (n_in, width, 'l', 'r', -1), (-1, 1), 0, 0, 0, 0, ws0, ws1, [1], 1)

    def pooled(ws1):
        # No per-participant stage: the whole network lives in s3.
        return MyEvaluator((), (), (n_in, 30, 'l', 'r', 30, 1), 0, 0, 0, 0, 0, ws1, [1], 1)

    # The contrastive phase is disabled throughout (all ssl_* arguments are 0).
    return [
        per_participant(30, 3, 3),
        pooled(3),
        per_participant(30, 5, 2),
        per_participant(45, 5, 2),
        per_participant(30, 5, 3),
        per_participant(45, 6, 2),
        per_participant(45, 4, 2),
        per_participant(45, 4, 1),
        per_participant(45, 5, 1),
    ]

experiment = cb.Experiment(envs, lrns, vals(X3[0]))
experiment.run('../logs/6/9.log', processes=35, quiet=True)

All Equal, no environment added for [54, 57, 81, 82, 87, 89, 97, 99, 103, 111, 117, 118, 121, 122, 137, 146, 181, 196, 203, 206, 227, 231, 246, 251, 293, 311, 357, 371, 373, 374, 377, 382, 390, 402, 405, 426, 442, 447, 451, 452, 455, 463, 469, 486, 536]
{'Learners': 1, 'Environments': 150, 'Interactions': 2700}
