# ADULTS

This notebook details experiments with training setups (optimizer and learning-rate schedule) and activation functions on the Adult dataset. I will be examining six combinations to see which performs best on this binary classification problem. Each combination is trained for 10 epochs and run 10 times.

In [0]:
!pip install git+https://github.com/fastai/fastai.git

In [0]:
from fastai.tabular import *

In [0]:
# Fetch the ADULT_SAMPLE archive through fastai's `untar_data` and read its
# `adult.csv` into a DataFrame. `path` is passed again to `TabularList.from_df`
# when the DataBunch is built further down.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [0]:
# Target column used for the labels (`label_from_df(cols=dep_var)` below).
dep_var = 'salary'
# Columns handed to `TabularList.from_df` as categorical variables.
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
# Columns handed to `TabularList.from_df` as continuous variables.
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing steps handed to `TabularList.from_df` as `procs`, in this order.
procs = [FillMissing, Categorify, Normalize]

## Get Functions

In [5]:
!git clone https://github.com/sdoria/mish
%cd mish
from rangerlars import *
from mish import *
from mxresnet import *
from ranger import *

Cloning into 'mish'...
remote: Enumerating objects: 46, done.[K
remote: Counting objects: 100% (46/46), done.[K
remote: Compressing objects: 100% (40/40), done.[K
remote: Total 46 (delta 21), reused 19 (delta 6), pack-reused 0[K
Unpacking objects: 100% (46/46), done.
/content/mish
Mish activation loaded...


In [0]:
class MishTabularModel(Module):
    """Tabular model with categorical embeddings and `mish` activations between its linear blocks.

    Parameters:
        emb_szs: `(ni, nf)` pairs, one per categorical column; each pair is passed to
            `embedding(ni, nf)` and the resulting `embedding_dim`s are summed into the
            input width.
        n_cont: number of continuous input features (batch-normalised before use).
        out_sz: width of the final linear block.
        layers: sizes of the hidden layers; any sized collection (list, tuple, ...).
        ps: per-hidden-layer `p` values forwarded to `bn_drop_lin`; `None` means 0 for
            every hidden layer. The first linear block always gets `p=0.`.
        emb_drop: dropout probability applied to the concatenated embeddings.
        y_range: optional `(low, high)`; when given, the output is passed through a
            sigmoid and rescaled into that interval.
        use_bn: enable `bn` in every `bn_drop_lin` block except the first.
        bn_final: append a `BatchNorm1d` after the last linear block.

    Notes:
        * `mish` is looked up when the model is instantiated, so the activation class
          must be defined before the first `MishTabularModel(...)` call.
        * The model needs at least one categorical or continuous input; with neither,
          `forward` has no tensor to feed into the layers.
    """
    def __init__(self, emb_szs:ListSizes, n_cont:int, out_sz:int, layers:Collection[int], ps:Collection[float]=None,
                 emb_drop:float=0., y_range:OptRange=None, use_bn:bool=True, bn_final:bool=False):
        super().__init__()
        ps = ifnone(ps, [0]*len(layers))
        ps = listify(ps, layers)
        self.embeds = nn.ModuleList([embedding(ni, nf) for ni,nf in emb_szs])
        self.emb_drop = nn.Dropout(emb_drop)
        self.bn_cont = nn.BatchNorm1d(n_cont)
        n_emb = sum(e.embedding_dim for e in self.embeds)
        self.n_emb,self.n_cont,self.y_range = n_emb,n_cont,y_range
        sizes = self.get_sizes(layers, out_sz)
        # One activation per linear block; the output block has none.
        actns = [mish() for _ in range(len(sizes)-2)] + [None]
        # Collected under a separate name so the `layers` argument is not shadowed.
        blocks = []
        # `[0.]+ps`: the first block (fed by the raw inputs) never uses dropout.
        for i,(n_in,n_out,dp,act) in enumerate(zip(sizes[:-1],sizes[1:],[0.]+ps,actns)):
            blocks += bn_drop_lin(n_in, n_out, bn=use_bn and i!=0, p=dp, actn=act)
        if bn_final: blocks.append(nn.BatchNorm1d(sizes[-1]))
        self.layers = nn.Sequential(*blocks)

    def get_sizes(self, layers, out_sz):
        "Return the layer widths: [input width, *hidden sizes, `out_sz`]."
        # `list(layers)` lets tuples and other collections work, not only lists.
        return [self.n_emb + self.n_cont] + list(layers) + [out_sz]

    def forward(self, x_cat:Tensor, x_cont:Tensor) -> Tensor:
        "Embed the columns of `x_cat`, append the batch-normalised `x_cont`, and run the layers."
        if self.n_emb != 0:
            # Column i of `x_cat` is looked up in the i-th embedding.
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            x_cont = self.bn_cont(x_cont)
            x = torch.cat([x, x_cont], 1) if self.n_emb != 0 else x_cont
        x = self.layers(x)
        if self.y_range is not None:
            # Squash into (y_range[0], y_range[1]).
            x = (self.y_range[1]-self.y_range[0]) * torch.sigmoid(x) + self.y_range[0]
        return x

In [0]:
def mtabular_learner(data:DataBunch, layers:Collection[int], emb_szs:Dict[str,int]=None, metrics=None,
        ps:Collection[float]=None, emb_drop:float=0., y_range:OptRange=None, use_bn:bool=True, **learn_kwargs):
    """Get a `Learner` for `data` wrapping a `MishTabularModel`.

    Embedding sizes come from `data.get_emb_szs` (with `emb_szs` as optional overrides),
    the number of continuous inputs from `data.cont_names`, and the output width from
    `data.c`. `layers`, `ps`, `emb_drop`, `y_range` and `use_bn` are forwarded to the
    model; `metrics` and any extra `learn_kwargs` are forwarded to `Learner`.
    """
    sizes_per_cat = data.get_emb_szs(ifnone(emb_szs, {}))
    n_continuous = len(data.cont_names)
    net = MishTabularModel(sizes_per_cat, n_continuous, out_sz=data.c, layers=layers, ps=ps,
                           emb_drop=emb_drop, y_range=y_range, use_bn=use_bn)
    return Learner(data, net, metrics=metrics, **learn_kwargs)

In [0]:
class mish(nn.Module):
    "Mish activation as a module: `x * tanh(softplus(x))`."
    def __init__(self):
        # No parameters or buffers; only the base-class setup is needed.
        super().__init__()

    def forward(self, x):
        "Return `x * tanh(softplus(x))` for the input tensor `x`."
        gate = torch.tanh(F.softplus(x))
        return x * gate

## Get Data

In [0]:
# Build the DataBunch shared by every experiment below: the rows at positions
# 800-999 of `df` are passed to `split_by_idx` (held out as the validation set),
# labels come from the `dep_var` column, and `procs` are applied to the columns
# listed in `cat_names` / `cont_names`.
data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
                           .split_by_idx(list(range(800,1000)))
                           .label_from_df(cols=dep_var)
                           .databunch())

## Baseline Adam + TabularLearner

In [10]:
# Baseline run: stock `tabular_learner` (no `opt_func` override) with `layers=[200,100]`,
# trained with `fit_one_cycle` for 10 epochs with 1e-2 as the learning-rate argument.
# This cell is repeated verbatim below; every repetition builds a fresh learner and no
# random seed is set in the visible cells, so the numbers differ from run to run.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.360587,0.390246,0.825,00:06
1,0.38082,0.381851,0.82,00:06
2,0.366999,0.380585,0.825,00:06
3,0.360348,0.365016,0.83,00:06
4,0.363611,0.366442,0.84,00:06
5,0.357309,0.370116,0.83,00:06
6,0.359602,0.35279,0.85,00:06
7,0.353435,0.340923,0.85,00:06
8,0.342541,0.343774,0.85,00:06
9,0.342925,0.345344,0.855,00:06


In [11]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.36874,0.377409,0.8,00:06
1,0.363573,0.375486,0.83,00:06
2,0.37029,0.392663,0.79,00:06
3,0.363879,0.355971,0.855,00:06
4,0.364408,0.369079,0.845,00:06
5,0.340408,0.374021,0.825,00:06
6,0.344257,0.37569,0.84,00:06
7,0.357481,0.354126,0.845,00:06
8,0.33915,0.418856,0.845,00:06
9,0.347156,0.585205,0.845,00:06


In [12]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.366772,0.396263,0.81,00:06
1,0.375704,0.389678,0.83,00:06
2,0.366073,0.378481,0.84,00:06
3,0.361924,0.37032,0.82,00:06
4,0.368719,0.382766,0.84,00:06
5,0.35132,0.360022,0.845,00:06
6,0.367149,0.354565,0.845,00:06
7,0.355031,0.341331,0.86,00:06
8,0.351497,0.345408,0.86,00:06
9,0.342615,0.338352,0.855,00:06


In [13]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.380577,0.372943,0.835,00:06
1,0.363914,0.378933,0.825,00:06
2,0.351882,0.371453,0.83,00:06
3,0.356771,0.37694,0.83,00:06
4,0.356644,0.364314,0.825,00:06
5,0.357634,0.349959,0.835,00:06
6,0.35171,0.353718,0.835,00:06
7,0.34187,0.343063,0.845,00:06
8,0.346897,0.33964,0.845,00:06
9,0.346758,0.339252,0.84,00:06


In [14]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369489,0.405346,0.77,00:06
1,0.374224,0.399668,0.795,00:06
2,0.360916,0.379704,0.825,00:06
3,0.358865,0.388497,0.81,00:06
4,0.347779,0.354031,0.84,00:06
5,0.345156,0.355486,0.85,00:06
6,0.336116,0.357799,0.83,00:06
7,0.344629,0.347113,0.85,00:06
8,0.344492,0.343777,0.85,00:06
9,0.337468,0.344758,0.85,00:06


In [15]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.379706,0.408869,0.79,00:06
1,0.358928,0.383086,0.815,00:06
2,0.365045,0.375705,0.83,00:06
3,0.356504,0.386769,0.82,00:06
4,0.366027,0.370362,0.825,00:06
5,0.339482,0.358917,0.845,00:06
6,0.349608,0.363229,0.845,00:06
7,0.347743,0.357238,0.845,00:06
8,0.337229,0.356293,0.85,00:06
9,0.335421,0.352395,0.85,00:06


In [16]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369869,0.392639,0.815,00:06
1,0.356898,0.392156,0.82,00:06
2,0.369024,0.387977,0.82,00:06
3,0.362919,0.385674,0.84,00:06
4,0.344941,0.379215,0.835,00:06
5,0.353867,0.377932,0.84,00:06
6,0.343408,0.370254,0.83,00:06
7,0.352945,0.375688,0.815,00:06
8,0.351601,0.36535,0.835,00:06
9,0.333222,0.368039,0.845,00:06


In [17]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.372633,0.387152,0.83,00:06
1,0.369788,0.386251,0.835,00:06
2,0.361315,0.389933,0.805,00:06
3,0.363268,0.368519,0.82,00:06
4,0.34517,0.378746,0.835,00:06
5,0.353807,0.356124,0.855,00:06
6,0.352935,0.356691,0.835,00:06
7,0.345866,0.349875,0.85,00:06
8,0.35093,0.350392,0.855,00:06
9,0.336825,0.349656,0.86,00:06


In [18]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.37164,0.385964,0.805,00:06
1,0.367408,0.390884,0.81,00:06
2,0.366744,0.388199,0.84,00:06
3,0.364305,0.369399,0.825,00:06
4,0.34962,0.368845,0.83,00:06
5,0.357085,0.371198,0.815,00:06
6,0.354202,0.35895,0.845,00:06
7,0.349034,0.353067,0.845,00:06
8,0.336971,0.348118,0.855,00:06
9,0.348616,0.350396,0.86,00:06


In [19]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.37688,0.398346,0.795,00:06
1,0.368757,0.384484,0.81,00:06
2,0.368214,0.388912,0.81,00:06
3,0.367278,0.370738,0.84,00:06
4,0.356095,0.3656,0.845,00:06
5,0.362426,0.362799,0.825,00:06
6,0.354515,0.349168,0.845,00:06
7,0.348105,0.341801,0.875,00:06
8,0.344961,0.340331,0.86,00:06
9,0.340403,0.341155,0.855,00:06


## One Cycle + Adam + Mish

In [20]:
# Same schedule as the baseline (`fit_one_cycle`, 10 epochs, 1e-2) but the learner is
# built with `mtabular_learner`, i.e. around `MishTabularModel` with `mish` activations.
# Repeated verbatim below; each repetition starts from a freshly built learner.
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.367297,0.366249,0.8,00:06
1,0.384028,0.404639,0.805,00:06
2,0.362459,0.379871,0.82,00:06
3,0.354155,0.37105,0.835,00:06
4,0.356521,0.377252,0.82,00:06
5,0.35408,0.372171,0.83,00:06
6,0.359365,0.355731,0.845,00:06
7,0.357844,0.353025,0.835,00:06
8,0.340448,0.35239,0.845,00:06
9,0.341008,0.351678,0.84,00:06


In [21]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.357097,0.378478,0.825,00:06
1,0.365975,0.36815,0.84,00:06
2,0.360267,0.37826,0.845,00:06
3,0.359184,0.372422,0.845,00:06
4,0.356544,0.372828,0.825,00:06
5,0.361287,0.370172,0.84,00:06
6,0.347765,0.355366,0.84,00:06
7,0.342224,0.347023,0.85,00:06
8,0.351928,0.343563,0.855,00:06
9,0.340556,0.344157,0.85,00:06


In [22]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.366904,0.438319,0.77,00:06
1,0.370082,0.373431,0.83,00:06
2,0.35587,0.380307,0.83,00:06
3,0.35881,0.362308,0.845,00:06
4,0.375564,0.370972,0.835,00:06
5,0.35646,0.366123,0.835,00:06
6,0.347734,0.357961,0.84,00:07
7,0.35748,0.351199,0.85,00:06
8,0.344606,0.353038,0.85,00:06
9,0.349186,0.353228,0.85,00:06


In [23]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.363443,0.394078,0.815,00:07
1,0.358052,0.390451,0.82,00:06
2,0.355681,0.379431,0.82,00:06
3,0.369537,0.368754,0.835,00:06
4,0.359413,0.36334,0.84,00:06
5,0.351144,0.368105,0.84,00:06
6,0.352568,0.35507,0.835,00:06
7,0.359223,0.356322,0.835,00:06
8,0.345626,0.356934,0.85,00:06
9,0.356754,0.359895,0.85,00:06


In [24]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369027,0.394994,0.81,00:06
1,0.379677,0.393893,0.825,00:06
2,0.36605,0.372022,0.83,00:06
3,0.363136,0.370612,0.83,00:06
4,0.35706,0.373105,0.84,00:06
5,0.356051,0.364885,0.84,00:07
6,0.355909,0.355709,0.845,00:06
7,0.35469,0.344561,0.855,00:06
8,0.344621,0.346837,0.845,00:06
9,0.339733,0.346219,0.845,00:06


In [25]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.36243,0.390956,0.82,00:06
1,0.369133,0.390953,0.82,00:06
2,0.36491,0.387363,0.83,00:06
3,0.353422,0.382355,0.815,00:06
4,0.356755,0.382196,0.8,00:06
5,0.352347,0.380995,0.835,00:06
6,0.35414,0.364877,0.845,00:06
7,0.345688,0.361337,0.855,00:06
8,0.341298,0.358212,0.85,00:06
9,0.351162,0.361227,0.85,00:06


In [26]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.365468,0.404245,0.785,00:06
1,0.368055,0.389052,0.815,00:06
2,0.359153,0.389845,0.815,00:06
3,0.369703,0.365379,0.84,00:06
4,0.354074,0.373635,0.85,00:06
5,0.36052,0.365449,0.845,00:06
6,0.363736,0.360666,0.835,00:06
7,0.346652,0.364622,0.85,00:07
8,0.34392,0.359656,0.825,00:06
9,0.355692,0.360826,0.845,00:06


In [27]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.367236,0.397158,0.82,00:06
1,0.371517,0.413633,0.77,00:07
2,0.369267,0.385404,0.825,00:07
3,0.363338,0.380407,0.82,00:06
4,0.354576,0.362338,0.83,00:07
5,0.355562,0.368242,0.825,00:06
6,0.354501,0.361462,0.855,00:06
7,0.356529,0.358752,0.84,00:06
8,0.350357,0.361177,0.845,00:06
9,0.35,0.358829,0.84,00:06


In [28]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.367138,0.381844,0.825,00:06
1,0.365265,0.384563,0.83,00:06
2,0.359244,0.398343,0.825,00:06
3,0.36935,0.384257,0.835,00:06
4,0.358839,0.36923,0.835,00:06
5,0.340562,0.358467,0.85,00:06
6,0.354112,0.355469,0.84,00:06
7,0.349349,0.361953,0.83,00:06
8,0.34226,0.358744,0.845,00:06
9,0.345114,0.357965,0.85,00:06


In [29]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.370415,0.416998,0.79,00:06
1,0.364388,0.418227,0.8,00:06
2,0.364868,0.388359,0.83,00:06
3,0.357303,0.386389,0.805,00:06
4,0.356049,0.379762,0.85,00:06
5,0.366858,0.366389,0.835,00:06
6,0.358212,0.358619,0.845,00:06
7,0.351346,0.355633,0.845,00:07
8,0.339175,0.355637,0.84,00:07
9,0.354992,0.357306,0.84,00:07


## Flatten + RangerLars

In [0]:
# Optimizer factory passed as `opt_func` to the learners in this section.
# `RangerLars` presumably comes from the `rangerlars` module star-imported from the
# cloned `mish` repository — confirm against that repo.
opt_func = partial(RangerLars, betas = (0.9,0.99), eps=1e-6)

In [31]:
# Stock `tabular_learner` with the RangerLars `opt_func`, trained with `fit_fc`
# (presumably the flat-then-annealed schedule the section title calls "Flatten")
# for 10 epochs with 1e-2 as the learning-rate argument.
# Repeated verbatim below; each repetition starts from a freshly built learner.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.52724,0.511217,0.79,00:11
1,0.423342,0.414887,0.805,00:11
2,0.356816,0.383637,0.83,00:11
3,0.351054,0.375273,0.82,00:11
4,0.353945,0.389013,0.78,00:11
5,0.363449,0.377501,0.82,00:11
6,0.365655,0.366435,0.815,00:11
7,0.35216,0.358362,0.835,00:11
8,0.347748,0.361807,0.84,00:11
9,0.34009,0.361645,0.84,00:11


In [32]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.484389,0.478729,0.76,00:11
1,0.365805,0.385999,0.83,00:11
2,0.353515,0.38817,0.83,00:11
3,0.357202,0.382422,0.82,00:11
4,0.363519,0.387464,0.815,00:11
5,0.35765,0.378324,0.83,00:11
6,0.353887,0.369651,0.835,00:11
7,0.364625,0.364497,0.84,00:11
8,0.358995,0.364825,0.84,00:11
9,0.359452,0.366944,0.835,00:11


In [33]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.38336,0.402171,0.795,00:11
1,0.358594,0.376933,0.845,00:11
2,0.35785,0.371071,0.83,00:11
3,0.351907,0.364888,0.835,00:11
4,0.34243,0.386192,0.84,00:11
5,0.358223,0.369187,0.835,00:11
6,0.353991,0.358895,0.835,00:11
7,0.353518,0.36703,0.845,00:11
8,0.350634,0.367959,0.845,00:11
9,0.348777,0.360027,0.85,00:11


In [34]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.437059,0.42593,0.815,00:11
1,0.360227,0.388869,0.81,00:11
2,0.356105,0.376296,0.825,00:11
3,0.363774,0.380711,0.835,00:11
4,0.359166,0.367463,0.845,00:11
5,0.354342,0.369795,0.815,00:11
6,0.350141,0.371446,0.83,00:11
7,0.356119,0.368903,0.82,00:11
8,0.337676,0.356985,0.845,00:11
9,0.344779,0.362447,0.84,00:11


In [35]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.39359,0.394677,0.805,00:11
1,0.363241,0.379765,0.815,00:11
2,0.35774,0.377162,0.835,00:11
3,0.361174,0.381285,0.83,00:11
4,0.360872,0.388571,0.825,00:11
5,0.360024,0.390924,0.815,00:11
6,0.356727,0.392044,0.815,00:11
7,0.355599,0.373593,0.825,00:11
8,0.342264,0.362543,0.84,00:11
9,0.355049,0.363975,0.845,00:11


In [37]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369355,0.388475,0.815,00:11
1,0.363469,0.387963,0.835,00:11
2,0.345097,0.386696,0.82,00:11
3,0.360374,0.377542,0.825,00:11
4,0.357514,0.378309,0.825,00:11
5,0.353888,0.379854,0.815,00:11
6,0.352403,0.365562,0.845,00:11
7,0.357818,0.37156,0.84,00:11
8,0.352121,0.363693,0.84,00:11
9,0.345943,0.368257,0.835,00:11


In [38]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.500412,0.487117,0.79,00:11
1,0.359668,0.383574,0.83,00:11
2,0.346507,0.383513,0.835,00:11
3,0.360916,0.379362,0.835,00:11
4,0.366255,0.373152,0.815,00:11
5,0.357887,0.371834,0.825,00:11
6,0.350896,0.379512,0.835,00:11
7,0.358362,0.368474,0.83,00:11
8,0.360765,0.363095,0.835,00:11
9,0.33063,0.364092,0.82,00:11


In [39]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.508244,0.502648,0.75,00:11
1,0.367272,0.385773,0.83,00:11
2,0.354618,0.374942,0.83,00:11
3,0.364754,0.390973,0.81,00:11
4,0.355867,0.361159,0.85,00:11
5,0.347026,0.376889,0.84,00:11
6,0.351473,0.37058,0.83,00:11
7,0.357012,0.360913,0.835,00:11
8,0.350966,0.35874,0.825,00:11
9,0.351583,0.358868,0.83,00:11


In [40]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.395598,0.405928,0.8,00:11
1,0.351654,0.397572,0.82,00:11
2,0.368373,0.385037,0.845,00:11
3,0.369278,0.377416,0.835,00:11
4,0.362857,0.383515,0.84,00:11
5,0.352696,0.384817,0.825,00:11
6,0.357933,0.372564,0.795,00:11
7,0.348263,0.366582,0.835,00:11
8,0.35345,0.366976,0.835,00:11
9,0.355028,0.365446,0.845,00:11


In [41]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.539562,0.551757,0.755,00:11
1,0.444171,0.421424,0.81,00:11
2,0.371704,0.382338,0.835,00:11
3,0.363374,0.381772,0.82,00:11
4,0.360488,0.369131,0.83,00:11
5,0.36345,0.37356,0.845,00:11
6,0.359718,0.378695,0.83,00:11
7,0.353144,0.359492,0.84,00:11
8,0.35092,0.362321,0.84,00:11
9,0.348121,0.355955,0.84,00:11


## Flatten + RangerLars + Mish

In [0]:
# Optimizer factory passed as `opt_func` to the learners in this section; identical to
# the RangerLars definition used in the previous section.
opt_func = partial(RangerLars, betas = (0.9,0.99), eps=1e-6)

In [43]:
# `mtabular_learner` (the `MishTabularModel` with `mish` activations) with the RangerLars
# `opt_func`, trained with `fit_fc` for 10 epochs with 1e-2 as the learning-rate argument.
# Repeated verbatim below; each repetition starts from a freshly built learner.
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.481471,0.483635,0.76,00:11
1,0.353294,0.390476,0.84,00:11
2,0.362882,0.38202,0.815,00:11
3,0.358656,0.36525,0.83,00:11
4,0.33994,0.364195,0.83,00:11
5,0.354604,0.374218,0.825,00:11
6,0.362088,0.376386,0.835,00:11
7,0.356323,0.364534,0.85,00:11
8,0.351709,0.37046,0.84,00:11
9,0.349032,0.369417,0.845,00:11


In [44]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.457098,0.425221,0.795,00:11
1,0.370099,0.425479,0.775,00:11
2,0.350469,0.384337,0.835,00:11
3,0.358147,0.384525,0.8,00:11
4,0.367874,0.368995,0.825,00:11
5,0.350696,0.36942,0.835,00:11
6,0.356126,0.363786,0.84,00:11
7,0.355478,0.36869,0.84,00:11
8,0.350842,0.371935,0.845,00:11
9,0.347725,0.361762,0.84,00:11


In [45]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.534953,0.494576,0.765,00:11
1,0.436063,0.42811,0.78,00:11
2,0.370956,0.387266,0.785,00:11
3,0.351137,0.3792,0.835,00:11
4,0.354423,0.379733,0.81,00:11
5,0.357995,0.375051,0.82,00:12
6,0.358518,0.372027,0.83,00:12
7,0.358921,0.37066,0.83,00:11
8,0.355476,0.365347,0.84,00:11
9,0.352747,0.364462,0.84,00:11


In [46]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.388401,0.408991,0.78,00:11
1,0.381286,0.378188,0.825,00:11
2,0.355628,0.381766,0.83,00:11
3,0.365093,0.387012,0.83,00:11
4,0.367546,0.366884,0.84,00:11
5,0.356465,0.383082,0.83,00:11
6,0.359281,0.374789,0.825,00:11
7,0.341625,0.370097,0.82,00:11
8,0.34218,0.359648,0.83,00:11
9,0.355975,0.358451,0.85,00:12


In [47]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.544605,0.459079,0.805,00:11
1,0.465574,0.456094,0.79,00:11
2,0.357386,0.388685,0.8,00:11
3,0.357866,0.386168,0.81,00:11
4,0.357871,0.373587,0.825,00:11
5,0.357521,0.381836,0.835,00:11
6,0.35453,0.373198,0.83,00:11
7,0.358201,0.368733,0.84,00:11
8,0.349035,0.366363,0.835,00:11
9,0.345586,0.363824,0.85,00:11


In [48]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.547684,0.49721,0.78,00:11
1,0.42791,0.407613,0.795,00:11
2,0.359613,0.382937,0.83,00:11
3,0.36403,0.381415,0.83,00:11
4,0.356494,0.381153,0.84,00:11
5,0.363972,0.380254,0.815,00:11
6,0.363173,0.373058,0.81,00:11
7,0.348495,0.36653,0.835,00:11
8,0.353597,0.365876,0.825,00:11
9,0.345226,0.367224,0.845,00:11


In [49]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.455054,0.442977,0.785,00:11
1,0.373149,0.39661,0.81,00:11
2,0.355997,0.381162,0.825,00:11
3,0.362043,0.391429,0.835,00:11
4,0.35665,0.377124,0.83,00:11
5,0.359281,0.38843,0.815,00:11
6,0.35996,0.374562,0.83,00:11
7,0.350306,0.368192,0.835,00:11
8,0.345851,0.360388,0.845,00:11
9,0.343458,0.361064,0.85,00:11


In [50]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.478394,0.45226,0.8,00:11
1,0.364261,0.382906,0.825,00:11
2,0.353745,0.386416,0.845,00:11
3,0.359444,0.386844,0.835,00:11
4,0.350325,0.378856,0.815,00:11
5,0.361806,0.372808,0.82,00:11
6,0.353624,0.369615,0.865,00:11
7,0.354758,0.359575,0.845,00:11
8,0.355081,0.365308,0.84,00:11
9,0.351942,0.360553,0.83,00:11


In [51]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.382167,0.401999,0.775,00:11
1,0.362367,0.397474,0.795,00:11
2,0.345224,0.388916,0.82,00:11
3,0.360535,0.378628,0.83,00:11
4,0.355643,0.380331,0.83,00:11
5,0.35797,0.373614,0.82,00:11
6,0.36123,0.38013,0.825,00:11
7,0.357402,0.369893,0.815,00:11
8,0.353193,0.359392,0.84,00:11
9,0.342475,0.35921,0.83,00:11


In [52]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.465651,0.456246,0.785,00:11
1,0.356995,0.42225,0.77,00:11
2,0.368313,0.389997,0.83,00:11
3,0.352765,0.388247,0.8,00:11
4,0.352316,0.382021,0.83,00:11
5,0.359806,0.368283,0.805,00:11
6,0.351284,0.369996,0.86,00:11
7,0.350055,0.36394,0.845,00:11
8,0.35602,0.366357,0.82,00:11
9,0.343755,0.36127,0.825,00:11


## Flatten + Ranger

In [0]:
# Optimizer factory passed as `opt_func` to the learners in this section; this rebinds
# `opt_func`, replacing the RangerLars factory used above.
# `Ranger` presumably comes from the `ranger` module star-imported from the cloned
# `mish` repository — confirm against that repo.
opt_func = partial(Ranger, betas=(0.95,0.99), eps=1e-6)

In [54]:
# Stock `tabular_learner` with the Ranger `opt_func`, trained with `fit_fc` for
# 10 epochs with 1e-2 as the learning-rate argument.
# Repeated verbatim below; each repetition starts from a freshly built learner.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.362093,0.376233,0.81,00:06
1,0.362184,0.375803,0.83,00:06
2,0.354703,0.375221,0.835,00:06
3,0.363267,0.362552,0.835,00:06
4,0.359559,0.381321,0.835,00:06
5,0.352162,0.356524,0.835,00:06
6,0.344,0.365031,0.85,00:06
7,0.339747,0.349586,0.835,00:06
8,0.340373,0.344348,0.865,00:06
9,0.34275,0.342451,0.855,00:07


In [55]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.355502,0.398805,0.785,00:06
1,0.360202,0.386085,0.795,00:06
2,0.353512,0.381556,0.81,00:06
3,0.351147,0.365298,0.83,00:06
4,0.357239,0.354359,0.83,00:06
5,0.35736,0.365602,0.835,00:06
6,0.346208,0.349263,0.83,00:06
7,0.342581,0.352621,0.845,00:06
8,0.340715,0.348199,0.85,00:06
9,0.341274,0.346773,0.845,00:06


In [56]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.353483,0.397407,0.81,00:06
1,0.363479,0.377419,0.83,00:06
2,0.350282,0.375486,0.845,00:06
3,0.37297,0.378792,0.825,00:06
4,0.362785,0.384533,0.82,00:06
5,0.35809,0.358333,0.825,00:06
6,0.351816,0.361994,0.845,00:06
7,0.346876,0.36561,0.85,00:06
8,0.346603,0.372164,0.855,00:07
9,0.344447,0.370218,0.85,00:06


In [57]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.362189,0.382344,0.825,00:06
1,0.346465,0.362612,0.845,00:06
2,0.359486,0.367151,0.84,00:06
3,0.359904,0.359368,0.84,00:06
4,0.361786,0.362902,0.845,00:06
5,0.364959,0.356777,0.84,00:06
6,0.347415,0.365404,0.85,00:07
7,0.350454,0.355609,0.845,00:07
8,0.336319,0.361449,0.86,00:07
9,0.343106,0.357979,0.86,00:06


In [58]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369565,0.384742,0.81,00:06
1,0.355293,0.389371,0.805,00:06
2,0.35889,0.351909,0.84,00:06
3,0.357692,0.363126,0.84,00:06
4,0.35453,0.350758,0.86,00:06
5,0.353006,0.384289,0.805,00:06
6,0.338495,0.354807,0.84,00:06
7,0.341394,0.344792,0.86,00:06
8,0.346667,0.331511,0.855,00:06
9,0.337306,0.328033,0.86,00:06


In [59]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.373586,0.404257,0.81,00:06
1,0.358694,0.380359,0.845,00:06
2,0.351259,0.35557,0.835,00:06
3,0.350661,0.359003,0.855,00:06
4,0.350461,0.348899,0.85,00:06
5,0.356473,0.344703,0.845,00:06
6,0.363469,0.353549,0.84,00:06
7,0.351356,0.347032,0.845,00:06
8,0.346511,0.348525,0.85,00:06
9,0.342042,0.344478,0.85,00:06


In [60]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.374096,0.385931,0.805,00:06
1,0.354772,0.372288,0.83,00:06
2,0.359262,0.38306,0.825,00:06
3,0.358947,0.371927,0.84,00:06
4,0.349902,0.371367,0.825,00:06
5,0.343848,0.368918,0.83,00:06
6,0.34781,0.3542,0.845,00:06
7,0.347548,0.351715,0.84,00:06
8,0.336882,0.350806,0.855,00:06
9,0.334745,0.347806,0.86,00:07


In [61]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.378197,0.392502,0.82,00:06
1,0.358558,0.37187,0.83,00:06
2,0.344418,0.366819,0.84,00:06
3,0.353866,0.370298,0.85,00:07
4,0.348564,0.355178,0.845,00:06
5,0.348074,0.344145,0.84,00:06
6,0.347052,0.376411,0.825,00:06
7,0.352413,0.357414,0.85,00:07
8,0.338828,0.353071,0.865,00:07
9,0.338262,0.356749,0.85,00:06


In [62]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.360908,0.3962,0.795,00:06
1,0.348979,0.380682,0.83,00:06
2,0.367322,0.37475,0.82,00:06
3,0.36836,0.374086,0.81,00:06
4,0.355218,0.355167,0.855,00:06
5,0.347552,0.366535,0.835,00:06
6,0.351372,0.356391,0.84,00:06
7,0.351772,0.350747,0.85,00:06
8,0.342554,0.353423,0.85,00:06
9,0.343193,0.350664,0.85,00:06


In [63]:
learn = tabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.363704,0.380271,0.82,00:06
1,0.361667,0.379512,0.835,00:07
2,0.360073,0.388008,0.825,00:06
3,0.357012,0.360855,0.825,00:06
4,0.350683,0.357939,0.835,00:06
5,0.349047,0.364892,0.825,00:06
6,0.349703,0.359282,0.82,00:06
7,0.348035,0.359843,0.84,00:07
8,0.349844,0.350911,0.83,00:06
9,0.340154,0.349267,0.83,00:06


## Flatten + Ranger + Mish

In [0]:
# Optimizer factory passed as `opt_func` to the learners in this section; identical to
# the Ranger definition used in the previous section.
opt_func = partial(Ranger, betas=(0.95,0.99), eps=1e-6)

In [65]:
# `mtabular_learner` (the `MishTabularModel` with `mish` activations) with the Ranger
# `opt_func`, trained with `fit_fc` for 10 epochs with 1e-2 as the learning-rate argument.
# Repeated verbatim below; each repetition starts from a freshly built learner.
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.371289,0.378865,0.83,00:07
1,0.358207,0.372189,0.835,00:07
2,0.348685,0.376901,0.835,00:07
3,0.362843,0.378601,0.84,00:07
4,0.355827,0.355765,0.85,00:07
5,0.346302,0.361852,0.825,00:06
6,0.347568,0.350005,0.845,00:07
7,0.35669,0.359637,0.855,00:07
8,0.352103,0.356576,0.835,00:07
9,0.347949,0.352376,0.85,00:07


In [66]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369176,0.383486,0.83,00:07
1,0.360504,0.386102,0.825,00:07
2,0.360315,0.38956,0.835,00:07
3,0.36028,0.386941,0.82,00:07
4,0.360679,0.364217,0.83,00:07
5,0.375967,0.356032,0.83,00:07
6,0.35316,0.372102,0.845,00:07
7,0.353392,0.364618,0.845,00:07
8,0.346235,0.361752,0.845,00:07
9,0.357021,0.35957,0.835,00:07


In [67]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.364597,0.385753,0.83,00:07
1,0.35354,0.38573,0.825,00:07
2,0.359863,0.368965,0.82,00:07
3,0.367726,0.372901,0.805,00:06
4,0.360084,0.368966,0.85,00:07
5,0.360093,0.36499,0.835,00:06
6,0.358165,0.363803,0.835,00:06
7,0.348098,0.363452,0.825,00:06
8,0.35979,0.352886,0.85,00:07
9,0.345513,0.354562,0.855,00:07


In [68]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.361574,0.39794,0.825,00:07
1,0.366063,0.397555,0.805,00:06
2,0.355946,0.379989,0.83,00:07
3,0.356734,0.390473,0.82,00:06
4,0.359001,0.383101,0.825,00:06
5,0.340485,0.372556,0.835,00:07
6,0.361539,0.377398,0.82,00:06
7,0.347392,0.353015,0.845,00:07
8,0.349278,0.350851,0.84,00:07
9,0.363605,0.34905,0.85,00:07


In [69]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369641,0.3853,0.805,00:07
1,0.36309,0.370756,0.84,00:06
2,0.36651,0.374237,0.84,00:06
3,0.355486,0.372457,0.83,00:07
4,0.360796,0.370712,0.82,00:07
5,0.353535,0.374411,0.845,00:06
6,0.360603,0.366464,0.83,00:06
7,0.350663,0.356436,0.835,00:07
8,0.350878,0.352357,0.845,00:07
9,0.342174,0.352314,0.835,00:07


In [70]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.370475,0.381625,0.825,00:07
1,0.364405,0.384259,0.81,00:07
2,0.353046,0.399005,0.805,00:07
3,0.363501,0.375666,0.83,00:06
4,0.358685,0.37233,0.82,00:07
5,0.352752,0.36018,0.845,00:06
6,0.34446,0.35843,0.84,00:07
7,0.347072,0.35786,0.84,00:07
8,0.351952,0.355336,0.835,00:07
9,0.347194,0.356287,0.845,00:07


In [71]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.376153,0.389483,0.805,00:07
1,0.364093,0.382,0.84,00:07
2,0.361559,0.385995,0.825,00:07
3,0.343157,0.384259,0.82,00:07
4,0.356785,0.376375,0.825,00:06
5,0.362251,0.356933,0.84,00:07
6,0.355541,0.362552,0.835,00:07
7,0.354123,0.356544,0.83,00:07
8,0.344339,0.359322,0.845,00:06
9,0.349908,0.353056,0.835,00:07


In [72]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.369479,0.38179,0.835,00:07
1,0.366994,0.385178,0.815,00:07
2,0.360559,0.373431,0.815,00:07
3,0.360402,0.365295,0.83,00:06
4,0.351987,0.374611,0.84,00:06
5,0.354562,0.348365,0.83,00:07
6,0.346578,0.356009,0.83,00:07
7,0.351241,0.365589,0.84,00:07
8,0.348665,0.359987,0.835,00:07
9,0.347274,0.35521,0.84,00:07


In [73]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.352757,0.381198,0.835,00:06
1,0.373625,0.386267,0.815,00:07
2,0.357296,0.390525,0.825,00:07
3,0.363708,0.367883,0.845,00:07
4,0.360936,0.371625,0.855,00:07
5,0.345904,0.361659,0.84,00:07
6,0.354706,0.357345,0.84,00:07
7,0.351485,0.359636,0.84,00:07
8,0.347212,0.350485,0.85,00:07
9,0.343488,0.35921,0.855,00:06


In [74]:
learn = mtabular_learner(data, layers=[200,100], metrics=accuracy, opt_func=opt_func)
learn.fit_fc(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.365567,0.388976,0.795,00:06
1,0.355462,0.387778,0.83,00:06
2,0.361245,0.369202,0.815,00:07
3,0.364505,0.368789,0.825,00:07
4,0.366122,0.374074,0.835,00:06
5,0.351544,0.366886,0.825,00:07
6,0.350867,0.362079,0.83,00:06
7,0.36133,0.364098,0.83,00:07
8,0.357174,0.360207,0.84,00:07
9,0.362653,0.354291,0.835,00:07
