In [1]:
from fastai import *
from fastai.vision import *

%matplotlib inline

# Absolute path of the parent of the current working directory. The data,
# src and results paths used in later cells are all built from it.
PATH = os.path.abspath('..')

# NOTE(review): `seed` is not referenced by any cell visible in this notebook,
# so the random splits and training runs below are not seeded by it.
seed = 12345

In [2]:
%load_ext watermark
%watermark

2019-12-05T18:35:29+00:00

CPython 3.7.4
IPython 7.9.0

compiler   : GCC 7.3.0
system     : Linux
release    : 4.9.0-11-amd64
machine    : x86_64
processor  : 
CPU cores  : 8
interpreter: 64bit


# Helper functions

In [3]:
# Make the project's `src` directory importable so the two modules below resolve.
sys.path.append(f'{PATH}/src')

# Star import; presumably the source of `mxresnet34` used in the training cells.
from mxresnet import *
# Optimizer class handed to every Learner through `opt_func`.
from ranger import Ranger

Mish activation loaded...
Mish activation loaded...


In [4]:
def get_xgass_data():
    """Load the xGASS representative sample together with its delta_5 table.

    Two CSV files under ``{PATH}/data`` are read. The delta_5 table (indexed
    by ``GASS``) is joined onto the sample through the sample's ``GASS``
    column; column names that clash receive the ``_GASS`` suffix. A
    ``logfgas`` column, defined as ``lgMHI - lgMstar``, is then added.

    Returns
    -------
    pandas.DataFrame
        A fresh copy of the joined table.
    """
    sample = pd.read_csv(f"{PATH}/data/xGASS_representative_sample.csv")
    delta5 = pd.read_csv(f"{PATH}/data/xGASS-delta_5.csv", index_col="GASS")

    joined = sample.join(delta5, on="GASS", rsuffix='_GASS')
    joined['logfgas'] = joined.lgMHI - joined.lgMstar

    return joined.copy()


def get_a40_data():
    """Load the a.40 gas-fraction table joined with two auxiliary tables.

    Three CSV files under ``{PATH}/data`` are read. The delta_5 table and
    ``a40.datafile3.csv`` (both indexed by ``AGCNr``) are joined, in that
    order, onto the gas-fraction table through its ``AGCNr`` column. Clashing
    column names receive the ``_a40`` and ``_sdss`` suffixes respectively.

    Returns
    -------
    pandas.DataFrame
        A fresh copy of the joined table.
    """
    merged = pd.read_csv(f"{PATH}/data/a40-SDSS_gas-frac.csv")

    delta5 = pd.read_csv(f"{PATH}/data/a.40-delta_5.csv", index_col="AGCNr")
    merged = merged.join(delta5, on="AGCNr", rsuffix='_a40')

    sdss = pd.read_csv(f"{PATH}/data/a40.datafile3.csv", index_col="AGCNr")
    merged = merged.join(sdss, on="AGCNr", rsuffix='_sdss')

    return merged.copy()

In [5]:
def split_underdense(df, p=0.2, reverse=False):
    """Flag a validation subset selected by the `delta_5` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric `delta_5` column. It is left unmodified.
    p : float, default 0.2
        Quantile fraction that defines the flagged subset.
    reverse : bool, default False
        If False, rows whose `delta_5` is strictly below the `p` quantile
        (the lowest-density galaxies) are flagged. If True, rows whose
        `delta_5` is strictly above the `1 - p` quantile (the
        highest-density galaxies) are flagged instead.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with an added boolean `validation` column.
    """
    # Work on a copy so the caller's frame does not silently gain a column.
    out = df.copy()

    # Rows with a missing delta_5 compare False in either branch and so are
    # never flagged as validation.
    if reverse:
        out['validation'] = out.delta_5 > out.delta_5.quantile(q=1 - p)
    else:
        out['validation'] = out.delta_5 < out.delta_5.quantile(q=p)

    return out

In [6]:
def split_isolated(xg, split='isolated', remove_nonmembers=True):
    """Flag a validation subset by Yang+08 group-catalog environment code.

    Parameters
    ----------
    xg : pandas.DataFrame
        Table with an `env_code_B` column. It is left unmodified.
    split : str, default 'isolated'
        Which systems are flagged as validation:
          isolated      - env_code_B == 1 (isolated centrals only)
          group         - env_code_B in {0, 2} (group centrals + satellites)
          groupcentrals - env_code_B == 2 (group centrals only)
          satellites    - env_code_B == 0 (group satellites only)
    remove_nonmembers : bool, default True
        If True, rows with env_code_B == -1 (not in the catalog) are dropped.

    Returns
    -------
    pandas.DataFrame
        A copy of the (optionally filtered) table with an added boolean
        `validation` column.

    Raises
    ------
    ValueError
        If `split` is not one of the options listed above.
    """
    # env_code_B values selected for validation by each option.
    codes_by_split = {
        'isolated': (1,),
        'group': (0, 2),
        'groupcentrals': (2,),
        'satellites': (0,),
    }

    # Validate the option first so a bad value fails before any copying.
    # TypeError covers unhashable arguments, which previously also ended in
    # a ValueError.
    try:
        codes = codes_by_split[split]
    except (KeyError, TypeError):
        raise ValueError('Please choose correct option for `split` parameter.') from None

    df = xg[xg.env_code_B != -1].copy() if remove_nonmembers else xg.copy()
    df['validation'] = df.env_code_B.isin(codes)

    return df
    

# Options

In [7]:
import warnings
# Suppress every UserWarning for the rest of the session.
warnings.filterwarnings('ignore', category=UserWarning)

In [8]:
# Learning rate passed to `fit_one_cycle` in every training cell.
lr = 3e-2
# Number of epochs passed to `fit_one_cycle` for each run.
n_epochs = 10

In [9]:
# Batch size given to `.databunch(bs=bs)`.
bs = 32
# Image size given to `.transform(tfms, size=sz)`.
sz = 224

# Statistics handed to `.normalize(...)`; presumably per-channel (mean, std).
# NOTE(review): despite the name, the training cells apply these same stats
# to the a.40 images as well as the xGASS ones.
xGASS_stats = [tensor([-0.0169, -0.0105, -0.0004]), tensor([0.9912, 0.9968, 1.0224])]

# Augmentations: flips (including vertical) and rotations up to 15 (presumably
# degrees). As I read the fastai v1 arguments, max_zoom=1.0, max_lighting=0
# and max_warp=0 switch zoom, lighting and warp changes off — confirm.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_zoom=1.0,
    max_rotate=15.0,
    max_lighting=0,
    max_warp=0,
)

# ALFALFA $\alpha.40$

In [8]:
# Flag a.40 galaxies below the 0.2 quantile of delta_5 as validation (inspection only;
# the training cells reload the data themselves).
a40 = split_underdense(get_a40_data(), p=0.2)

In [9]:
# Number of training (False) and validation (True) galaxies in the split above.
a40.validation.value_counts()

False    5922
True     1477
Name: validation, dtype: int64

## Validate on underdense galaxies

In [13]:
# Train on overdense, validate on underdense
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the learner left over from the previous run, if any. In a fresh
    # kernel `learn` is not defined yet, which is why NameError is ignored.
    try:
        learn.purge();
    except NameError:
        pass

    # Reload a.40 and flag galaxies below the default 0.2 quantile of delta_5
    # as the validation set.
    df = split_underdense(get_a40_data()).copy()
    
    # Images are looked up by AGCNr under images-OC/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets (presumably on the
    # validation set, fastai's default for TTA — confirm).
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/a40-valid-underdense.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

## Validate on overdense galaxies

In [None]:
# Train on underdense, validate on overdense
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):
    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # reverse=True flags galaxies above the 0.8 quantile of delta_5 (p defaults to 0.2).
    df = split_underdense(get_a40_data(), reverse=True)

    # Images are looked up by AGCNr under images-OC/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/a40-valid-overdense.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


## Validate on randomized galaxies

In [13]:
# train on random
# Random-split baseline for a.40: a random 20% of galaxies is held out for
# validation. The split ignores delta_5, so the catalogue is loaded directly
# rather than through split_underdense, whose `validation` column was never
# used here. The rows are unchanged. This matches the xGASS randomized cell.

for i in range(5):
    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    df = get_a40_data()

    # Images are looked up by AGCNr under images-OC/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_by_rand_pct(0.2)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/a40-valid-random.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


# xGASS (Split by $\delta_5$)

In [14]:
# Flag xGASS galaxies below the default 0.2 quantile of delta_5 as validation
# (inspection only; the training cells reload the data themselves).
xg = split_underdense(get_xgass_data())

# Number of training (False) and validation (True) galaxies.
xg.validation.value_counts()

False    943
True     236
Name: validation, dtype: int64

## Validate on underdense galaxies

In [15]:
# Train on overdense, validate on underdense
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Reload xGASS and flag galaxies below the default 0.2 quantile of delta_5
    # as the validation set.
    df = split_underdense(get_xgass_data()).copy()
    
    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/xgass-valid-underdense.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

## Validate on overdense

In [16]:
# Train on underdense, validate on overdense
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # reverse=True flags galaxies above the 0.8 quantile of delta_5 (p defaults to 0.2).
    df = split_underdense(get_xgass_data(), reverse=True).copy()
    
    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/xgass-valid-overdense.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

## Validate on randomized split

In [17]:
# Validate on randomized galaxies
# Random-split baseline: a random 20% of xGASS is held out in each of five runs.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # No density split is needed; the validation set is drawn at random below.
    df = get_xgass_data()
    
    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_by_rand_pct(0.2)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/delta_5/xgass-valid-random.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

# xGASS (Yang+08 group catalog)

In [12]:
# Default options: drop galaxies with env_code_B == -1 and flag env_code_B == 1 as validation
# (inspection only; the training cells reload the data themselves).
xg = split_isolated(get_xgass_data())

# Number of galaxies per environment code after the non-members are removed.
xg.env_code_B.value_counts()

1    541
0    352
2    262
Name: env_code_B, dtype: int64

## Validate on isolated (541/1155)

In [20]:
# validate on all isolated
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Drops env_code_B == -1 rows and flags env_code_B == 1 as validation.
    df = split_isolated(get_xgass_data(), split='isolated')


    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    # Note wd=1e-3 here, versus 1e-4 in the delta_5 cells.
    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-isolated.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


In [21]:
# validate on randoms (same proportion as isolated)
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Called for its row filter (env_code_B == -1 dropped); the `validation`
    # column it adds is not used by the random split below.
    df = split_isolated(get_xgass_data(), split='isolated')


    # Random validation fraction 541/1155, as in the isolated section heading.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_by_rand_pct(541/1155)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-isolated_randoms.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


## Validate on groupcentrals (262/1155)

In [11]:
# validate on group centrals
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Drops env_code_B == -1 rows and flags env_code_B == 2 as validation.
    df = split_isolated(get_xgass_data(), split='groupcentrals')


    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-groupcentrals.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


In [12]:
# validate on randoms (same proportion as group centrals)
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Called for its row filter (env_code_B == -1 dropped); the `validation`
    # column it adds is not used by the random split below.
    df = split_isolated(get_xgass_data(), split='groupcentrals')


    # Random validation fraction 262/1155, as in the group-centrals section heading.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_by_rand_pct(262/1155)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-groupcentral_randoms.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


## Validate on group satellites (352/1155)

In [13]:
# validate on group satellites
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Drops env_code_B == -1 rows and flags env_code_B == 0 as validation.
    df = split_isolated(get_xgass_data(), split='satellites')


    # Images are looked up by GASS id under images-xGASS/; the target is logfgas.
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-satellites.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


In [14]:
# validate on randoms (same proportion as group satellites)
# Five repeated runs; each appends its validation RMSE to the results file.

for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    # Called for its row filter (env_code_B == -1 dropped); the `validation`
    # column it adds is not used by the random split below.
    df = split_isolated(get_xgass_data(), split='satellites')


    # Random validation fraction equal to the satellite fraction: 352 of the
    # 1155 catalog members have env_code_B == 0. (262 is the group-central
    # count and was a copy-paste error here.)
    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-xGASS", suffix=".jpg", cols="GASS"
        )
        .split_by_rand_pct(352/1155)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-3,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr)

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)

    # Mode 'a': results from all runs accumulate in the same file.
    with open(f'{PATH}/results/environment/groupcat/xgass-valid-satellite_randoms.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


# The overdensity transition

## q = 0.7
Note that $q = 1 - p$, where $p$ is the validation fraction: galaxies whose $\delta_5$ lies above the $q$-quantile are held out for validation.

In [12]:
# Quantile of delta_5 above which a.40 galaxies are held out for validation.
q = 0.7 

# overdensity
# p = 1-q with reverse=True flags galaxies above the q-quantile of delta_5.
# Five runs; each appends its validation RMSE to overdense_<q>.txt.
for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/overdense_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


# randomized
# Baseline with a random validation fraction of 1-q. The `validation` column
# added by split_underdense is not used by this loop.
for i in range(5):
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_by_rand_pct(1-q)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/random_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

## q = 0.6

In [13]:
# Quantile of delta_5 above which a.40 galaxies are held out for validation.
q = 0.6 

# overdensity
# p = 1-q with reverse=True flags galaxies above the q-quantile of delta_5.
# Five runs; each appends its validation RMSE to overdense_<q>.txt.
for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/overdense_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


# randomized
# Baseline with a random validation fraction of 1-q. The `validation` column
# added by split_underdense is not used by this loop.
for i in range(5):
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_by_rand_pct(1-q)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/random_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)

## q = 0.5

In [14]:
# Quantile of delta_5 above which a.40 galaxies are held out for validation.
q = 0.5 

# overdensity
# p = 1-q with reverse=True flags galaxies above the q-quantile of delta_5.
# Five runs; each appends its validation RMSE to overdense_<q>.txt.
for i in range(5):

    # Purge the previous learner if one exists (none in a fresh kernel).
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_from_df(col='validation')
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/overdense_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)


# randomized
# Baseline with a random validation fraction of 1-q. The `validation` column
# added by split_underdense is not used by this loop.
for i in range(5):
    try:
        learn.purge();
    except NameError:
        pass

    df = split_underdense(get_a40_data(), p=1-q, reverse=True)

    src = (
        ImageList.from_df(
            df, path=PATH, folder="images-OC", suffix=".jpg", cols="AGCNr"
        )
        .split_by_rand_pct(1-q)
        .label_from_df(cols=["logfgas"], label_cls=FloatList)
    )

    data = (
        src.transform(tfms, size=sz)
        .databunch(bs=bs)
        .normalize(xGASS_stats)
    )

    learn = Learner(
        data,
        model=mxresnet34(),
        opt_func=partial(Ranger),
        loss_func=root_mean_squared_error,
        wd=1e-4,
        bn_wd=False,
        true_wd=True,
    )

    # Replace the last layer with a single-output linear layer on the GPU.
    learn.model[-1] = nn.Linear(learn.model[-1].in_features, 1, bias=True).cuda()

    learn.fit_one_cycle(n_epochs, lr, )

    # Test-time-augmented predictions with their targets.
    fgas_pred, fgas_true = learn.TTA(scale=1.)
    with open(f'{PATH}/results/environment/transition/random_{q:.2f}.txt', 'a') as f:
        print('{:.4f}'.format(root_mean_squared_error(fgas_pred, fgas_true)), file=f)