In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# manage ray's relative imports
# import ray
# runtime_env = {"working_dir": ".." }
# ray.init(runtime_env=runtime_env, dashboard_port=13065, include_dashboard=True)

from ray import tune
from ray.tune.suggest.optuna import OptunaSearch
from ray.tune import JupyterNotebookReporter

# manage beams's relative imports
import sys
sys.path.append('..')

from src.beam import beam_arguments, Experiment, Study
from src.beam import UniversalDataset, UniversalBatchSampler, PackedFolds
from src.beam import Algorithm
from src.beam import LinearNet, check_type, slice_to_index
from torchvision import transforms
import matplotlib.pyplot as plt

from src.beam import DataTensor
from src.beam.utils import is_notebook

from sklearn.datasets import fetch_covtype
import pandas as pd

In [2]:
# Fetch the covertype dataset through scikit-learn; later cells read its
# 'data', 'feature_names' and 'target' entries.
dataset = fetch_covtype()

In [3]:
# Pull the feature matrix and the matching column names out of the fetched dataset.
data, columns = dataset['data'], dataset['feature_names']

In [4]:
# Wrap the features in a DataFrame whose index labels are 0, 10, 20, ...
# (ten times the row position) rather than the default 0..n-1.
df = pd.DataFrame(data=data, columns=columns, index=10 * np.arange(len(data)))

In [5]:
# Build the DataTensor straight from the raw array and the column names.
# The commented-out line is the alternative of constructing it from the DataFrame.
# dt = DataTensor(df)
dt = DataTensor(data=data, columns=columns)

In [9]:
# Select two columns by name, then apply a stride-2 .loc slice twice in a row.
# NOTE(review): the recorded output lists labels 0, 2, 4, ... while the row count
# (145253) is a quarter of the 581012 rows — confirm that chained .loc[::2]
# keeps index labels and values aligned.
dt[['Aspect', 'Elevation']].loc[::2].loc[::2]

        Aspect  Elevation
0         51.0     2596.0
2        139.0     2804.0
4         45.0     2595.0
6         45.0     2606.0
8         45.0     2617.0
...        ...        ...
290496     0.0     2922.0
290498   282.0     2935.0
290500   324.0     2938.0
290502    41.0     2938.0
290504    64.0     2932.0

[145253 rows x 2 columns]

DataTensor:
device:		cpu
requires_grad:	False

In [15]:
# Single-column access by name; .values is displayed as a float64 torch tensor
# in the recorded output.
dt['Aspect'].values

tensor([ 51.,  56., 139.,  ..., 159., 170., 165.], dtype=torch.float64)

In [72]:
# Build the dataset with keyword inputs: the feature tensor as x, the covertype labels as y.
universal_dataset = UniversalDataset(x=dt.values, y=dataset['target'])

In [76]:
# Same inputs passed positionally instead of by keyword; this rebinds universal_dataset.
universal_dataset = UniversalDataset(dt.values, dataset['target'])

In [5]:
# data_test = torch.LongTensor(dataset['data'][:100000])
# data_train = torch.LongTensor(dataset['data'][100000:])

# Split the feature matrix at row 100000: the head stays as-is, the tail is the
# part that later gets extra columns appended.
data_org, data_expand = dataset['data'][:100000], dataset['data'][100000:]

In [6]:
# Standard-normal noise with the same shape as data_expand; the next cell appends it
# as additional columns. A fixed-seed Generator replaces the unseeded global RNG so
# that Restart & Run All reproduces the same values.
extra = np.random.default_rng(0).standard_normal(data_expand.shape)

In [7]:
# Stack the noise columns to the right of the original feature columns.
# NOTE: the cell rebinds its own input, so it is not idempotent — running it a
# second time appends the noise columns again.
data_expand = np.hstack((data_expand, extra))

In [10]:
# Pack the two arrays as named folds 'basic' and 'extra', with quick_getitem
# explicitly switched off.
pf = PackedFolds({'basic': data_org, 'extra': data_expand}, quick_getitem=False)

In [11]:
# Take a 4-element range that straddles the 100000-row boundary between the two folds.
pf2 = pf[99998:100002]

In [12]:
# Fetch the 'extra' fold. This rebinds `extra`, which previously held the
# random-noise array used to widen data_expand.
extra = pf.get_fold('extra')

In [11]:
# Keep a handle on the fold's info table for the indexing experiments that follow.
info = extra.info

In [None]:
# Feed the info table's own index labels through inverse_map.
# presumably this yields the positions 0..n-1 — TODO confirm (cell has no recorded output).
ind_index = info.inverse_map(info.index)

In [14]:
# Resolve a full slice against info.index; the recorded output is the complete
# run of index labels (100000 ... 581011).
slice_to_index(slice(None), l=len(info.index), sliced=info.index)

tensor([100000, 100001, 100002,  ..., 581009, 581010, 581011])

In [13]:
# Display the whole info table through .loc with a full slice
# (columns fold / fold_index / offset in the recorded output).
extra.info.loc[:]

        fold  fold_index  offset
100000     0           0       0
100001     0           1       1
100002     0           2       2
100003     0           3       3
100004     0           4       4
...      ...         ...     ...
581007     0      481007  481007
581008     0      481008  481008
581009     0      481009  481009
581010     0      481010  481010
581011     0      481011  481011

[481012 rows x 3 columns]

DataTensor:
device:		cpu
requires_grad:	False

In [None]:
# All rows, first 54 columns of the 'extra' fold. The noise columns were appended
# after the original features, so this slice precedes them.
x = extra[:, :54]

In [35]:
# Inspect the fold's sampling_method attribute ('offset' in the recorded output).
extra.sampling_method

'offset'

In [165]:
# NOTE(review): `index` is not assigned in any visible cell, so this relies on
# leftover kernel state and will raise NameError after Restart & Run All.
# The recorded output is an empty int64 tensor.
extra.info.inverse_map(index)

tensor([], dtype=torch.int64)

In [166]:
# Inspect the info table's mapping_method attribute ('sparse' in the recorded output).
extra.info.mapping_method

'sparse'

In [13]:
# Repeat of the mapping_method inspection from an earlier cell; same recorded value ('sparse').
extra.info.mapping_method

'sparse'

In [14]:
# Rebind info to the fold's info table (same expression as an earlier cell).
info = extra.info

In [15]:
# Display index_map. In the recorded output it is a sparse COO tensor of size
# (581012, 1) whose row indices are the index labels and whose values are 0..481011.
info.index_map

tensor(indices=tensor([[100000, 100001, 100002,  ..., 581009, 581010, 581011],
                       [     0,      0,      0,  ...,      0,      0,      0]]),
       values=tensor([     0,      1,      2,  ..., 481009, 481010, 481011]),
       size=(581012, 1), nnz=481012, layout=torch.sparse_coo)

In [17]:
# Look up four labels along dim 0 of the sparse index_map, then coalesce and read
# .values() to get the stored value for each label.
torch.index_select(
    info.index_map,
    0,
    torch.tensor([100001, 100002, 581009, 581010], dtype=torch.long),
).coalesce().values()

tensor([     1,      2, 481009, 481010])

In [35]:
# pandas counterpart of the lookup table: index labels as the Series index,
# positions 0..n-1 as the values.
s = pd.Series(index=info.index.cpu().numpy(), data=np.arange(len(info.index)))

In [37]:
# Look up every second index label through the Series.
s[info.index.cpu().numpy()[::2]]

100000         0
100002         2
100004         4
100006         6
100008         8
           ...  
581002    481002
581004    481004
581006    481006
581008    481008
581010    481010
Length: 240506, dtype: int64

In [25]:
# index_select on the sparse map for the first 10000 labels; the recorded result
# is still a sparse COO tensor (no coalesce / values() here).
torch.index_select(info.index_map, 0, info.index[:10000])

tensor(indices=tensor([[   0,    1,    2,  ..., 9997, 9998, 9999],
                       [   0,    0,    0,  ...,    0,    0,    0]]),
       values=tensor([   0,    1,    2,  ..., 9997, 9998, 9999]),
       size=(10000, 1), nnz=10000, layout=torch.sparse_coo)

In [25]:
# Info table of the 'extra' fold inside the 4-element slice pf2; the recorded
# output holds only labels 100000 and 100001.
pf2.get_fold('extra').info.loc[:]

        fold  fold_index  offset
100000     0           0       0
100001     0           1       1

DataTensor:
device:		cpu
requires_grad:	False

In [28]:
# Same expression that an earlier cell bound to `extra`, assigned to a new name here.
pfe = pf.get_fold('extra')

In [30]:
# Recorded output is 'folds', whereas extra.sampling_method in an earlier cell
# recorded 'offset' for the same pf.get_fold('extra') expression.
# NOTE(review): the two cells carry unrelated execution counts, so the outputs may
# come from different library states — confirm which value is current.
pfe.sampling_method

'folds'

In [14]:
def f(x, n_features=54):
    """Return the raw values of the leading ``n_features`` columns of ``x``.

    Args:
        x: Object that supports two-axis indexing (``x[:, :k]``) and whose
            indexed result exposes a ``.values`` attribute.
        n_features: Number of leading columns to keep. Defaults to 54, the
            width this helper previously hard-coded.

    Returns:
        ``x[:, :n_features].values``.
    """
    return x[:, :n_features].values

In [13]:
# Raw values of the 'basic' fold (a float64 tensor in the recorded output).
pf.get_fold('basic').values

tensor([[2.5960e+03, 5.1000e+01, 3.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [2.5900e+03, 5.6000e+01, 2.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [2.8040e+03, 1.3900e+02, 9.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [3.0830e+03, 4.5000e+01, 1.8000e+01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [3.0770e+03, 4.1000e+01, 1.5000e+01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [3.0730e+03, 5.0000e+01, 1.2000e+01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]], dtype=torch.float64)

In [None]:
# Apply f to the 'extra' fold, i.e. take the values of its first 54 columns.
# NOTE(review): the tuple of slices printed under this cell looks like debug output
# of the index received by the fold's __getitem__ — confirm where it is printed.
y = f(pf.get_fold('extra'))

(slice(None, None, None), slice(None, 54, None))


In [14]:
# Per-fold transforms keyed by fold name: 'basic' yields its raw values, 'extra'
# yields the values of its first 54 columns.
pfa = pf.apply({
    'basic': lambda fold: fold.values,
    'extra': lambda fold: fold[:, :54].values,
})

In [15]:
# Display the result of pf.apply; the recorded output is a dict keyed by fold
# name ('basic', 'extra').
pfa

{'basic': tensor([2.5960e+03, 5.1000e+01, 3.0000e+00, 2.5800e+02, 0.0000e+00, 5.1000e+02,
        2.2100e+02, 2.3200e+02, 1.4800e+02, 6.2790e+03, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       dtype=torch.float64), 'extra': tensor([ 2.5900e+03,  5.6000e+01,  2.0000e+00,  2.1200e+02, -6.0000e+00,
         3.9000e+02,  2.2000e+02,  2.3500e+02,  1.5100e+02,  6.2250e+03,
         1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0

In [59]:
# Shape of the 'basic' entry's values, recorded as torch.Size([2, 54]).
# NOTE(review): the recorded display of pfa shows plain tensors and this cell has an
# unrelated execution count — confirm pfa entries still expose `.values` as an attribute.
pfa['basic'].values.shape

torch.Size([2, 54])

In [58]:
# Shape of the 'extra' entry's values, recorded as torch.Size([2, 54]).
# NOTE(review): this cell has an unrelated execution count — confirm pfa entries
# still expose `.values` as an attribute.
pfa['extra'].values.shape

torch.Size([2, 54])

In [11]:
# Redundant: slice_to_index is already imported from src.beam in the first cell.
from src.beam import slice_to_index

In [17]:
# NumPy reference for a reversed slice with stop=-10: elements 99 down to 91.
np.arange(100)[:-10:-1]

array([99, 98, 97, 96, 95, 94, 93, 92, 91])

In [18]:
# Same slice through slice_to_index with length 100; the recorded tensor
# (99 ... 91) matches the NumPy reference from the previous cell.
slice_to_index(slice(None, -10, -1), l=100)

(99, 90, -1)


tensor([99, 98, 97, 96, 95, 94, 93, 92, 91])

In [125]:
# Check that nested star-unpacking, including an empty tuple, collapses to the
# plain argument list (1, 2, 3) — the recorded shape is torch.Size([1, 2, 3]).
torch.randn(*(*(1, 2, 3), *())).shape

torch.Size([1, 2, 3])

In [41]:
# Reference array 0..99 for the slicing comparisons below. This rebinds `x`,
# which an earlier cell used for the fold slice extra[:, :54].
x = np.arange(100)

In [45]:
# slice_to_index is the helper actually imported from src.beam; the previous name
# `slice_to_array` is not defined anywhere in this notebook and raises NameError
# on a fresh kernel.
# NOTE(review): the output recorded under the old name (100 ... 91) differs from
# NumPy's x[-10::-1] (90 ... 0) — confirm how a negative start combined with a
# negative step is meant to be resolved.
slice_to_index(slice(-10, None, -1), l=100)

(100, 90, -1)


tensor([100,  99,  98,  97,  96,  95,  94,  93,  92,  91])

In [47]:
# NumPy result for stop=-10 with step -1 (99 ... 91). Note this is a different slice
# from slice(-10, None, -1) used in the previous cell, which has start=-10.
x[:-10:-1]

array([99, 98, 97, 96, 95, 94, 93, 92, 91])

In [32]:
# Descending range 90 ... 1; the stop value 0 is exclusive.
np.arange(90, 0, -1)

array([90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74,
       73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57,
       56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40,
       39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23,
       22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,
        5,  4,  3,  2,  1])

In [92]:
# Display the 4-element range straddling the 100000-row fold boundary
# (same slice that an earlier cell stored as pf2).
pf[99998:100002]

{'basic': tensor([[3.0770e+03, 4.1000e+01, 1.5000e+01, 3.0900e+02, 4.3000e+01, 3.8890e+03,
         2.2100e+02, 2.0500e+02, 1.1600e+02, 4.2140e+03, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [3.0730e+03, 5.0000e+01, 1.2000e+01, 2.8300e+02, 5.2000e+01, 3.9130e+03,
         2.2500e+02, 2.1400e+02, 1.2100e+02, 4.1870e+03, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+

In [26]:
# Probe check_type with a dict of NumPy arrays.
# NOTE(review): the recorded output is a ValueError ("truth value of an array ... is
# ambiguous"), so this cell stops Restart & Run All — presumably check_type evaluates
# an array in a boolean context; confirm and fix in src.beam, or guard this call.
check_type({'a': np.random.randn(100), 'b': np.random.randn(100)})

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()