### Imports

In [1]:
import sys
sys.path.append('../')

from wavy import *
import pandas as pd
import numpy as np

### Load data

In [2]:
# NOTE(review): unpickling runs arbitrary code — only load pickle files from a trusted source.
# Keep the first two rows and the Open/Close channels of MSFT and AAPL.
data_pd = (
    pd.read_pickle('processed.pkl')
    .iloc[0:2, :]
    .loc[:, (['MSFT', 'AAPL'], ['Open', 'Close'])]
)
data_pd

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


### DataBlock from single level DataFrame

In [3]:
# Select the AAPL sub-frame; its columns are single-level (Open, Close).
data = data_pd.loc[:, 'AAPL']
data

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-12-21,2.218566,2.246069
2005-12-22,2.258598,2.26196


In [4]:
# Build a block from the single-level frame, passing 'AAPL' as the asset label.
datablock = from_dataframe(data, 'AAPL')
datablock

Unnamed: 0_level_0,AAPL,AAPL
Unnamed: 0_level_1,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2005-12-21,2.218566,2.246069
2005-12-22,2.258598,2.26196


In [5]:
# Check the class of the object returned by `from_dataframe`.
type(datablock)

wavy.block.TimeBlock

### DataBlock from multi level DataFrame

In [6]:
# The columns of `data_pd` already carry two levels, so no asset label is passed.
datablock = from_dataframe(data_pd)
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [7]:
# Row index of the block.
datablock.index

DatetimeIndex(['2005-12-21', '2005-12-22'], dtype='datetime64[ns]', name='Date', freq=None)

In [8]:
# (rows, columns) of the block.
datablock.shape

(2, 4)

### Function `filter`

In [9]:
# Block before filtering, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [10]:
# Keep only the listed asset and channel.
datablock.filter(assets=['AAPL'], channels=['Open'])

Unnamed: 0_level_0,AAPL
Unnamed: 0_level_1,Open
Date,Unnamed: 1_level_2
2005-12-21,2.218566
2005-12-22,2.258598


### Function `drop`

In [11]:
# Block before dropping, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [12]:
# Remove the listed asset and the listed channel from the block.
datablock.drop(assets=['AAPL'], channels=['Open'])

Unnamed: 0_level_0,MSFT
Unnamed: 0_level_1,Close
Date,Unnamed: 1_level_2
2005-12-21,19.475122
2005-12-22,19.373114


### Comparison between `add_level` and `new_add_level`

The old function `add_level` seems wrong: it adds the asset level above `Date`. A new function, `new_add_level`, is proposed instead.

In [13]:
# Old approach: transpose, concat under a new top-level key, transpose back.
# Slice from `data_pd` (two-level columns): `data` is already the single-level
# AAPL frame, so `data.loc[:, 'AAPL']` raises KeyError on a fresh kernel.
level = data_pd.loc[:, 'AAPL']
level = pd.concat({'level_0': level.T}, names=['level_0']).T
level

level_0,level_0,level_0
Unnamed: 0_level_1,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2005-12-21,2.218566,2.246069
2005-12-22,2.258598,2.26196


In [14]:
# Proposed approach: build the two-level column index directly.
# Slice from `data_pd` (two-level columns): `data` is already the single-level
# AAPL frame, so `data.loc[:, 'AAPL']` raises KeyError on a fresh kernel.
new_level = data_pd.loc[:, 'AAPL']
new_level.columns = pd.MultiIndex.from_product([['level_0'], new_level.columns])
new_level

Unnamed: 0_level_0,level_0,level_0
Unnamed: 0_level_1,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2005-12-21,2.218566,2.246069
2005-12-22,2.258598,2.26196


### Rename assets

In [15]:
# Old name -> new name for each asset. Named `asset_names` rather than `dict`
# so the built-in `dict` type stays usable in later cells.
asset_names = {'AAPL': 'Apple', 'MSFT': 'Microsoft'}
datablock.rename_assets(asset_names)

Unnamed: 0_level_0,Microsoft,Microsoft,Apple,Apple
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


### Rename channels

In [16]:
# Old name -> new name for each channel. Named `channel_names` rather than `dict`
# so the built-in `dict` type stays usable in later cells.
channel_names = {'Open': 'Op', 'Close': 'Cl'}
datablock.rename_channels(channel_names)

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Op,Cl,Op,Cl
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


### Function `update`


In [17]:
# Block before `update`, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [18]:
# Re-label assets and channels while passing the values through the `matrix` property.
datablock.update(values = datablock.matrix, assets=['Microsoft', 'Apple'], channels=['Op', 'Cl'])

Unnamed: 0_level_0,Microsoft,Microsoft,Apple,Apple
Unnamed: 0_level_1,Op,Cl,Op,Cl
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [19]:
# Same call, but passing the values through the `tensor` property instead.
datablock.update(values = datablock.tensor, assets=['Microsoft', 'Apple'], channels=['Op', 'Cl'])

Unnamed: 0_level_0,Microsoft,Microsoft,Apple,Apple
Unnamed: 0_level_1,Op,Cl,Op,Cl
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


### Function `apply`

In [20]:
# Block before `apply`, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [21]:
# Reduce with `np.max` over the channels, giving one column per asset.
datablock.apply(np.max, on='channels')

Unnamed: 0_level_0,MSFT,AAPL
Unnamed: 0_level_1,amax,amax
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2005-12-21,19.577126,2.246069
2005-12-22,19.460543,2.26196


### Function `swap_cols`

In [22]:
# Block before `swap_cols`, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [23]:
# Swap the two column levels so channels become the outer level.
datablock.swap_cols()

Unnamed: 0_level_0,Open,Open,Close,Close
Unnamed: 0_level_1,MSFT,AAPL,MSFT,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,2.218566,19.475122,2.246069
2005-12-22,19.460543,2.258598,19.373114,2.26196


### Function `sort_assets`

In [24]:
# Block before `sort_assets`, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [25]:
# Reorder the asset level (AAPL now comes before MSFT).
datablock.sort_assets()

Unnamed: 0_level_0,AAPL,AAPL,MSFT,MSFT
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,2.218566,2.246069,19.577126,19.475122
2005-12-22,2.258598,2.26196,19.460543,19.373114


### Function `sort_channels`

In [26]:
# Block before `sort_channels`, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [27]:
# Reorder the channel level within each asset (Close now comes before Open).
datablock.sort_channels()

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Close,Open,Close,Open
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.475122,19.577126,2.246069,2.218566
2005-12-22,19.373114,19.460543,2.26196,2.258598


### Function `countna`

In [28]:
# Block before any missing values are introduced.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,2.218566,2.246069
2005-12-22,19.460543,19.373114,2.258598,2.26196


In [29]:
# Inject two missing values (row 0 / column 2 and row 1 / column 1) to exercise
# the NaN helpers. Assign through `.iloc` instead of `datablock.matrix[...]`:
# writing into the array returned by a property only reaches the block when that
# array happens to be a writable view of the underlying data.
datablock.iloc[0, 2] = np.nan
datablock.iloc[1, 1] = np.nan
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,,2.246069
2005-12-22,19.460543,,2.258598,2.26196


In [30]:
# MSFT rows that contain no missing value.
datablock['MSFT'].dropna()

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-12-21,19.577126,19.475122


In [31]:
# For each asset, print how many rows remain once rows with a missing value are dropped.
for asset in datablock.assets:
    print(len(datablock[asset].dropna()))

1
1


In [32]:
# Missing-value count reported per asset.
datablock.countna('asset')

MSFT    1
AAPL    1
dtype: int64

In [33]:
# Missing-value count reported per (asset, channel) column.
datablock.countna('channel')

MSFT  Open     0
      Close    1
AAPL  Open     1
      Close    0
dtype: int64

### Function `fillna`

In [34]:
# Block with the two injected NaNs, shown for comparison.
datablock

Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,,2.246069
2005-12-22,19.460543,,2.258598,2.26196


In [35]:
# Replace missing values with 0; the result is displayed, not assigned back.
# NOTE(review): the recorded run emitted a warning from `super().fillna(value, method)`.
datablock.fillna(0)

  return super().fillna(value, method)


Unnamed: 0_level_0,MSFT,MSFT,AAPL,AAPL
Unnamed: 0_level_1,Open,Close,Open,Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2005-12-21,19.577126,19.475122,0.0,2.246069
2005-12-22,19.460543,0.0,2.258598,2.26196


### Function `split_assets`

In [None]:
# Block before `split_assets`, shown for comparison.
datablock

In [None]:
# Presumably returns one block per asset — no output recorded, confirm.
datablock.split_assets()

### Function `from_dataframes`

#### From a `dict`

In [None]:
AAPL = data_pd.loc[:, 'AAPL']
MSFT = data_pd.loc[:, 'MSFT']

dict = {'AAPL': AAPL, 'MSFT': MSFT}
dict

In [None]:
from_dict(dict)

#### From `data`

In [None]:
# Single-level AAPL frame used as the first input below.
aapl = data_pd.loc[:, 'AAPL']
aapl

In [None]:
# Single-level MSFT frame used as the second input below.
msft = data_pd.loc[:, 'MSFT']
msft

In [None]:
# Build a block from a list of frames without passing asset names.
from_dataframes([aapl, msft])

#### From `data`, `assets`

In [None]:
# Single-level AAPL frame (same selection as in the previous section).
aapl = data_pd.loc[:, 'AAPL']
aapl

In [None]:
# Single-level MSFT frame (same selection as in the previous section).
msft = data_pd.loc[:, 'MSFT']
msft

In [None]:
# Build a block from a list of frames plus a matching list of asset names.
from_dataframes([aapl, msft], ['AAPL', 'MSFT'])

### Try `from_dataframes` with a multi-level DataFrame

In [None]:
# Give AAPL a two-level column index while MSFT keeps single-level columns.
AAPL = data_pd.loc[:, 'AAPL']
AAPL.columns = pd.MultiIndex.from_product([['level_0'], AAPL.columns])
MSFT = data_pd.loc[:, 'MSFT']

# Any exception raised by the call is caught and its message printed.
# NOTE: if the call succeeds, `datablock` is overwritten for the cells that follow.
try:
    datablock = from_dataframes([AAPL, MSFT], ['AAPL', 'MSFT'])
except Exception as e:
    print(e)

### Try `from_dataframes` with an input that is not a DataFrame

In [None]:
AAPL = data_pd.loc[:, 'AAPL']

# Second list element is the integer 1 instead of a DataFrame.
# Any exception raised by the call is caught and its message printed.
try:
    datablock = from_dataframes([AAPL, 1], ['AAPL', 'MSFT'])
except Exception as e:
    print(e)

### Try `from_dataframes` with different number of channels

In [None]:
# AAPL loses its 'Close' column, so the two inputs have different channel counts.
# `drop` without `inplace` returns a new frame, leaving `data_pd` untouched.
AAPL = data_pd.loc[:, 'AAPL'].drop(columns='Close')
MSFT = data_pd.loc[:, 'MSFT']

# Any exception raised by the call is caught and its message printed.
try:
    datablock = from_dataframes([AAPL, MSFT], ['AAPL', 'MSFT'])
except Exception as err:
    print(err)

### Error using DataFrame instead of DataBlock

In [None]:
# Plain-DataFrame case: slice the first four columns and read the outer column level.
print(type(data_pd))
simp_data = data_pd.iloc[:, 0:4]
# `id()` values show whether the slice is a distinct object; they differ between runs.
print(id(data_pd))
print(id(simp_data))
assets = simp_data.columns.levels[0]
assets


In [None]:
# Block case: repeat the same steps as for the plain DataFrame.
print(type(datablock))
# This assignment was commented out, which left `simp_datablock` undefined at
# this point when the notebook is run top to bottom on a fresh kernel.
simp_datablock = datablock.iloc[:, 0:4]
# `id()` values show whether the slice is a distinct object; they differ between runs.
print(id(datablock))
print(id(simp_datablock))
assets = simp_datablock.columns.levels[0]
assets

In [None]:
# Asset labels as reported by the block's own `assets` property.
datablock.assets

### Function `from_matrix`

In [None]:
# Collect the pieces passed to `from_matrix` below.
values = datablock.values
index = datablock.index
assets = datablock.assets
channels = datablock.channels

In [None]:
# Values taken from `datablock.values`.
values

In [None]:
# Row index taken from the block.
index

In [None]:
# Asset labels taken from the block.
assets

In [None]:
# Rebuild a block from the values plus index, asset and channel labels.
from_matrix(values, index=index, assets=assets, channels=channels)

### Function `from_tensor`

In [None]:
# Collect the pieces passed to `from_tensor` below.
# The slice end (10) may exceed the column count; `iloc` slices are clipped.
simp_datablock = datablock.iloc[:, 0:10]
index = list(simp_datablock.index)
assets = list(simp_datablock.assets)
channels = list(simp_datablock.channels)
# One 2-D array per asset, each holding that asset's channel columns.
# Sizes come from `assets`/`channels` instead of a hard-coded 2, so the cell
# also works for other asset/channel counts.
# Assumes the columns are grouped by asset, as in the frames displayed above.
n_channels = len(channels)
values = [
    simp_datablock.iloc[:, i * n_channels:(i + 1) * n_channels].values
    for i in range(len(assets))
]

In [None]:
# List of per-asset value arrays built in the previous cell.
values

In [None]:
# Row index as a plain list.
index

In [None]:
# Asset labels as a plain list.
assets

In [None]:
# Rebuild a block from the per-asset arrays plus index, asset and channel labels.
from_tensor(values, index=index, assets=assets, channels=channels)

### Function `as_dataframe`

In [None]:
# Class of the block before conversion.
type(datablock)

In [None]:
# Class of the object returned by `as_dataframe()`.
type(datablock.as_dataframe())

# Properties
### matrix

In [None]:
# Block shown next to its `matrix` representation.
datablock

In [None]:
# Values exposed through the `matrix` property.
datablock.matrix

### tensor

In [None]:
# Block shown next to its `tensor` representation.
datablock

In [None]:
# Values exposed through the `tensor` property.
# Presumably one 2-D slice per asset — no output recorded, confirm.
datablock.tensor

### Channels

In [None]:
# Channel labels of the block.
datablock.channels

### Assets

In [None]:
# Asset labels of the block.
datablock.assets

In [None]:
# Presumably the first timestamp of the block — no output recorded, confirm.
datablock.start

In [None]:
# Presumably the last timestamp of the block — no output recorded, confirm.
datablock.end

## Error test

In [None]:
# Reload the pickle without the row/column subsetting applied at the top of the notebook.
# NOTE(review): unpickling runs arbitrary code — only load pickle files from a trusted source.
data_pd = pd.read_pickle('processed.pkl')

datablock = from_dataframe(data_pd)
datablock

In [None]:
# Filter the full block down to two assets; no `channels` argument is passed.
new_db = datablock.filter(assets=['AAPL', 'MSFT'])

In [None]:
# Inspect the column index of the filtered block.
new_db.columns