In [1]:
import pandas as pd
import numpy as np
def get_group_columns(df: pd.DataFrame, group: str):
    """
    Look up the columns of ``df`` that belong to one feature group.

    Parameters
    ----------
    df : pd.DataFrame
        frame whose columns are searched; intended for multi-index columns.
    group : str
        name of the feature group, i.e. a value of the first level of the
        column index. Passing None selects every column.

    Returns
    -------
    ``df.columns`` unchanged when ``group`` is None; otherwise ``df.columns``
    indexed by whatever ``df.columns.get_loc(group)`` locates.

    Notes
    -----
    With single-level columns and a unique label, ``get_loc`` gives an integer
    position, so the label itself (not an Index) is returned.
    """
    columns = df.columns
    # No group requested: hand back the full column index.
    if group is None:
        return columns
    # get_loc may yield an int, a slice or a boolean mask; index with it as-is.
    located = columns.get_loc(group)
    return columns[located]

In [2]:
# Pools to sample from: the letters a-g and the integers 0-9.
alphabet = np.array([*'abcdefg'])
digits = np.array([*range(10)])

In [3]:
# Draw 100 random letters, 100 random digits and 100 standard-normal values.
avalue = alphabet[np.random.randint(low=0, high=len(alphabet), size=100)]
dvalue = digits[np.random.randint(low=0, high=len(digits), size=100)]
value = np.random.normal(loc=0, scale=1, size=100)

In [4]:
# Combine the three sample arrays into one frame and key its rows by the (a, d) pair.
df = pd.DataFrame(dict(a=avalue, d=dvalue, v=value)).set_index(['a', 'd'])

In [5]:
# Look up the 'v' group on the freshly built frame.
get_group_columns(df, group='v')

'v'

In [6]:
# Display the sample frame: rows indexed by (a, d), single value column 'v'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,v
a,d,Unnamed: 2_level_1
g,9,-0.607022
a,3,-2.799108
f,2,0.608779
c,6,0.325965
c,1,0.139376
...,...,...
d,6,0.427794
g,1,-0.127090
a,9,-0.975884
e,6,-1.427138


In [7]:
# Boolean selector over the 100 rows: True wherever a uniform [0, 1) draw falls below 0.1.
mask = np.less(np.random.rand(100), 0.1)

In [8]:
# Blank out the masked rows of column 'v' in one .loc assignment.
# The chained form df['v'][mask] = ... may write to a temporary copy
# (SettingWithCopyWarning) and is a no-op under pandas Copy-on-Write.
df.loc[mask, 'v'] = np.nan

In [9]:
# Repeat the 'v' group lookup after NaNs were written into the column.
get_group_columns(df, group='v')

'v'

In [10]:
# Select every row whose first index level ('a') equals 'e'; a non-tuple key is looked up on the row axis.
df.loc['e']

Unnamed: 0_level_0,v
d,Unnamed: 1_level_1
1,0.724889
2,0.366817
0,
6,0.146869
2,-1.0036
6,-0.075696
9,0.28346
6,0.285292
5,1.172986
7,


In [3]:
import warnings
warnings.filterwarnings('ignore')
import qlib
from qlib.config import REG_CN
from qlib.contrib.data.handler import Alpha158

# Keyword arguments for the Alpha158 handler: overall data window, fit window and instrument set.
data_handler_config = dict(
    start_time="2015-01-01",
    end_time="2021-05-17",
    fit_start_time="2015-01-01",
    fit_end_time="2020-12-31",
    instruments="csi300",
)

if __name__ == "__main__":
    # target_dir: the data path qlib is initialised with.
    provider_uri = "~/.qlib/qlib_data/qlib_cn_1d"
    qlib.init(provider_uri=provider_uri, region=REG_CN)

    # Build the Alpha158 handler from the configuration dict.
    h = Alpha158(**data_handler_config)

    # Print every column name the handler exposes.
    print(h.get_cols())

    # Print the label columns first, then the feature columns.
    for col_set in ("label", "feature"):
        print(h.fetch(col_set=col_set))

[29879:MainThread](2021-05-18 00:31:41,451) INFO - qlib.Initialization - [config.py:275] - default_conf: client.
[29879:MainThread](2021-05-18 00:31:41,460) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.
[29879:MainThread](2021-05-18 00:31:41,462) INFO - qlib.Initialization - [__init__.py:47] - data_path=/Users/harry/.qlib/qlib_data/qlib_cn_1d


['KMID', 'KLEN', 'KMID2', 'KUP', 'KUP2', 'KLOW', 'KLOW2', 'KSFT', 'KSFT2', 'OPEN0', 'HIGH0', 'LOW0', 'VWAP0', 'ROC5', 'ROC10', 'ROC20', 'ROC30', 'ROC60', 'MA5', 'MA10', 'MA20', 'MA30', 'MA60', 'STD5', 'STD10', 'STD20', 'STD30', 'STD60', 'BETA5', 'BETA10', 'BETA20', 'BETA30', 'BETA60', 'RSQR5', 'RSQR10', 'RSQR20', 'RSQR30', 'RSQR60', 'RESI5', 'RESI10', 'RESI20', 'RESI30', 'RESI60', 'MAX5', 'MAX10', 'MAX20', 'MAX30', 'MAX60', 'MIN5', 'MIN10', 'MIN20', 'MIN30', 'MIN60', 'QTLU5', 'QTLU10', 'QTLU20', 'QTLU30', 'QTLU60', 'QTLD5', 'QTLD10', 'QTLD20', 'QTLD30', 'QTLD60', 'RANK5', 'RANK10', 'RANK20', 'RANK30', 'RANK60', 'RSV5', 'RSV10', 'RSV20', 'RSV30', 'RSV60', 'IMAX5', 'IMAX10', 'IMAX20', 'IMAX30', 'IMAX60', 'IMIN5', 'IMIN10', 'IMIN20', 'IMIN30', 'IMIN60', 'IMXD5', 'IMXD10', 'IMXD20', 'IMXD30', 'IMXD60', 'CORR5', 'CORR10', 'CORR20', 'CORR30', 'CORR60', 'CORD5', 'CORD10', 'CORD20', 'CORD30', 'CORD60', 'CNTP5', 'CNTP10', 'CNTP20', 'CNTP30', 'CNTP60', 'CNTN5', 'CNTN10', 'CNTN20', 'CNTN30', 'CNT

In [7]:
# Fetch the label column set and keep the rows matched by '2021-05-10', with all columns.
h.fetch(col_set="label").loc['2021-05-10', :]

Unnamed: 0_level_0,Unnamed: 1_level_0,LABEL0
datetime,instrument,Unnamed: 2_level_1
2021-05-10,SH600000,0.006965
2021-05-10,SH600004,0.003407
2021-05-10,SH600009,0.002862
2021-05-10,SH600010,0.033520
2021-05-10,SH600011,-0.007026
2021-05-10,...,...
2021-05-10,SZ300498,0.013778
2021-05-10,SZ300529,0.000210
2021-05-10,SZ300601,0.018535
2021-05-10,SZ300628,0.083596


In [16]:
import numpy as np
import pandas as pd
# 10% of 110 truncated to an int, floored at a minimum of 1.
np.maximum(int(110 * 0.1), 1)

11