In [2]:
import pandas as pd
import numpy as np

In [63]:
import datetime

## Hierarchical Indexing

In [3]:
# Two parallel label arrays: the outer level (letters) and the inner level (integers).
outer_labels = ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd']
inner_labels = [1, 2, 3, 1, 3, 1, 2, 2, 3]
index = [outer_labels, inner_labels]

# Passing a list of arrays as the index makes pandas build a two-level MultiIndex.
# The values are unseeded random draws, so they differ on every run.
random_values = np.random.randn(9)
data = pd.Series(random_values, index=index)

In [5]:
data

a  1    0.480133
   2   -0.976361
   3   -1.572984
b  1    0.111526
   3   -1.391256
c  1   -0.892072
   2    0.645566
d  2   -0.732172
   3    0.725784
dtype: float64

In [8]:
data.index # MultiIndex

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [23]:
# Label slices on both index levels: outer level 'b'..'b', inner level 1..1.
data.loc['b':'b', 1:1]

b  1    0.111526
dtype: float64

In [24]:
# Cross-section on the inner level: every outer key that has an inner label of 2.
data.xs(2, level=1)

a   -0.976361
c    0.645566
d   -0.732172
dtype: float64

In [28]:
# Pivot the innermost index level (the default, level=-1) into columns.
wide_data = data.unstack(level=-1)

In [32]:
wide_data

Unnamed: 0,1,2,3
a,0.480133,-0.976361,-1.572984
b,0.111526,,-1.391256
c,-0.892072,0.645566,
d,,-0.732172,0.725784


In [33]:
wide_data.stack()

a  1    0.480133
   2   -0.976361
   3   -1.572984
b  1    0.111526
   3   -1.391256
c  1   -0.892072
   2    0.645566
d  2   -0.732172
   3    0.725784
dtype: float64

### exercise 2

In [44]:
dates = ['20100101', '20100101', '20100102', '20100102', '20100102', '20100103', '20100103']

In [49]:
# One row per (trade_date, symbol) observation. Prices are unseeded random
# draws, so the values differ on every run.
frame = pd.DataFrame(
    {
        'trade_date': pd.DatetimeIndex(dates),
        'symbol': ['AAPL', 'AMD', 'AAPL', 'AMD', 'INTEL', 'AAPL', 'AMD'],
        'open': np.random.normal(loc=100, scale=10, size=7),
        'close': np.random.normal(loc=102, scale=10, size=7),
    }
)

In [85]:
# The DataFrames we usually come across most likely look like this
frame

Unnamed: 0,trade_date,symbol,open,close
0,2010-01-01,AAPL,103.277643,105.384355
1,2010-01-01,AMD,114.430775,85.496614
2,2010-01-02,AAPL,130.264213,96.959235
3,2010-01-02,AMD,87.339445,104.022293
4,2010-01-02,INTEL,98.08842,108.082473
5,2010-01-03,AAPL,108.211164,85.753338
6,2010-01-03,AMD,97.586595,105.150173


In [52]:
# At this point we should use set_index to convert it into a long table
frame_long = frame.set_index(['trade_date', 'symbol'])

In [53]:
frame_long

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close
trade_date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,AAPL,103.277643,105.384355
2010-01-01,AMD,114.430775,85.496614
2010-01-02,AAPL,130.264213,96.959235
2010-01-02,AMD,87.339445,104.022293
2010-01-02,INTEL,98.08842,108.082473
2010-01-03,AAPL,108.211164,85.753338
2010-01-03,AMD,97.586595,105.150173


In [59]:
# Pivot the inner index level (level 1, named 'symbol') into columns to get the
# wide table: one row per trade_date, one column per symbol.
frame_long['close'].unstack(level='symbol')

symbol,AAPL,AMD,INTEL
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,105.384355,85.496614,
2010-01-02,96.959235,104.022293,108.082473
2010-01-03,85.753338,105.150173,


In [62]:
pd.Timestamp('20100101 00:00:00')

Timestamp('2010-01-01 00:00:00')

In [80]:
# Midnight on 2010-01-01; hour, minute and second default to zero.
dt = datetime.datetime(2010, 1, 1)

# datetime -> string
print(dt.strftime('%Y-%m-%d %H:%M:%S'))

# string -> datetime (last expression, so the notebook displays the result)
datetime.datetime.strptime('20100101 00:00:00', '%Y%m%d %H:%M:%S')

2010-01-01 00:00:00


datetime.datetime(2010, 1, 1, 0, 0)

## Enum

In [86]:
from enum import Enum

# Three-member enumeration built with the functional Enum API; members keep
# their explicit integer values and definition order.
# NOTE(review): the member named COLOR (between RED and GREEN) looks like a
# placeholder name — confirm it is intended.
Color = Enum('Color', [('RED', 1), ('COLOR', 2), ('GREEN', 3)])

In [92]:
# Read the value bound to the RED member. The original attribute name was
# truncated ("va"), which raises AttributeError instead of showing 1.
Color.RED.value

1

## datetime

In [93]:
import datetime

In [96]:
# 09:30:00 as a date-less time of day; seconds default to zero.
dt0 = datetime.time(9, 30)

In [98]:
# datetime.time does not support "+" with a timedelta (it raises TypeError),
# so anchor the time to an arbitrary date, shift the resulting datetime, and
# keep only the time-of-day part. A shift past midnight wraps around because
# the date is discarded.
(datetime.datetime.combine(datetime.date(2010, 1, 1), dt0)
 + datetime.timedelta(minutes=15)).time()

datetime.time(9, 45)

In [102]:
dir(datetime)

['MAXYEAR',
 'MINYEAR',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'date',
 'datetime',
 'datetime_CAPI',
 'sys',
 'time',
 'timedelta',
 'timezone',
 'tzinfo']

In [113]:
# Wrap the Timestamp in a Series, take the underlying datetime64 array and
# reinterpret it as 64-bit integers.
pd.Series(pd.Timestamp('20010101')).to_numpy().astype(np.int64)

array([978307200000000000])

In [110]:
# Repeats the preceding cell's conversion: the Timestamp is wrapped in a Series,
# the underlying array is taken with .values and cast to np.int64. The recorded
# output corresponds to nanoseconds since the Unix epoch for 2001-01-01.
pd.Series(pd.Timestamp('20010101')).values.astype(np.int64)

array([978307200000000000])

In [114]:
from enum import IntEnum


class ORDER_STATUS(IntEnum):
    """Order status codes, numbered consecutively from 0."""

    OPEN = 0
    FILLED = 1
    CANCELLED = 2
    REJECTED = 3
    HELD = 4

In [122]:
# Value lookup with an integer that has no member: ORDER_STATUS has five
# members numbered from start=0 (0 through 4), so 5 raises ValueError.
ORDER_STATUS(5)

ValueError: 5 is not a valid ORDER_STATUS

## ExchangeCalendar

In [123]:
from pandas_market_calendars import get_calendar

In [124]:
calendar = get_calendar('SSE')

In [127]:
dir(calendar)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_convert',
 '_customized_market_times',
 '_get_current_time',
 '_get_market_times',
 '_market_times',
 '_oc_market_times',
 '_off',
 '_prepare_regular_market_times',
 '_regmeta_class_registry',
 '_regular_market_timedeltas',
 '_set_time',
 '_special_dates',
 '_tdelta',
 '_tryholidays',
 'add_time',
 'adhoc_holidays',
 'aliases',
 'break_end',
 'break_end_on',
 'break_start',
 'break_start_on',
 'calendar_names',
 'change_time',
 'clean_dates',
 'close_offset',
 'close_time',
 'close_time_on',
 'days_at_time',
 'discontinued_market_times',
 ...]

In [154]:
calendar.market_times

['market_open', 'break_start', 'break_end', 'market_close']

In [155]:
calendar.close_time

datetime.time(15, 0, tzinfo=<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>)

In [157]:
calendar.open_time

datetime.time(9, 30, tzinfo=<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>)

In [142]:
calendar.valid_days(start_date='2005-01-04', end_date='2012-01-01', tz=None)

DatetimeIndex(['2005-01-04', '2005-01-05', '2005-01-06', '2005-01-07',
               '2005-01-10', '2005-01-11', '2005-01-12', '2005-01-13',
               '2005-01-14', '2005-01-17',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=1701, freq='C')

In [130]:
calendar._prepare_regular_market_times()