# 熟悉DataFrame的创建方式

In [18]:
from pandas import DataFrame

In [19]:
data = [
    {'id':1, 'time':'2020-01-02', 'var_1':1},
    {'id':1, 'time':'2020-01-02', 'var_2':1},
    {'id':1, 'time':'2020-01-02', 'var_1':1},
    {'id':1, 'time':'2020-01-02', 'var_3':1},    
]

In [20]:
DataFrame(data)

Unnamed: 0,id,time,var_1,var_2,var_3
0,1,2020-01-02,1.0,,
1,1,2020-01-02,,1.0,
2,1,2020-01-02,1.0,,
3,1,2020-01-02,,,1.0


In [21]:
data = {
    'a':{'id':1, 'time':'2020-01-02', 'var_1':1},
    'b':{'id':1, 'time':'2020-01-02', 'var_1':1},        
}


In [22]:
DataFrame.from_dict(data, orient='index')

Unnamed: 0,id,time,var_1
a,1,2020-01-02,1
b,1,2020-01-02,1


In [23]:
data= {
    'a': [1, 2],
    'b': [3, 4]
}

In [24]:
DataFrame(data)

Unnamed: 0,a,b
0,1,3
1,2,4


In [25]:
DataFrame.from_dict(data, orient='index')

Unnamed: 0,0,1
a,1,2
b,3,4


## String和datetime的快捷操作

In [26]:
from pandas import to_datetime
df = DataFrame(
    {
        'str':['Mr Jabson william', 'miss lee'],
        'dt':to_datetime(['2019-03-01', '2018-04-02']),    
    }
)
df

Unnamed: 0,str,dt
0,Mr Jabson william,2019-03-01
1,miss lee,2018-04-02


In [27]:
(
    df['str']
    .str.title()
    .str.split(' ', expand=True)
)

Unnamed: 0,0,1,2
0,Mr,Jabson,William
1,Miss,Lee,


In [28]:
df['dt'].dt.day

0    1
1    2
Name: dt, dtype: int64

In [29]:
df['dt'].dt.year

0    2019
1    2018
Name: dt, dtype: int64

In [30]:
dir(df['dt'].dt )

['__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accessors',
 '_add_delegate_accessors',
 '_constructor',
 '_delegate_method',
 '_delegate_property_get',
 '_delegate_property_set',
 '_deprecations',
 '_dir_additions',
 '_dir_deletions',
 '_ensure_type',
 '_freeze',
 '_get_values',
 '_reset_cache',
 'ceil',
 'date',
 'day',
 'day_name',
 'dayofweek',
 'dayofyear',
 'days_in_month',
 'daysinmonth',
 'floor',
 'freq',
 'hour',
 'is_leap_year',
 'is_month_end',
 'is_month_start',
 'is_quarter_end',
 'is_quarter_start',
 'is_year_end',
 'is_year_start',
 'microsecond',
 'minute',
 'month',
 'month_name',
 'nanosecond',
 'normalize',
 'quarter',
 'round'

# ```is```和```==```的区别
```==``` is based on value and ```is``` is based on identify

In [31]:
class Person:
    def __init__(self, name):
        self.name = name
        
    def __eq__(self, another):
        return self.name == another.name

In [32]:
p1  = Person('Jack')
p2  = Person('Jack')

In [33]:
p1 == p2

True

In [34]:
p1 is p2

False

In [35]:
hex(id(p1)), hex(id(p2))

('0x1109731d0', '0x110973190')

## 更多的例子

In [36]:
1 is 1

True

In [37]:
hex(id(1))

'0x10622ef10'

In [38]:
hex(id(1))

'0x10622ef10'

In [39]:
True is True

True

In [40]:
None is None

True

In [41]:
hex(id(None))

'0x1061e5128'

In [42]:
hex(id(None))

'0x1061e5128'

In [43]:
x = 'abc'
y = 'abc'

x is y

True

In [44]:
def create_abc():
    with open('text.txt', 'r') as f:
        return f.readline()
    
'abc' is create_abc()

False

In [45]:
x = 'abc def'
y = 'abc def'

x is y

False

# 利用loguru记录结构化信息

In [46]:
import time
from loguru import logger

logger.remove()
logger.add('log.txt', serialize=True)

1

In [47]:
def runTask(task_id):
    time.sleep(0.5)
    
    l = logger.bind(task_id=1) # 记录额外属性
    
    l.info('task done')

In [48]:
for i in range(10):
    runTask(i)

In [49]:
import json
def load_log(f):
    with open(f, 'r') as f:
        lines = f.readlines()
    data = [json.loads(l) for l in lines]
    
    return data

In [50]:
load_log('log.txt')[-1]

{'text': '2020-03-02 14:47:00.210 | INFO     | __main__:runTask:6 - task done\n',
 'record': {'elapsed': {'repr': '0:00:05.057090', 'seconds': 5.05709},
  'exception': None,
  'extra': {'task_id': 1},
  'file': {'name': '<ipython-input-47-be951309a907>',
   'path': '<ipython-input-47-be951309a907>'},
  'function': 'runTask',
  'level': {'icon': 'ℹ️', 'name': 'INFO', 'no': 20},
  'line': 6,
  'message': 'task done',
  'module': '<ipython-input-47-be951309a907>',
  'name': '__main__',
  'process': {'id': 23249, 'name': 'MainProcess'},
  'thread': {'id': 4620486080, 'name': 'MainThread'},
  'time': {'repr': '2020-03-02 14:47:00.210404+08:00',
   'timestamp': 1583131620.210404}}}