# 数据流例子

In [15]:
# !pip install faker
import sys
sys.path.append("../")

from fppy.lazy_list import LazyList
from faker import Faker
from IPython.display import display
from datetime import datetime as dt

## 演示如何使用`Faker`

In [10]:
fk = Faker()
display(fk.chrome())
display(fk.date_time_between_dates().strftime("%Y-%m-%d %H:%M:%S"))
display(fk.random_int(14, 50))
display(fk.name())

'Mozilla/5.0 (iPad; CPU iPad OS 9_3_6 like Mac OS X) AppleWebKit/533.1 (KHTML, like Gecko) CriOS/24.0.867.0 Mobile/77X748 Safari/533.1'

'2021-11-09 16:40:44'

44

'Joshua Nielsen'

## 制作一个生成假数据的生成器

In [26]:
def data_stream_gen():
    """数据流生成器
    """
    fk = Faker()
    Faker.seed(0) # 为了让下面的显示结果一致
    while True:
        yield {
            "create_time": (
                fk
                .date_time_between_dates()
                .strftime("%Y-%m-%d %H:%M:%S")),
            "name": fk.name(),
            "email": fk.email(),
            "age": fk.random_int(14, 50),
            "address": fk.address(),
            "device": {
                "browser": fk.chrome()
            }
        }

查看是不是可用：

In [24]:
LazyList(data_stream_gen()).take(3).collect()

[{'create_time': '2021-11-09 16:47:38',
  'name': 'Jennifer Green',
  'email': 'ysullivan@example.com',
  'age': 44,
  'address': '242 Christine Glen\nWest Corey, TX 43780',
  'device': {'browser': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/46.0.833.0 Safari/534.2'}},
 {'create_time': '2021-11-09 16:47:38',
  'name': 'Amy Stark',
  'email': 'johnponce@example.net',
  'age': 42,
  'address': '714 Mann Plaza Suite 839\nSeanfurt, OK 32234',
  'device': {'browser': 'Mozilla/5.0 (iPad; CPU iPad OS 10_3_4 like Mac OS X) AppleWebKit/533.0 (KHTML, like Gecko) CriOS/28.0.827.0 Mobile/32P094 Safari/533.0'}},
 {'create_time': '2021-11-09 16:47:38',
  'name': 'Joanne Keller',
  'email': 'christopher91@example.com',
  'age': 35,
  'address': '41352 Simmons Circle\nPort Dustinbury, OK 83627',
  'device': {'browser': 'Mozilla/5.0 (Linux; Android 4.0.2) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/26.0.893.0 Safari/535.2'}}]

## 制作一些处理流数据的函数


1. `drop_device`：删除设备信息
2. `user_info`：将所有用户信息归并在一起
3. `change_create_time`：修改时间格式

In [16]:
def drop_device(x):
    """删除device信息
    """
    return { k:v for k, v in x.items() if k != 'device'}

def user_info(x):
    """将用户信息包在一起
    """
    user_info_name = {"name", "email", "age", "adress"}
    return {**{
        'user_info': {
            k:v
            for k, v in x.items() if k in user_info_name
        },
    }, **{k: v for k, v in x.items() if k not in user_info_name}}

def change_create_time(x):
    """修改日期格式
    """
    res = x.copy()
    res['create_time'] = dt.strptime(
        res['create_time'],
        "%Y-%m-%d %H:%M:%S"
    ).timestamp()
    return res

## 测试处理结果

In [25]:
LazyList(data_stream_gen())\
    .map(drop_device)\
    .map(user_info)\
    .map(change_create_time)\
    .take(3)\
    .collect()

[{'user_info': {'name': 'Jennifer Green',
   'email': 'ysullivan@example.com',
   'age': 44},
  'create_time': 1636447661.0,
  'address': '242 Christine Glen\nWest Corey, TX 43780'},
 {'user_info': {'name': 'Amy Stark',
   'email': 'johnponce@example.net',
   'age': 42},
  'create_time': 1636447661.0,
  'address': '714 Mann Plaza Suite 839\nSeanfurt, OK 32234'},
 {'user_info': {'name': 'Joanne Keller',
   'email': 'christopher91@example.com',
   'age': 35},
  'create_time': 1636447661.0,
  'address': '41352 Simmons Circle\nPort Dustinbury, OK 83627'}]