In [10]:
from bs4 import BeautifulSoup

def parse_event(text, verbose=True):
    """Extract top-level text snippets and image URLs from an HTML fragment.

    :param text: HTML fragment to parse.
    :param verbose: when true, print every visited node and image while parsing.
    :return: ``(props, images)``. ``props`` holds the non-blank strings found on
        the fragment's top-level nodes; ``images`` holds the ``src`` value of
        every ``<img>`` tag that has one, in document order.
    """
    soup = BeautifulSoup(text, 'html.parser')
    props = []
    images = []
    # Only direct children are visited; `.string` is None for a tag that does
    # not wrap exactly one string, so such tags contribute nothing.
    for node in soup:
        if verbose:
            print(node.name, '~', node)
        content = node.string
        if content is not None and content.strip():
            if verbose:
                print('\t@', content)
            props.append(content)
    if verbose:
        print('** find images')
    for img in soup.find_all('img'):
        # An <img> without a src attribute used to raise KeyError here;
        # look it up with .get() and skip the tag instead.
        src = img.get('src')
        if verbose:
            print(img)
            print('\t', src)
        if src is None:
            continue
        images.append(src)
    return props, images

# Sample HTML fragments for trying out parse_event.
# Disabled alternative: an image-first fragment followed by a price field.
# text="<img src=\"https://p.qpic.cn/qqumall/0/15435437435c009bbf78be8.jpg/0\" referrerpolicy=\"no-referrer\" /><br>\n        <strong>价格:</strong> 128元"
# Active sample: leading text (an update timestamp) followed by a single image.
text="更新时间：2018-07-08 01:00:00 <img referrerpolicy=\"no-referrer\" src=\"http://i0.hdslb.com/bfs/archive/9ab97b930842860a0db78dd0e80a32a01f77cf31.jpg@320w_200h\">"
# verbose=False suppresses the per-node debug prints; only the results are shown.
props, images=parse_event(text, verbose=False)
print('☌', props)
print('❀', images)

☌ ['更新时间：2018-07-08 01:00:00 ']
❀ ['http://i0.hdslb.com/bfs/archive/9ab97b930842860a0db78dd0e80a32a01f77cf31.jpg@320w_200h']


In [8]:
# Second sample: short text, a <br>, then an image whose src is protocol-relative (no scheme).
text="无<br><img referrerpolicy=\"no-referrer\" src=\"//i1.hdslb.com/bfs/archive/a56c60806c846dd72bd473f752f142a5b9f8346b.png\">"
# verbose=False suppresses the per-node debug prints; only the results are shown.
props, images=parse_event(text, verbose=False)
print('☌', props)
print('❀', images)

☌ ['无']
❀ ['//i1.hdslb.com/bfs/archive/a56c60806c846dd72bd473f752f142a5b9f8346b.png']


In [1]:
from sagas.ofbiz.entities import OfEntity as e
# List the WorkEffortType records as the cell's displayed result.
# NOTE(review): presumably 'df' selects DataFrame-style output — confirm against OfEntity.
e('df').allWorkEffortType()

Unnamed: 0,lastUpdatedStamp,parentTypeId,workEffortTypeId,hasTable,createdTxStamp,createdStamp,description,lastUpdatedTxStamp
0,2019-03-03 22:33:19.221,,ACTIVITY,N,2019-03-03 22:33:19.142,2019-03-03 22:33:19.221,Workflow Activity,2019-03-03 22:33:19.142
1,2019-03-03 22:33:13.202,,ASSET_USAGE,N,2019-03-03 22:33:13.126,2019-03-03 22:33:13.202,Fixed Asset Usage (rental),2019-03-03 22:33:13.126
2,2019-03-03 22:33:19.224,,AVAILABLE,N,2019-03-03 22:33:19.142,2019-03-03 22:33:19.224,Available,2019-03-03 22:33:19.142
3,2019-03-03 22:33:13.335,EVENT,BUSINESS_TRAVEL,N,2019-03-03 22:33:13.126,2019-03-03 22:33:13.335,Business Travel,2019-03-03 22:33:13.126
4,2019-03-03 22:33:13.205,,EVENT,N,2019-03-03 22:33:13.126,2019-03-03 22:33:13.205,Event,2019-03-03 22:33:13.126
5,2019-03-03 22:33:13.338,EVENT,MEETING,N,2019-03-03 22:33:13.126,2019-03-03 22:33:13.338,Meeting,2019-03-03 22:33:13.126
6,2019-03-03 22:33:19.223,,MILESTONE,N,2019-03-03 22:33:19.142,2019-03-03 22:33:19.223,Milestone,2019-03-03 22:33:19.142
7,2019-03-03 22:33:13.341,EVENT,PERSONAL_TIMEOFF,N,2019-03-03 22:33:13.126,2019-03-03 22:33:13.341,Personal Time Off,2019-03-03 22:33:13.126
8,2019-03-03 22:33:19.227,,PHASE,N,2019-03-03 22:33:19.142,2019-03-03 22:33:19.227,Project Phase,2019-03-03 22:33:19.142
9,2019-03-03 22:33:27.115,TEMPLATE,PHASE_TEMPLATE,,2019-03-03 22:33:27.098,2019-03-03 22:33:27.115,Project Phase Template,2019-03-03 22:33:27.098


In [11]:
# [applications/workeffort/testdef/data/WorkEffortTestData.xml]
# partyId, partyTypeId, statusId, groupName
# contactMechTypeId
# workEffortId, currentStatusId, workEffortName, workEffortTypeId
# roleTypeId, fromDate
# timeEntryId, hours
# communicationEventId, communicationEventTypeId
# noteId, internalNote

In [41]:
import json_utils
import sagas.util.pandas_helper as ph
from sagas.util.date_time_util import *

def get_event_attrs(entry):
    """Derive the optional ``date`` and ``image`` attributes of a feed entry.

    The entry's ``summary`` HTML is run through ``parse_event``. A text snippet
    starting with the update-time label supplies ``date`` (the last such snippet
    wins); the first image URL, if any, supplies ``image``.

    :param entry: mapping with a ``summary`` key holding an HTML fragment.
    :return: dict containing ``date`` and/or ``image`` only when they were found.
    """
    label = '更新时间：'
    snippets, image_urls = parse_event(entry['summary'], verbose=False)
    attrs = {}
    stamps = [s[len(label):].strip() for s in snippets if s.startswith(label)]
    if stamps:
        attrs['date'] = stamps[-1]
    if image_urls:
        attrs['image'] = image_urls[0]
    return attrs

def extract_id(id):
    """Return the part of a bilibili URL that follows its section marker.

    Example: ``extract_id("https://www.bilibili.com/bangumi/play/ep216075")``
    gives ``"play/ep216075"``.

    :param id: URL string; user-space and video URLs are recognised by their
        ``https://`` prefix, anything else is treated as a bangumi URL.
    :return: everything after the first occurrence of the chosen marker.
    :raises ValueError: if the chosen marker does not occur in ``id``.
    """
    # (URL prefix that selects the rule, marker after which the id starts)
    rules = (
        ('https://space.', 'space.bilibili.com/'),
        ('https://www.bilibili.com/video/', '/video/'),
    )
    marker = next((mark for head, mark in rules if id.startswith(head)), '/bangumi/')
    start = id.index(marker)
    return id[start + len(marker):]

def get_entries(rss_doc, lang='zh'):
    """Flatten the entries of a feed document into row tuples.

    :param rss_doc: feed mapping whose ``entry`` key holds the list of entries;
        each entry provides ``title@<lang>``, ``published``, ``id`` and ``summary``.
    :param lang: language suffix used to pick the ``title@<lang>`` key.
    :return: list of ``(title, published, id, date, image)`` tuples. ``date``
        falls back to the entry's ``published`` value when the summary carries
        no update time; ``image`` is ``None`` when the summary has no image.
    """
    rows = []
    for entry in rss_doc['entry']:
        attrs = get_event_attrs(entry)
        published = entry['published']
        rows.append((
            entry['title@' + lang],
            published,
            extract_id(entry['id']),
            attrs.get('date', published),
            # 'image' is only present when the summary contained an <img>;
            # indexing it directly raised KeyError for image-less entries.
            attrs.get('image'),
        ))
    return rows

def print_doc_attrs(rss_doc):
    """Print a three-line header for a feed document.

    Prints the title together with the id extracted from the feed link, then
    the tab-indented subtitle, then the ``updated`` value passed through
    ``to_jdbc``.

    :param rss_doc: feed mapping providing ``title``, ``link``, ``subtitle``
        and ``updated``.
    """
    heading = rss_doc['title']
    feed_id = extract_id(rss_doc['link'])
    print(heading, '✆', feed_id)
    subtitle = rss_doc['subtitle']
    print('\t', subtitle)
    updated = to_jdbc(rss_doc['updated'])
    print(updated)

# Load a saved bangumi feed dump, print its header, and tabulate its entries.
json_file='./data/rss/bilibili_bangumi_media_9192.json'
rss_doc = json_utils.read_json_file(json_file)
print_doc_attrs(rss_doc)
# Default lang='zh' selects the 'title@zh' key of each entry.
tuples=get_entries(rss_doc)
# Last expression of the cell, so the resulting table is what the notebook displays.
ph.to_df(tuples, ['title', 'published', 'id', 'date', 'image'])

DARLING in the FRANXX（僅限港澳台地區） ✆ media/md9192/
	 遥远的未来，人类在荒废的大地上建设了移动要塞都市“种植园”，并讴歌着文明。在那当中建造的驾驶员居住设施“米斯特汀”，通称“鸟笼”。孩子们就住在那里，他们被告知的使命，只有战斗。敌人是一切都被谜团覆盖的巨大生命体“叫龙”。为了对抗尚未见过的敌人，孩子们乘上被称为“FRANXX”的机器人。有一位曾被称作神童的少年。代号016。名字是广。但他现在却跌落谷底。是不被人需要的存在。如果没有乘上FRANXX，就如同不存在一样。在这样的广面前，某天，一位被称作02的神秘少女出现了。她的额头，长着两根艳丽的角。“——找到了哦，我的DARLING” - Made with love by RSSHub(https://github.com/DIYgod/RSSHub)
2019-06-01 06:45:52+00:00


Unnamed: 0,title,published,id,date,image
0,第24话 不要離開我,2018-07-07T17:00:00+00:00,play/ep216076,2018-07-08 01:00:00,http://i0.hdslb.com/bfs/archive/9ab97b93084286...
1,第23话 DARLING in the FRANXX,2018-06-30T17:00:00+00:00,play/ep216075,2018-07-01 01:00:00,http://i0.hdslb.com/bfs/archive/b7d79f370e732d...
2,第22话 觀星者,2018-06-23T17:00:00+00:00,play/ep183822,2018-06-24 01:00:00,http://i0.hdslb.com/bfs/archive/5e8972b323825a...
3,第21话 為了最愛的你,2018-06-16T17:00:00+00:00,play/ep183821,2018-06-17 01:00:00,http://i0.hdslb.com/bfs/archive/2d9221d55f3873...
4,第20话 新世界,2018-06-09T17:00:00+00:00,play/ep183820,2018-06-10 01:00:00,http://i0.hdslb.com/bfs/archive/e607bf03ee5c66...
5,第SPII话 特番II,2018-06-02T17:00:00+00:00,play/ep183819,2018-06-03 01:00:00,http://i0.hdslb.com/bfs/archive/7fa1979aa0b3f5...
6,第19话 偽人類們,2018-05-26T17:00:00+00:00,play/ep183818,2018-05-27 01:00:00,http://i0.hdslb.com/bfs/archive/021a7d91835b4e...
7,第18话 櫻花綻開之時,2018-05-19T17:00:00+00:00,play/ep183817,2018-05-20 01:00:00,http://i0.hdslb.com/bfs/archive/4bcc79b0f25a33...
8,第17话 樂園,2018-05-12T17:00:00+00:00,play/ep183816,2018-05-13 01:00:00,http://i0.hdslb.com/bfs/archive/37c86c1121b1a7...
9,第16话 我們的日常,2018-05-05T17:00:00+00:00,play/ep183815,2018-05-06 01:00:00,http://i0.hdslb.com/bfs/archive/f6b93745fadf88...


In [37]:
# Spot-check: a video URL is reduced to the identifier after '/video/'.
extract_id('https://www.bilibili.com/video/av54083874')

'av54083874'

In [42]:
# Same pipeline as the bangumi feed, applied to a saved user-video feed dump.
json_file='./data/rss/bilibili_user_video_286700005.json'
rss_doc = json_utils.read_json_file(json_file)
print_doc_attrs(rss_doc)
# 'ja' selects the 'title@ja' key of each entry.
tuples=get_entries(rss_doc, 'ja')
# Last expression of the cell, so the resulting table is what the notebook displays.
ph.to_df(tuples, ['title', 'published', 'id', 'date', 'image'])

hololive 的 bilibili 空间 ✆ 286700005
	 hololive 的 bilibili 空间 - Made with love by RSSHub(https://github.com/DIYgod/RSSHub)
2019-06-01 07:20:33+00:00


Unnamed: 0,title,published,id,date,image
0,【しょぼんのアクション2】やる(´･ω･`)にかいめ【ホロライブ_戌神ころね】,2019-05-30T14:49:27+00:00,av54083874,2019-05-30T14:49:27+00:00,//i1.hdslb.com/bfs/archive/a56c60806c846dd72bd...
1,【つばさヘブン】クズ男に体裁を！はちゃめちゃ３人衆でヤンデレホラゲ実況！【犬山たまき_宗谷い...,2019-05-30T13:26:06+00:00,av54078712,2019-05-30T13:26:06+00:00,//i1.hdslb.com/bfs/archive/90422531734bd8f2381...
2,【#クソガキ指導室】クソガキと悪魔の保健医のなんでも相談室【御伽原江良_癒月ちょこ】,2019-05-30T13:07:18+00:00,av54077303,2019-05-30T13:07:18+00:00,//i1.hdslb.com/bfs/archive/4e61d4b305d534ae147...
3,【Vtuber】お寿司食べて幸せになりつつ雑談する。【ホロライブ_紫咲シオン】,2019-05-30T12:37:01+00:00,av54074900,2019-05-30T12:37:01+00:00,//i2.hdslb.com/bfs/archive/b5192b802a691c7dc19...
4,【ASMR】あなたのお耳に最上級の癒しのひとときを【囁き、吐息、耳かき、シチュエーション】E...,2019-05-30T12:10:16+00:00,av54068896,2019-05-30T12:10:16+00:00,//i1.hdslb.com/bfs/archive/ee3f93fb292519628a6...
5,『オタク美少女』と『吸血鬼』の奇妙な殺人事件 #2 【アルネの事件簿】,2019-05-30T11:35:35+00:00,av54066972,2019-05-30T11:35:35+00:00,//i2.hdslb.com/bfs/archive/56c3d22db63883593a5...
6,10位以下で即放送終了テトリス99!!【赤井はあと_ホロライブ】,2019-05-30T11:34:43+00:00,av54068089,2019-05-30T11:34:43+00:00,//i0.hdslb.com/bfs/archive/129ba0d3748375a61ca...
7,【はあとん集会】赤井はあとについてみんなで考える,2019-05-30T10:37:12+00:00,av54063185,2019-05-30T10:37:12+00:00,//i0.hdslb.com/bfs/archive/d7d8bb12d88ffff3c05...
8,【マリカDX】よわよわマリカ卒業！猛特訓！????,2019-05-30T10:36:53+00:00,av54062049,2019-05-30T10:36:53+00:00,//i0.hdslb.com/bfs/archive/a23179d77c81fcd43ca...
9,【テトリス99】時間内に99人の頂点に立つ！,2019-05-30T10:03:33+00:00,av54057806,2019-05-30T10:03:33+00:00,//i1.hdslb.com/bfs/archive/1198466176de15cbb86...
