In [73]:
import datetime
import json
import os
import time

import pandas as pd
import requests

# 每日疫情数据

In [2]:
#download data
def downloadDailyData(timeout=10):
    """Fetch the current epidemic snapshot from the Tencent news endpoint.

    The payload is wrapped twice: the HTTP body is JSON whose ``data`` field
    is itself a JSON-encoded string, so it has to be decoded a second time.

    Args:
        timeout: Seconds ``requests.get`` waits for the server before raising
            instead of blocking forever.

    Returns:
        The object obtained by JSON-decoding the ``data`` field of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # The current time in milliseconds is sent as the ``_`` query parameter
    # (presumably a cache-buster; the endpoint's contract is not visible here).
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%d'%int(time.time()*1000)
    response = requests.get(url=url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than with an opaque JSON/Key error below.
    response.raise_for_status()
    return json.loads(response.json()['data'])

In [3]:
# decode area data to data frame
def getAreaDataFrame(response_data):
    """Flatten the nested ``areaTree`` of a snapshot into one long DataFrame.

    The tree is walked country -> province -> city. Every node contributes
    two rows, in this order: its ``total`` counts (``AllOrToday == 'All'``)
    and its ``today`` counts (``AllOrToday == 'Today'``). Levels that do not
    apply to a node are left as empty strings, e.g. a country row has
    ``Province == ''`` and ``City == ''``.

    Rows are collected in a list and the frame is built once; growing a frame
    row by row with ``DataFrame.append`` is quadratic and that method no
    longer exists in pandas 2.x.

    Args:
        response_data: Mapping with an ``areaTree`` list. Each node needs
            ``name``, ``total`` and ``today``; ``children`` is optional.

    Returns:
        DataFrame with columns Country, Province, City, AllOrToday, confirm,
        suspect, dead, heal (in that order), also when the tree is empty.

    Raises:
        KeyError: If a node lacks ``name``, ``total``, ``today`` or one of the
            four count fields.
    """
    columns = ['Country','Province','City','AllOrToday','confirm','suspect','dead','heal']
    records = []
    for country in response_data['areaTree']:
        records.extend(_areaNodeRows(country, country['name'], '', ''))
        # ``or []`` covers both a missing key and an explicit null/empty value.
        for province in country.get('children') or []:
            records.extend(_areaNodeRows(province, country['name'], province['name'], ''))
            for city in province.get('children') or []:
                records.extend(
                    _areaNodeRows(city, country['name'], province['name'], city['name'])
                )
    # Passing ``columns`` fixes the column order and keeps the header for empty input.
    return pd.DataFrame(records, columns=columns)


def _areaNodeRows(node, country_name, province_name, city_name):
    """Return the cumulative ('All') and daily ('Today') row dicts for one tree node."""
    rows = []
    for label, counts_key in (('All', 'total'), ('Today', 'today')):
        counts = node[counts_key]
        rows.append({
            'Country': country_name,
            'Province': province_name,
            'City': city_name,
            'AllOrToday': label,
            'confirm': counts['confirm'],
            'suspect': counts['suspect'],
            'dead': counts['dead'],
            'heal': counts['heal'],
        })
    return rows

In [4]:
def chinaSummary(data):
    """Build a two-row summary frame from ``chinaTotal`` and ``chinaAdd``.

    Row 0 holds the fields of ``data['chinaTotal']`` tagged with
    ``AllOrToday == 'All'``; row 1 holds the fields of ``data['chinaAdd']``
    tagged with ``AllOrToday == 'Today'``.

    The tag is added to copies, so the caller's ``data`` is left untouched
    (the snapshot is reused by other functions after this call).

    Args:
        data: Mapping containing the ``chinaTotal`` and ``chinaAdd`` dicts.

    Returns:
        DataFrame with one row per input dict plus the ``AllOrToday`` column.

    Raises:
        KeyError: If ``chinaTotal`` or ``chinaAdd`` is missing.
    """
    total_row = dict(data['chinaTotal'], AllOrToday='All')
    added_row = dict(data['chinaAdd'], AllOrToday='Today')
    return pd.DataFrame.from_records([total_row, added_row])

In [5]:
def getHistory(data):
    """Turn the two day-by-day record lists of a snapshot into DataFrames.

    Args:
        data: Mapping containing ``chinaDayList`` and ``chinaDayAddList``,
            each a sequence of records.

    Returns:
        A 2-tuple ``(frame from chinaDayList, frame from chinaDayAddList)``.
    """
    cumulative, daily_added = (
        pd.DataFrame.from_records(data[key])
        for key in ('chinaDayList', 'chinaDayAddList')
    )
    return cumulative, daily_added

In [7]:
def saveAll(data, out_dir='data/infections'):
    """Write the area, summary and history tables of one snapshot to CSV.

    Four files are written into ``out_dir``: ``area_<t>.csv``,
    ``summary_<t>.csv``, ``history_sum_<t>.csv`` and ``history_add_<t>.csv``,
    where ``<t>`` is ``data['lastUpdateTime']`` with ``:`` replaced by ``-``.
    The timestamp is also printed.

    Args:
        data: Snapshot mapping; must provide ``lastUpdateTime`` plus whatever
            getAreaDataFrame, chinaSummary and getHistory read from it.
        out_dir: Target directory. It is created if it does not exist, so a
            fresh checkout does not fail on the first run.
    """
    # ':' is swapped out before the timestamp is embedded in file names
    # (presumably because it is not a valid file-name character everywhere).
    last_time = data['lastUpdateTime'].replace(':','-')
    print(last_time)
    os.makedirs(out_dir, exist_ok=True)

    def _target(prefix):
        # Full path of the CSV for one table.
        return os.path.join(out_dir, '{}_{}.csv'.format(prefix, last_time))

    area_df = getAreaDataFrame(data)
    area_df.to_csv(_target('area'), index=False)
    summary = chinaSummary(data)
    summary.to_csv(_target('summary'), index=False)
    history_sum, history_add = getHistory(data)
    history_sum.to_csv(_target('history_sum'), index=False)
    history_add.to_csv(_target('history_add'), index=False)

In [8]:
# Run this function daily to save data
def updateNow():
    """Download the latest snapshot, save it with saveAll, and return it."""
    snapshot = downloadDailyData()
    saveAll(snapshot)
    return snapshot

In [15]:
# Download and save the current snapshot; the payload is kept for the query cells below.
data = updateNow()

2020-02-07 00-42-37


In [16]:
# Flatten the area tree once; the query cells below filter this frame.
df_area = getAreaDataFrame(data)

# 数据查询

In [17]:
# Today's overview
chinaSummary(data)

Unnamed: 0,confirm,suspect,dead,heal,AllOrToday
0,28140,24702,564,1348,All
1,80,0,0,195,Today


In [18]:
# Worldwide data: cumulative rows that carry no province
df_area.loc[
    df_area['Province'].eq('')
    & df_area['AllOrToday'].eq('All')
]

Unnamed: 0,Country,Province,City,AllOrToday,confirm,suspect,dead,heal
0,中国,,,All,28140,24702,564,1348
906,日本,,,All,45,0,0,1
908,新加坡,,,All,30,0,0,1
910,泰国,,,All,25,0,0,8
912,韩国,,,All,23,0,0,1
914,澳大利亚,,,All,15,0,0,5
916,马来西亚,,,All,14,0,0,1
918,越南,,,All,12,0,0,3
920,德国,,,All,12,0,0,0
922,美国,,,All,12,0,0,1


In [19]:
# Nationwide data: cumulative rows for China that carry no city
df_area.loc[
    df_area['Country'].eq('中国')
    & df_area['City'].eq('')
    & df_area['AllOrToday'].eq('All')
]

Unnamed: 0,Country,Province,City,AllOrToday,confirm,suspect,dead,heal
0,中国,,,All,28140,24702,564,1348
2,中国,湖北,,All,19665,0,549,680
40,中国,广东,,All,970,0,0,59
84,中国,浙江,,All,954,0,0,95
108,中国,河南,,All,851,0,2,63
148,中国,湖南,,All,711,0,0,69
178,中国,江西,,All,600,0,0,37
202,中国,安徽,,All,591,0,0,45
236,中国,重庆,,All,400,0,2,24
316,中国,江苏,,All,373,0,0,36


In [20]:
# Per-province data: cumulative rows whose province is Beijing
df_area.loc[
    df_area['Province'].eq('北京')
    & df_area['AllOrToday'].eq('All')
]

Unnamed: 0,Country,Province,City,AllOrToday,confirm,suspect,dead,heal
420,中国,北京,,All,274,0,1,38
422,中国,北京,朝阳,All,48,0,0,0
424,中国,北京,海淀,All,47,0,0,0
426,中国,北京,西城,All,34,0,0,0
428,中国,北京,大兴,All,32,0,0,2
430,中国,北京,外地来京,All,21,0,0,2
432,中国,北京,丰台,All,20,0,0,0
434,中国,北京,昌平,All,15,0,0,0
436,中国,北京,通州,All,13,0,0,0
438,中国,北京,石景山,All,11,0,0,0


In [21]:
# Newly added data: same filter as above with 'All' switched to 'Today'
df_area.loc[
    df_area['Province'].eq('北京')
    & df_area['AllOrToday'].eq('Today')
]

Unnamed: 0,Country,Province,City,AllOrToday,confirm,suspect,dead,heal
421,中国,北京,,Today,0,0,0,0
423,中国,北京,朝阳,Today,0,0,0,0
425,中国,北京,海淀,Today,0,0,0,0
427,中国,北京,西城,Today,0,0,0,0
429,中国,北京,大兴,Today,0,0,0,0
431,中国,北京,外地来京,Today,0,0,0,0
433,中国,北京,丰台,Today,0,0,0,0
435,中国,北京,昌平,Today,0,0,0,0
437,中国,北京,通州,Today,0,0,0,0
439,中国,北京,石景山,Today,0,0,0,0


In [22]:
# Cumulative confirmed-case history (first frame returned by getHistory)
getHistory(data)[0]

Unnamed: 0,confirm,suspect,dead,heal,date
0,41,0,1,0,1.13
1,41,0,1,0,1.14
2,41,0,2,5,1.15
3,45,0,2,8,1.16
4,62,0,2,12,1.17
5,198,0,3,17,1.18
6,275,0,4,18,1.19
7,291,54,6,25,1.2
8,440,37,9,25,1.21
9,571,393,17,25,1.22


In [23]:
# Newly confirmed-case history (second frame returned by getHistory)
getHistory(data)[1]

Unnamed: 0,confirm,suspect,dead,heal,date
0,77,27,0,0,1.2
1,149,53,3,0,1.21
2,131,257,8,0,1.22
3,259,680,8,6,1.23
4,444,1118,16,3,1.24
5,688,1309,15,11,1.25
6,769,3806,24,2,1.26
7,1771,2077,26,9,1.27
8,1459,3248,26,43,1.28
9,1737,4148,38,21,1.29


# 患者小区查询
一个来源：腾讯 https://ncov.html5.qq.com/community?channelid=17


In [85]:
def getCommunityDataFrame(timeout=10):
    """Download the community listing and flatten it into one DataFrame.

    The ``community`` field of the response is walked as four nested levels
    (the original loop names them province, city and district, with a list
    of records at the bottom). All bottom-level records are concatenated in
    iteration order; the keys of the three outer levels are not stored.

    Args:
        timeout: Seconds ``requests.get`` waits for the server before raising
            instead of blocking forever.

    Returns:
        DataFrame built from all bottom-level records.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    getCommunity_url = 'https://ncov.html5.qq.com/api/getCommunity?'
    response = requests.get(url=getCommunity_url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than with an opaque JSON/Key error below.
    response.raise_for_status()
    community = response.json()['community']
    all_records = [
        record
        for cities in community.values()
        for districts in cities.values()
        for records in districts.values()
        for record in records
    ]
    return pd.DataFrame.from_records(all_records)
def saveCommunityData(df_community, out_dir='data/infections'):
    """Write the community frame to ``community_<timestamp>.csv`` in ``out_dir``.

    Args:
        df_community: DataFrame to save; written without the index column.
        out_dir: Target directory. It is created if it does not exist, so a
            fresh checkout does not fail on the first run.
    """
    # NOTE(review): "EST" is a literal suffix while datetime.now() returns the
    # machine's local time, so the label is only right on a host set to EST.
    # The format is kept unchanged so existing file names stay comparable.
    localtime = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-EST")
    os.makedirs(out_dir, exist_ok=True)
    df_community.to_csv(os.path.join(out_dir, 'community_{}.csv'.format(localtime)),index=False)
def updateCommunity():
    """Fetch the community records and save them to a timestamped CSV."""
    saveCommunityData(getCommunityDataFrame())

In [86]:
# Download the community listing and write it to data/infections.
updateCommunity()

# 患者同行数据
三个来源：搜狗/人民日报/腾讯

搜狗
https://sa.sogou.com/new-weball/page/sgs/epidemic/yyxw?type_page=yangshi&scene=2&clicktime=1580381964&enterid=1580381964&from=timeline&isappinstalled=0

In [29]:
# Sogou source: the URL is passed straight to pandas' JSON reader and the result saved as CSV.
# NOTE(review): the URL ends in .js — confirm the response body is plain JSON.
# to_csv is called without index=False here, so the row index is written as the first column.
sogou_trips = pd.read_json('https://hhyfeed.sogoucdn.com/js/common/epidemic-search/main.js')
sogou_trips.to_csv('data/trips/sogou_trips.csv')

人民日报 http://2019ncov.nosugartech.com/search.html?t_date=&t_no=&t_area=

In [30]:
# Second trip source: download the JSON, take its 'data' list, save it as CSV.
# A timeout is set so a stalled server cannot hang the notebook run.
# to_csv is called without index=False here, so the row index is written as the first column.
url_trip = 'http://2019ncov.nosugartech.com/data.json?'
json_res = requests.get(url=url_trip, timeout=10).json()['data']
nosugar_trips = pd.DataFrame.from_records(json_res)
nosugar_trips.to_csv('data/trips/nosugar_trips.csv')

腾讯 https://rl.inews.qq.com/h5/trip?from=newsapp

In [87]:
# Tencent trip source: download the JSON, take data -> list, save it as CSV.
# A timeout is set so a stalled server cannot hang the notebook run.
# to_csv is called without index=False here, so the row index is written as the first column.
tx_trip_url = 'https://rl.inews.qq.com/taf/travelFront'
json_res = requests.get(url=tx_trip_url, timeout=10).json()['data']['list']
tx_trips = pd.DataFrame.from_records(json_res)
tx_trips.to_csv('data/trips/tx_trips.csv')

In [27]:
# Show the Tencent trip table (the rendered output below is truncated in the middle).
tx_trips

Unnamed: 0,date,start,end,type,no,no_sub,memo,pos_start,pos_end,source,who
0,2020-02-03,,,1,MF810,,,,福州,https://m.weibo.cn/status/4469031496679960?ope...,
1,2020-02-03,,,2,G1408,13号车厢,,广州南,南昌西,https://m.weibo.cn/status/4468528998833555?ope...,湖南疾控
2,2020-02-03,,,4,湘A02906D,,,桂花村,,https://m.weibo.cn/1740557654/4468675682159071,金鹰955电台
3,2020-02-03,,,5,渭南6路,,1人确诊,火车站,第二医院,https://weibo.com/6072764925/IsOpetYEL?type=co...,渭南同城
4,2020-02-02,,,6,黑AE888Z,网约车,,南极国际小区,哈尔滨传染病院,https://mp.weixin.qq.com/s/RPfahGQ_Gg483fv3Bah4BQ,人民日报
...,...,...,...,...,...,...,...,...,...,...,...
1844,2020-01-04,,,2,K1282,6车厢,不详,深圳东,光山,http://www.huangchuan.gov.cn/xinwen/tzgg/2020B...,潢川人民政府
1845,2020-01-03,,,2,K628,8车厢,1人确诊,宜昌东,西安,https://baijiahao.baidu.com/s?id=1657495493717...,兰州新闻网
1846,2019-12-27,,,2,Z14,10号车厢,,广州东,沈阳北,http://www.zgcy.gov.cn/ZGCY/zwgk/20200131/0040...,朝阳市新型肺炎疫情防控指挥部办公室
1847,2019-12-27,,,2,Z11/Z14,10号车厢,,长沙,沈阳北,http://www.zgcy.gov.cn/ZGCY/zwgk/20200131/0040...,朝阳市新型冠状病毒感染的肺炎疫情防控指挥部办公室
