In [1]:
import json
import os
import time

import pandas as pd
import requests

# 每日疫情数据

In [175]:
#download data
def downloadDailyData(timeout=30):
    """Fetch the current ``disease_h5`` payload from the Tencent news endpoint.

    The endpoint answers with a JSON envelope whose ``'data'`` field is itself
    a JSON-encoded string, so the body is decoded twice.

    Parameters
    ----------
    timeout : float or tuple, optional
        Passed to ``requests.get``. requests waits indefinitely when no
        timeout is given, which would hang the notebook on a stalled server.

    Returns
    -------
    The decoded inner payload (whatever ``json.loads`` produces for the
    ``'data'`` string).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    # The `_` query parameter carries the current time in milliseconds
    # (presumably a cache-buster -- confirm against the API).
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%d'%int(time.time()*1000)
    response = requests.get(url=url, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later on.
    response.raise_for_status()
    data = json.loads(response.json()['data'])
    return data

In [163]:
# decode area data to data frame
def _areaRows(node, country, province, city):
    """Yield the cumulative ('All') and daily ('Today') row dicts for one area node."""
    for label, key in (('All', 'total'), ('Today', 'today')):
        counts = node[key]
        yield {'Country': country,
               'Province': province,
               'City': city,
               'AllOrToday': label,
               'confirm': counts['confirm'],
               'suspect': counts['suspect'],
               'dead': counts['dead'],
               'heal': counts['heal']}


def getAreaDataFrame(response_data):
    """Flatten ``response_data['areaTree']`` into one long DataFrame.

    The tree is walked country -> province -> city (via each node's optional
    ``'children'`` list). Every node contributes two consecutive rows: one
    built from its ``'total'`` counts (``AllOrToday == 'All'``) and one from
    its ``'today'`` counts (``AllOrToday == 'Today'``). Levels that do not
    apply to a row are filled with the empty string.

    Rows are collected in a list and the frame is built once; the previous
    per-row ``DataFrame.append`` copied the whole frame on every call and no
    longer exists in pandas 2.x. As a consequence the count columns now get
    their dtype inferred from the values instead of always being ``object``.

    Parameters
    ----------
    response_data : dict
        Decoded payload containing an ``'areaTree'`` list of country nodes.

    Returns
    -------
    pandas.DataFrame
        Columns ``Country, Province, City, AllOrToday, confirm, suspect,
        dead, heal`` with a default 0..n-1 index. Empty (but with these
        columns) when ``areaTree`` is empty.

    Raises
    ------
    KeyError
        If a node lacks ``'name'``, ``'total'``, ``'today'`` or one of the
        four count keys.
    """
    columns = ['Country','Province','City','AllOrToday','confirm','suspect','dead','heal']
    records = []
    for country in response_data['areaTree']:
        records.extend(_areaRows(country, country['name'], '', ''))
        # A missing or empty 'children' entry simply means there is nothing to descend into.
        for province in country.get('children') or []:
            records.extend(_areaRows(province, country['name'], province['name'], ''))
            for city in province.get('children') or []:
                records.extend(_areaRows(city, country['name'], province['name'], city['name']))
    # Passing `columns` keeps the header stable even when there are no rows.
    return pd.DataFrame(records, columns=columns)

In [164]:
def chinaSummary(data):
    """Build a two-row summary frame from ``chinaTotal`` and ``chinaAdd``.

    Row 0 holds the fields of ``data['chinaTotal']`` tagged with
    ``AllOrToday == 'All'``; row 1 holds the fields of ``data['chinaAdd']``
    tagged with ``AllOrToday == 'Today'``.

    The tag is added to shallow copies, so the caller's payload is left
    untouched (previously the key was written into ``data`` itself).

    Parameters
    ----------
    data : dict
        Decoded payload containing ``'chinaTotal'`` and ``'chinaAdd'`` dicts.

    Returns
    -------
    pandas.DataFrame
        Two rows, one column per field plus ``AllOrToday``.
    """
    total_row = dict(data['chinaTotal'], AllOrToday='All')
    today_row = dict(data['chinaAdd'], AllOrToday='Today')
    china_summary = pd.DataFrame.from_records([total_row, today_row])
    return china_summary

In [165]:
def getHistory(data):
    """Turn the two history lists of the payload into DataFrames.

    Returns a 2-tuple ``(cumulative, additions)`` built from
    ``data['chinaDayList']`` and ``data['chinaDayAddList']`` respectively,
    one row per record in each list.
    """
    history_keys = ('chinaDayList', 'chinaDayAddList')
    return tuple(pd.DataFrame.from_records(data[key]) for key in history_keys)

In [166]:
# Show the payload's 'lastUpdateTime' value (a timestamp string in the recorded output).
# NOTE(review): `data` is not assigned at top level in any cell shown here (only
# inside updateNow) -- presumably left over from an earlier session; confirm a
# cell defines it before this one runs on a fresh kernel.
data['lastUpdateTime']

'2020-02-03 13:54:57'

In [172]:
def saveAll(data):
    """Write one CSV snapshot per table under ``data/infections``.

    Four files are written, each suffixed with the payload's
    ``lastUpdateTime`` (colons replaced by dashes): ``area_*``, ``summary_*``,
    ``history_sum_*`` and ``history_add_*``. The index is not written.

    The output directory is created when missing, because ``to_csv`` does not
    create parent directories. All frames are built before the first file is
    written, so a payload that fails to decode does not leave a partial
    snapshot on disk.

    Parameters
    ----------
    data : dict
        Decoded payload as returned by ``downloadDailyData``.
    """
    out_dir = 'data/infections'
    # Colons are replaced so the timestamp can be used inside a file name.
    last_time = data['lastUpdateTime'].replace(':','-')

    history_sum, history_add = getHistory(data)
    frames = [
        ('area', getAreaDataFrame(data)),
        ('summary', chinaSummary(data)),
        ('history_sum', history_sum),
        ('history_add', history_add),
    ]

    os.makedirs(out_dir, exist_ok=True)
    for prefix, frame in frames:
        frame.to_csv('{}/{}_{}.csv'.format(out_dir, prefix, last_time), index=False)


In [176]:
# Run this function daily to save data
def updateNow():
    """Download the current payload and hand it straight to ``saveAll``."""
    saveAll(downloadDailyData())

In [174]:
# Download the latest payload and write its CSV snapshot (network request + file writes).
updateNow()

# 数据查询

In [122]:
# Cumulative ('All') rows whose Province is 北京.
# NOTE(review): df_area is not created in any cell shown here -- presumably loaded
# from a previously saved area CSV; confirm a cell defines it before this one.
df_area.loc[df_area['Province'].eq('北京') & df_area['AllOrToday'].eq('All')]

Unnamed: 0,Country,Province,City,AllOrToday,confirm,suspect,dead,heal
462,中国,北京,,All,191,0,1,9
464,中国,北京,海淀,All,41,0,0,0
466,中国,北京,朝阳,All,35,0,0,0
468,中国,北京,西城,All,22,0,0,0
470,中国,北京,大兴,All,22,0,0,2
472,中国,北京,丰台,All,16,0,0,0
474,中国,北京,通州,All,13,0,0,0
476,中国,北京,昌平,All,12,0,0,0
478,中国,北京,外地来京,All,11,0,0,2
480,中国,北京,顺义,All,6,0,0,0


# 患者同行数据
两个来源：搜狗/人民日报

搜狗
https://sa.sogou.com/new-weball/page/sgs/epidemic/yyxw?type_page=yangshi&scene=2&clicktime=1580381964&enterid=1580381964&from=timeline&isappinstalled=0

In [170]:
# Load the Sogou trip data straight from its URL and store it as CSV.
sogou_trips = pd.read_json('https://hhyfeed.sogoucdn.com/js/common/epidemic-search/main.js')
os.makedirs('data/trips', exist_ok=True)  # to_csv does not create missing directories
sogou_trips.to_csv('data/trips/sogou_trips.csv')

人民日报 http://2019ncov.nosugartech.com/search.html?t_date=&t_no=&t_area=

In [171]:
# Fetch the trip records (the 'data' field of the JSON response) and store them as CSV.
url_trip = 'http://2019ncov.nosugartech.com/data.json?'
# requests waits indefinitely by default; bound the wait so a stalled server cannot hang the notebook.
json_res = requests.get(url=url_trip, timeout=30).json()['data']
nosugar_trips = pd.DataFrame.from_records(json_res)
os.makedirs('data/trips', exist_ok=True)  # to_csv does not create missing directories
nosugar_trips.to_csv('data/trips/nosugar_trips.csv')