In [1]:
import pandas as pd
import time
import json
import pycountry as pc
import requests

## 下载文件

In [2]:
# Download the full tracker dump and keep an untouched copy on disk.
# A timeout keeps the cell from hanging forever if the service does not answer.
r = requests.get("https://coronavirus-tracker-api.herokuapp.com/all", timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as raw/all.json.
r.raise_for_status()
# NOTE: open() does not create directories, so the raw/ folder must already exist.
with open("raw/all.json", "wb") as f:
    f.write(r.content)

## 读取文件

In [3]:
# Parse the saved dump, then show its top-level keys and container type.
with open('raw/all.json', encoding='utf8') as raw_file:
    json_data = json.load(raw_file)
print(json_data.keys())
print(type(json_data))

dict_keys(['confirmed', 'deaths', 'recovered', 'latest'])
<class 'dict'>


In [4]:
# Inspect which keys the 'confirmed' section of the dump provides.
json_data['confirmed'].keys()

dict_keys(['locations', 'latest', 'last_updated', 'source'])

In [5]:
# Pull the per-location record list out of each of the three metric sections.
confirmed, deaths, recovered = (
    json_data[metric]['locations']
    for metric in ('confirmed', 'deaths', 'recovered')
)

In [6]:
# Number of location records in each metric list, printed space-separated.
print(*map(len, (confirmed, deaths, recovered)))

266 266 253


## 中国各省

In [7]:
# Positions, within the confirmed-cases location list, of every entry whose country is China.
china_indexes = [
    pos
    for pos, location in enumerate(json_data['confirmed']['locations'])
    if location['country'] == 'China'
]
print(china_indexes)

[49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81]


In [8]:
# Build one record per Chinese province: identity, latest counts and a daily timeline.
#
# `confirmed`, `deaths` and `recovered` are separate lists of different lengths
# (266 / 266 / 253 in the run recorded above), so the same position does not
# necessarily refer to the same place in each of them. Match records on
# (country, province) instead of reusing the index found in `confirmed`.
# NOTE(review): assumes deaths/recovered records carry the same 'country' and
# 'province' fields as the confirmed records - confirm against the API schema.
deaths_by_place = {(rec['country'], rec.get('province')): rec for rec in deaths}
recovered_by_place = {(rec['country'], rec.get('province')): rec for rec in recovered}
# Stand-in for a place that is absent from one of the lists: its counts default to 0.
no_record = {'latest': 0, 'history': {}}

provinces = []
for i in china_indexes:
    confirmed_rec = confirmed[i]
    place = (confirmed_rec['country'], confirmed_rec['province'])
    deaths_rec = deaths_by_place.get(place, no_record)
    recovered_rec = recovered_by_place.get(place, no_record)

    p_dict = {}
    p_dict['country'] = confirmed_rec['country']
    # Convert the two-letter country code of the record into its three-letter form.
    p_dict['country_code'] = pc.countries.get(
        alpha_2=confirmed_rec['country_code']).alpha_3
    p_dict['province'] = confirmed_rec['province']
    p_dict['coordinates'] = confirmed_rec['coordinates']
    p_dict['latest'] = {
        'confirmed': confirmed_rec['latest'],
        'deaths': deaths_rec['latest'],
        'recovered': recovered_rec['latest']
    }

    timeline = []
    for date, confirmed_cnt in confirmed_rec['history'].items():
        # History keys look like "1/22/20"; re-emit them as ISO "2020-01-22".
        date_ = time.strftime("%Y-%m-%d", time.strptime(date, "%m/%d/%y"))
        timeline.append({
            'date': date_,
            'confirmed': confirmed_cnt,
            # A date missing from the deaths/recovered history counts as 0
            # rather than aborting the whole cell with a KeyError.
            'deaths': deaths_rec['history'].get(date, 0),
            'recovered': recovered_rec['history'].get(date, 0)
        })
    p_dict['timeline'] = timeline

    provinces.append(p_dict)

In [9]:
# Turn the list of province dicts into a DataFrame, one row per dict.
china_result = pd.DataFrame(provinces)

In [10]:
# Export the province table; orient='records' writes a JSON array with one object per row.
china_result.to_json("china_timeline.json", orient='records')

## 世界各国

In [11]:
# Group each metric's location records by country so that multi-row
# countries can be aggregated later.
DF_confirmed_groups, DF_deaths_groups, DF_recovered_groups = (
    pd.DataFrame(location_records).groupby('country')
    for location_records in (confirmed, deaths, recovered)
)

In [12]:
# pd.DataFrame(confirmed).country.drop_duplicates().tolist()

In [13]:
# Number of distinct countries found in each metric, printed space-separated.
print(*map(len, (DF_confirmed_groups, DF_deaths_groups, DF_recovered_groups)))

188 188 188


In [14]:
# Country names are the group labels of the confirmed-cases grouping.
cntry_names = list(DF_confirmed_groups.groups)

In [15]:
# DF_confirmed_groups.get_group('China')

In [16]:
# Build one record per country: name, three-letter code, latest counts and a
# daily timeline summed over all of the country's location rows.
countries = []
for cntry in cntry_names:
    df = DF_confirmed_groups.get_group(cntry)
    df_deaths = DF_deaths_groups.get_group(cntry)
    df_recovered = DF_recovered_groups.get_group(cntry)

    # The first confirmed row of the country supplies the list of dates.
    dates = list(df.history.iloc[0].keys())
    timeline = []

    for d in dates:
        # History keys look like "1/22/20"; re-emit them as ISO "2020-01-22".
        date_ = time.strftime("%Y-%m-%d", time.strptime(d, "%m/%d/%y"))
        # Sum each metric across the country's rows for this day. A row whose
        # history lacks the date contributes 0 instead of raising KeyError.
        timeline.append({
            'date': date_,
            'confirmed': sum(h.get(d, 0) for h in df.history),
            'deaths': sum(h.get(d, 0) for h in df_deaths.history),
            'recovered': sum(h.get(d, 0) for h in df_recovered.history)
        })

    # "Latest" is the last timeline entry. Reading it from the timeline (rather
    # than from variables left over by the loop) keeps a country with no dates
    # from failing or inheriting the previous country's totals.
    if timeline:
        last_day = timeline[-1]
        latest = {
            'confirmed': last_day['confirmed'],
            'deaths': last_day['deaths'],
            'recovered': last_day['recovered']
        }
    else:
        latest = {'confirmed': 0, 'deaths': 0, 'recovered': 0}

    # The cruise ship Diamond Princess has no country code, marked as 'XXX'.
    # Only a failed lookup is tolerated: a missing match surfaces as
    # AttributeError (lookup returned None) or as a LookupError, depending on
    # the pycountry version. Any other error is a real problem and propagates.
    try:
        country_code_3 = pc.countries.get(alpha_2=df.iloc[0].country_code).alpha_3
    except (AttributeError, LookupError):
        country_code_3 = 'XXX'

    countries.append({
        'country': cntry,
        'country_code': country_code_3,
        'latest': latest,
        'timeline': timeline
    })

In [17]:
# Turn the list of country dicts into a DataFrame, one row per country.
countries_result = pd.DataFrame(countries)

In [18]:
# Export the country table; orient='records' writes a JSON array with one object per row.
countries_result.to_json("world_timeline.json", orient='records')

## We are the world: global totals

In [19]:
# Preview the first rows of the country table.
countries_result.head()

Unnamed: 0,country,country_code,latest,timeline
0,Afghanistan,AFG,"{'confirmed': 8145, 'deaths': 187, 'recovered'...","[{'date': '2020-01-22', 'confirmed': 0, 'death..."
1,Albania,ALB,"{'confirmed': 964, 'deaths': 31, 'recovered': ...","[{'date': '2020-01-22', 'confirmed': 0, 'death..."
2,Algeria,DZA,"{'confirmed': 7542, 'deaths': 568, 'recovered'...","[{'date': '2020-01-22', 'confirmed': 0, 'death..."
3,Andorra,AND,"{'confirmed': 762, 'deaths': 51, 'recovered': ...","[{'date': '2020-01-22', 'confirmed': 0, 'death..."
4,Angola,AGO,"{'confirmed': 52, 'deaths': 3, 'recovered': 17}","[{'date': '2020-01-22', 'confirmed': 0, 'death..."


In [20]:
# Worldwide totals: add up each metric over every country's 'latest' dict.
global_confirmed = sum(latest['confirmed'] for latest in countries_result.latest)
global_deaths = sum(latest['deaths'] for latest in countries_result.latest)
global_recovered = sum(latest['recovered'] for latest in countries_result.latest)

global_latest = {
    'confirmed': global_confirmed,
    'deaths': global_deaths,
    'recovered': global_recovered
}
print(global_latest)

{'confirmed': 4996472, 'deaths': 328115, 'recovered': 1897466}


In [21]:
# Build the worldwide daily timeline by summing every country's counts per day.
#
# The first country's timeline is used only as a calendar (dates and day count).
# The totals go into a NEW list: writing them back into that first timeline
# would overwrite the first country's own data inside `countries_result`, and
# re-running the cell would then count the previous totals again.
calendar = countries_result.iloc[0].timeline
print("Data in {} days.".format(len(calendar)))
global_timeline = []
for i, day in enumerate(calendar):
    # Entry i of every country's timeline is assumed to describe the same date.
    global_timeline.append({
        'date': day['date'],
        'confirmed': sum(tl[i]['confirmed'] for tl in countries_result.timeline),
        'deaths': sum(tl[i]['deaths'] for tl in countries_result.timeline),
        'recovered': sum(tl[i]['recovered'] for tl in countries_result.timeline)
    })
print(global_timeline)

Data in 120 days.
[{'date': '2020-01-22', 'confirmed': 555, 'deaths': 17, 'recovered': 28}, {'date': '2020-01-23', 'confirmed': 654, 'deaths': 18, 'recovered': 30}, {'date': '2020-01-24', 'confirmed': 941, 'deaths': 26, 'recovered': 36}, {'date': '2020-01-25', 'confirmed': 1434, 'deaths': 42, 'recovered': 39}, {'date': '2020-01-26', 'confirmed': 2118, 'deaths': 56, 'recovered': 52}, {'date': '2020-01-27', 'confirmed': 2927, 'deaths': 82, 'recovered': 61}, {'date': '2020-01-28', 'confirmed': 5578, 'deaths': 131, 'recovered': 107}, {'date': '2020-01-29', 'confirmed': 6166, 'deaths': 133, 'recovered': 126}, {'date': '2020-01-30', 'confirmed': 8234, 'deaths': 171, 'recovered': 143}, {'date': '2020-01-31', 'confirmed': 9927, 'deaths': 213, 'recovered': 222}, {'date': '2020-02-01', 'confirmed': 12038, 'deaths': 259, 'recovered': 284}, {'date': '2020-02-02', 'confirmed': 16787, 'deaths': 362, 'recovered': 472}, {'date': '2020-02-03', 'confirmed': 19881, 'deaths': 426, 'recovered': 623}, {'dat

In [22]:
# Bundle the worldwide totals and timeline into one record and save it as JSON.
global_data = {
    'country': 'global',
    'latest': global_latest,
    'timeline': global_timeline
}
with open('global_timeline.json', 'w') as out_file:
    out_file.write(json.dumps(global_data))