### 要搜集的数据
1. 时间上最近的文件是哪个
- 最近一个病例
- 总**病例数**和总死亡数 (按/不按国家分组)
- 总**活跃病例数**+总康复数 (按国家分组)
*计算公式可以参考：<https://towardsdatascience.com/calculating-live-covid-19-cases-from-deaths-e70e5df45f60>*
- 除了第一天外，每天和每周的总病例数和总死亡数
- 最后一天的总病例数+总死亡数 (按国家分组)

In [1]:
import os
import csv
from datetime import datetime

In [2]:
def getfiletable(file):
    """Read one CSV file from ``./covid-data`` into a list of rows.

    Args:
        file: Name of a CSV file located inside ``./covid-data``.

    Returns:
        A list with one entry per CSV record (any header row included);
        each entry is the list of that record's fields as strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module asks for, so that line breaks embedded
    # in quoted fields reach the reader untouched. The explicit encoding keeps
    # the result independent of the platform's locale default.
    with open(f'./covid-data/{file}', newline='', encoding='utf-8') as csvfile:
        return list(csv.reader(csvfile))

In [3]:
def _report_date(name):
    """Return the date encoded in a ``MM-DD-YYYY.csv`` file name.

    Names that do not follow that pattern map to ``datetime.min`` so they
    sort after every dated report instead of aborting the listing.
    """
    try:
        return datetime.strptime(os.path.splitext(name)[0], '%m-%d-%Y')
    except ValueError:
        return datetime.min


# Collect the CSV files in ./covid-data, newest report first, so that
# files[0] is the most recent one.
dirs = os.listdir('./covid-data')
files = [name for name in dirs if os.path.splitext(name)[1] == ".csv"]
# Sort on the parsed date rather than the raw string: a text sort of
# MM-DD-YYYY names compares the month before the year, which would rank
# '12-31-2020.csv' above '01-01-2021.csv'.
files.sort(key=_report_date, reverse=True)
print(files)

['09-14-2021.csv', '09-13-2021.csv', '09-12-2021.csv', '09-11-2021.csv', '09-10-2021.csv', '09-09-2021.csv', '09-08-2021.csv', '09-07-2021.csv', '09-06-2021.csv', '09-05-2021.csv', '09-04-2021.csv', '09-03-2021.csv', '09-02-2021.csv', '09-01-2021.csv', '08-31-2021.csv', '08-30-2021.csv', '08-29-2021.csv', '08-28-2021.csv', '08-27-2021.csv', '08-26-2021.csv', '08-25-2021.csv', '08-24-2021.csv', '08-23-2021.csv', '08-22-2021.csv', '08-21-2021.csv', '08-20-2021.csv', '08-19-2021.csv', '08-18-2021.csv', '08-17-2021.csv', '08-16-2021.csv']


In [4]:
# Load the first file in `files` and drop its first row (presumably the CSV
# header — confirm against the data files).
table = getfiletable(files[0])[1:]
# Column 4 of these rows is parsed as a 'YYYY-MM-DD HH:MM:SS' timestamp.
dt1 = datetime.strptime(table[1][4], '%Y-%m-%d %H:%M:%S')
dt2 = datetime.strptime(table[79][4], '%Y-%m-%d %H:%M:%S')
# weekday(): Monday is 0, Sunday is 6.
print(dt1, dt1.weekday())
print(dt2, dt2.weekday())
# Capture the current time once so both intervals below are measured from the
# same instant; two separate now() calls differ by a few microseconds, which
# would make the equality test false even for identical timestamps.
now = datetime.now()
# Subtracting one datetime from another yields a timedelta.
print(now - dt1)
# timedelta objects can be compared with each other.
print(now - dt1 == now - dt2)

2021-09-15 03:22:37 2
2020-12-21 13:27:30 0
39 days, 10:49:50.852193
False


In [5]:
# Convention: {country: [population, cases, deaths]}
incident_rate_map = {}
# Population estimate: number of cases * 100_000 / incident rate
for record in table:
    incidence = record[-2]
    # Rows whose incident rate is empty or zero cannot give a population
    # estimate, so they are skipped.
    if incidence == '' or float(incidence) == 0:
        continue
    # The country is the last comma-separated part of the third-from-last
    # column.
    country = record[-3].split(',')[-1].strip()
    cases, deaths, rate = int(record[7]), int(record[8]), float(incidence)
    population = cases * 100_000 / rate
    totals = incident_rate_map.get(country)
    if totals is None:
        # First row seen for this country: start its running totals.
        incident_rate_map[country] = [population, cases, deaths]
    else:
        totals[0] += population
        totals[1] += cases
        totals[2] += deaths
print(incident_rate_map)

{'Afghanistan': [38928341.0, 154180, 7171], 'Albania': [2877800.0, 158431, 2553], 'Algeria': [43851043.0, 200528, 5614], 'Andorra': [77265.0, 15099, 130], 'Angola': [32866268.000000004, 51047, 1358], 'Antigua and Barbuda': [97928.0, 2304, 48], 'Argentina': [45195776.99999999, 5229848, 113816], 'Armenia': [2963234.0, 249803, 5034], 'Australia': [25459700.0, 78544, 1116], 'Austria': [9006400.0, 713269, 10849], 'Azerbaijan': [10139175.0, 463326, 6167], 'Bahamas': [393248.0, 19601, 463], 'Bahrain': [1701583.0000000002, 273916, 1388], 'Bangladesh': [164689383.0, 1534440, 27007], 'Barbados': [287371.00000000006, 6053, 52], 'Belarus': [9449321.0, 506591, 3941], 'Belgium': [11431406.0, 1188296, 0], 'Belize': [397621.0, 17967, 383], 'Benin': [12123198.000000002, 21450, 146], 'Bhutan': [771611.9999999999, 2596, 3], 'Bolivia': [11673029.0, 495612, 18603], 'Bosnia and Herzegovina': [3280815.0, 222927, 10099], 'Botswana': [2351624.9999999995, 165644, 2337], 'Brazil': [210147125.0, 21019830, 587797]