In [1]:
import pandas as pd
from os import path
from datetime import date, timedelta

In [2]:
# Reporting window: it opens seven days before the day the notebook is run.
duration_start = date.today() - timedelta(days=7)
window_open = duration_start.strftime("%Y-%m-%d")

# Raw long-format table; the count column is forced to 64-bit integers.
tsv_path = path.join('..', 'raw_data', 'shanghai_districts_detailed.tsv')
raw_table = pd.read_table(tsv_path).astype({'计数': 'int64'})

# Keep rows on or after the window start and drop rows whose 来源口径 is "转诊".
# NOTE(review): 日期 is compared against a "%Y-%m-%d" string, so this presumably
# relies on the column holding ISO-formatted date strings -- confirm.
in_window = raw_table['日期'] >= window_open
not_excluded = raw_table['来源口径'] != "转诊"
districts = raw_table[in_window & not_excluded]

In [3]:
# Show the filtered long-format rows (a bare last expression renders the frame).
districts

Unnamed: 0,日期,区县,诊断,来源口径,计数
3360,2022-05-07,浦东,确诊,闭环,22
3361,2022-05-07,浦东,确诊,社会面,0
3363,2022-05-07,浦东,无症状,闭环,641
3364,2022-05-07,浦东,无症状,社会面,6
3365,2022-05-07,黄浦,确诊,闭环,7
...,...,...,...,...,...
3914,2022-05-13,奉贤,无症状,社会面,0
3915,2022-05-13,崇明,确诊,闭环,0
3916,2022-05-13,崇明,确诊,社会面,0
3918,2022-05-13,崇明,无症状,闭环,2


In [4]:
# Sum 计数 within each (区县, 日期, 来源口径) group; the result keeps a single
# 计数 column and is indexed by the three grouping keys.
group_keys = ['区县', '日期', '来源口径']
districts = districts.groupby(group_keys)[['计数']].sum()

In [5]:
# Show the aggregated counts, now indexed by 区县 / 日期 / 来源口径.
districts

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,计数
区县,日期,来源口径,Unnamed: 3_level_1
嘉定,2022-05-07,社会面,0
嘉定,2022-05-07,闭环,108
嘉定,2022-05-08,社会面,0
嘉定,2022-05-08,闭环,110
嘉定,2022-05-09,社会面,1
...,...,...,...
黄浦,2022-05-11,闭环,228
黄浦,2022-05-12,社会面,0
黄浦,2022-05-12,闭环,274
黄浦,2022-05-13,社会面,0


In [6]:
# Per-district, per-day subtotal over every 来源口径 value. The subtotal lives
# in its own 单日计 column and is tagged with 来源口径 == '单日计' so that its
# index has the same three levels as `districts`.
#
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the first two index levels and summing is the supported
# replacement. sort=False keeps the groups in first-appearance order.
daily_subtotal = (
    districts
    .rename(columns={'计数': '单日计'})
    .groupby(level=[0, 1], sort=False)
    .sum()
    .assign(来源口径='单日计')
    .set_index('来源口径', append=True)
)

In [7]:
# Show `districts` again.
# NOTE(review): the preceding cell only assigns `daily_subtotal`, so this frame
# is unchanged since it was last displayed -- `daily_subtotal` may have been the
# intended object to inspect here; confirm.
districts

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,计数
区县,日期,来源口径,Unnamed: 3_level_1
嘉定,2022-05-07,社会面,0
嘉定,2022-05-07,闭环,108
嘉定,2022-05-08,社会面,0
嘉定,2022-05-08,闭环,110
嘉定,2022-05-09,社会面,1
...,...,...,...
黄浦,2022-05-11,闭环,228
黄浦,2022-05-12,社会面,0
黄浦,2022-05-12,闭环,274
黄浦,2022-05-13,社会面,0


In [8]:
# Align the per-category counts (column 计数) and the daily subtotals
# (column 单日计) side by side on the union of their row indexes. Rows present
# in only one of the two frames get NaN in the other frame's column.
#
# The former `levels=[0,1,2]` argument is omitted: `levels` is only meaningful
# together with `keys`, and no keys are passed here.
districts = pd.concat([districts, daily_subtotal], axis=1, join='outer')

In [9]:
# Merge the two partially filled columns into a single 单项 column:
# missing values become 0, both columns are cast to int32 and added, and the
# two source columns are then discarded.
districts = (
    districts
    .fillna(0)
    .astype('int32')
    .assign(单项=lambda filled: filled['计数'] + filled['单日计'])
    .drop(columns=['计数', '单日计'])
)

In [10]:
yesterday = date.today() - timedelta(days=1)

# Pivot to wide form: unstacking twice moves the last two index levels into the
# columns, leaving one row per 区县 and columns keyed by (单项, 来源口径, 日期).
export_districts = districts.unstack().unstack()

# Order districts by their daily subtotal, smallest first. Yesterday's column
# is used when it exists; if the data has no column for yesterday (which would
# otherwise raise a KeyError), fall back to the newest date that is available.
subtotal_dates = export_districts['单项']['单日计'].columns
sort_date = yesterday.strftime("%Y-%m-%d")
if sort_date not in subtotal_dates:
    # NOTE(review): assumes 日期 labels are ISO "%Y-%m-%d" strings, so the
    # lexicographic maximum is the latest date -- confirm.
    sort_date = max(subtotal_dates)

export_districts = export_districts.sort_values([('单项', '单日计', sort_date)], ascending=True)

In [11]:
# Show the wide export table: one row per 区县, columns keyed by
# (单项, 来源口径, 日期).
export_districts

Unnamed: 0_level_0,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项,单项
来源口径,单日计,单日计,单日计,单日计,单日计,单日计,单日计,社会面,社会面,社会面,社会面,社会面,社会面,社会面,闭环,闭环,闭环,闭环,闭环,闭环,闭环
日期,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-07,2022-05-08,2022-05-09,...,2022-05-11,2022-05-12,2022-05-13,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13
区县,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
奉贤,11,2,5,0,5,1,0,2,0,0,...,1,0,0,9,2,5,0,4,1,0
崇明,51,13,6,8,4,6,3,0,0,0,...,0,0,1,51,13,6,8,4,6,2
金山,0,2,1,0,3,1,7,0,0,0,...,0,0,0,0,2,1,0,3,1,7
松江,29,30,20,15,13,16,9,0,5,0,...,0,0,0,29,25,20,15,13,16,9
青浦,32,48,54,16,35,19,15,0,0,1,...,0,0,0,32,48,53,16,35,19,15
普陀,54,47,49,25,27,28,25,0,0,0,...,0,0,0,54,47,49,25,27,28,25
长宁,62,72,52,14,39,59,37,0,0,0,...,0,0,0,62,72,52,14,39,59,37
嘉定,108,110,132,15,73,67,40,0,0,1,...,0,0,0,108,110,131,15,73,67,40
徐汇,295,291,119,62,65,91,83,1,1,0,...,1,0,0,294,290,119,62,64,91,83
虹口,262,279,250,100,97,99,88,1,0,0,...,0,0,0,261,279,250,100,97,99,88


In [12]:
# Destination directory for the exported CSV files.
basepath = path.join('..', '..', 'COVID_JS_INTERACTIVE', 'covid_5cities_jsdist', 'dailyCovid')

# One CSV per 来源口径 category; each file holds that category's slice of the
# wide table (rows: 区县, columns: 日期).
csv_targets = (
    ('districtwise_within_quarantine.csv', '闭环'),
    ('districtwise_outside_quarantine.csv', '社会面'),
)

for csv_name, category in csv_targets:
    with open(path.join(basepath, csv_name), 'w+', encoding='utf-8', newline='') as chart_dataset:
        export_districts['单项'][category].to_csv(chart_dataset)