# Get and calculate the data
Read the file ["Readme.ipynb"](Readme.ipynb) for more information.

## Modules
Needed to use non-Python functionalities already programmed by someone else.

In [1]:
# Used to convert the API data from json-format into a Python list
from json2xml.utils import readfromurl
import json    # to save the data in "json"-format in a file
# Used to check if there is a local file with the data or if a new API pull is inevitable
import os.path
import datetime    # to convert Unix time to UTC

In [2]:
# Used to cast the polygons into np.arrays and afterwards transpose them easily with the .T function
import numpy as np
import matplotlib.pyplot as plt    # to plot the counties

## Control
Set variables to "True" to trigger the action described by the comment and the variable's name.<br/><br/>
If multiple of the three variables "covid19_use_api", "covid19_use_api_backup" and "covid19_use_polished_data" are set to "True", the last one overwrites all data collected by the others. It is best practice to only set one variable to "True".<br/><br/>
If one data source seems to provide faulty data or the necessary files do not exist, try out the other options.

In [3]:
# Geographical county data: True reads the polished file inside "polished_data",
# False runs get_geographical_data_of_german_counties.ipynb to produce that data anew.
counties_geography_use_polished_data = True

# Switches for the source of the COVID-19 data. Ideally exactly one of them is True;
# the following cell overrides them if the files a choice depends on are missing.
covid19_use_api = False    # pulls current COVID-19 case numbers from the API
covid19_use_api_backup = False    # polishes the local backup of an old API pull
covid19_use_polished_data = True    # loads old, already polished data from file

### Check the Controls
Check if the necessary files to run the choices made by the controls above exist. Otherwise the data must be taken from somewhere else.<br/>
Pulling from the API takes a lot of time and resources. If the user therefore chooses in the controls above not to pull from the API, this choice should only be changed if it is unavoidable.
<br/><br/>
There are three ways how data could be missing:
- Neither polished nor unpolished data about the German COVID-19 cases are saved on the machine. In this case a new pull from the API is inevitable.
- No polished version of the data exists on the machine, but a backup of an old API pull does. Therefore the program initiates a pull from the API or a "pull" from the backup file with the unpolished data.
- No backup of an old API pull exists, but a polished version of the data does. In this case the backup option is switched off and, unless a pull from the API was requested, the program uses the polished data. That the file with the polished data exists follows from the two preceding cases.

In each respective case the global control variables are changed accordingly.

In [4]:
# Look up which local data files are available before deciding on a source.
polished_file_exists = os.path.isfile("polished_data/german_covid19.txt")
backup_file_exists = os.path.isfile("unpolished_data/covid19/dates.txt")

if not (polished_file_exists or backup_file_exists):
    # Nothing is stored locally: a fresh pull from the API is the only option.
    covid19_use_polished_data = False
    covid19_use_api_backup = False
    covid19_use_api = True
elif not polished_file_exists:
    # Only the backup of an old API pull exists (guaranteed by the first branch):
    # the polished data cannot be used, so fall back to the backup unless the
    # user asked for an API pull.
    covid19_use_polished_data = False
    covid19_use_api_backup = not covid19_use_api
elif not backup_file_exists:
    # Only the polished file exists (guaranteed by the first branch):
    # the backup cannot be used, so fall back to the polished data unless the
    # user asked for an API pull.
    covid19_use_api_backup = False
    covid19_use_polished_data = not covid19_use_api

The "number_of_counties" is also set here: It determines how many counties must be present in the data. If there are fewer or more, the current data source is declared a fail and (if possible) another one is used.

In [5]:
# Expected number of county entries in the COVID-19 data; the validation cell
# further down compares len(covid19) against this value before saving.
number_of_counties = 412

## Get the Geographical Data of Every German County
If "counties_geography_use_polished_data" is set to "True" and the required file exists, the polished data from that file is used. <br/>
If "counties_geography_use_polished_data" is set to "False" by the user or if the required file does not exist, the file "get_geographical_data_of_german_counties.ipynb" is called to provide new polished data.<br/>
For more information on where the data comes from and how it is polished check out the file "get_geographical_data_of_german_counties.ipynb". 

In [6]:
# Without the polished geography file the data has to be produced anew,
# whatever the user selected in the controls.
geography_file = "polished_data/german_counties_geography.txt"
if not os.path.isfile(geography_file):
    counties_geography_use_polished_data = False

In [7]:
if counties_geography_use_polished_data:
    # Load the previously polished county geography, which is stored as JSON text.
    with open("polished_data/german_counties_geography.txt", "r") as file:
        counties_geography = json.loads(file.read())
    print("Polished county data from file is ready to go!")
else:
    # Flag set right before the other notebook is run; presumably read there to
    # suppress its outputs - verify in get_geographical_data_of_german_counties.ipynb.
    no_outputs_from_file_get_shapes_of_german_counties = True
    # IPython magic that executes the other notebook. It is expected to provide
    # "counties_geography", which the following cells rely on - TODO confirm.
    %run get_geographical_data_of_german_counties.ipynb

Polished county data from file is ready to go!


## Get the COVID-19 Cases of Every German County
Saves the COVID-19 cases of every German county since the start of the pandemic in the dictionary "covid19" (reachable by the county's AdmUnitID) and the corresponding dates in the dictionary "non_county_specific_data".

### Helper Functions
**url_county(AdmUnitID, True_for_dates_False_for_covid19_cases = False)**: returns url<br/>
Used to get the url for the COVID-19 cases of the German county determined by the AdmUnitID.<br/>
*AdmUnitId*<br/>
-> identifier of the county whose COVID-19 cases should be requested<br/>
*True_for_dates_False_for_covid19_cases* (default: False)<br/>
-> Determines whether the dates in Unix time format or the actual COVID-19 cases should be requested

In [8]:
def url_county(AdmUnitID, True_for_dates_False_for_covid19_cases = False):
    """Build the query URL for one county of the "rki_history_hubv" service.

    AdmUnitID: identifier of the county; it is converted with str().
    True_for_dates_False_for_covid19_cases: if true, the field "Datum" is
        requested, otherwise the field "KumFall".

    Returns the URL as a string. The result is always ordered by "Datum" and
    requested in the "pjson" format.
    """
    requested_field = "Datum" if True_for_dates_False_for_covid19_cases else "KumFall"
    url_parts = [
        "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/",
        "rki_history_hubv/FeatureServer/0/query?where=AdmUnitId%3D",
        str(AdmUnitID),
        "&outFields=",
        requested_field,
        "&orderByFields=Datum&f=pjson",
    ]
    return "".join(url_parts)

**find_alternative_source_of_data_and_activate_it()**: returns void (modifies multiple global variables)<br/>
Gets called when the data from a data source is faulty. Deletes faulty data to prevent use of faulty data. Checks which other data source could be used and modifies the global variables accordingly.

In [9]:
def find_alternative_source_of_data_and_activate_it():
    """Discard faulty COVID-19 data and activate another data source.

    Must be called while the flag of the failing source is still set: the
    function reads the global "covid19_use_api" to tell which source failed.
    - "covid19_use_api" is True: the API pull failed. The local backup is
      activated if it exists, otherwise the polished data.
    - "covid19_use_api" is False: the local backup failed. The backup is
      switched off and the polished data is activated if it exists.

    Side effects (all on global variables):
    - shallow copies of "covid19" and "non_county_specific_data" are kept in
      "copy_of_covid19_for_debugging_purposes" and
      "copy_of_non_county_specific_data_for_debugging_purposes",
    - "covid19" and "non_county_specific_data" are deleted,
    - "covid19_use_api_backup" / "covid19_use_polished_data" are updated.

    Raises Exception("No usable data found!") if afterwards neither the backup
    nor the polished data is active.
    """
    global covid19_use_api
    global covid19_use_api_backup
    global covid19_use_polished_data
    global copy_of_covid19_for_debugging_purposes
    # Declared global as well; otherwise the copy would be a local variable
    # that is thrown away when the function returns.
    global copy_of_non_county_specific_data_for_debugging_purposes
    global covid19
    global non_county_specific_data
    copy_of_non_county_specific_data_for_debugging_purposes = non_county_specific_data.copy()
    copy_of_covid19_for_debugging_purposes = covid19.copy()
    del non_county_specific_data    # to prevent accidental use of faulty data
    del covid19    # to prevent accidental use of faulty data

    # The backup cell only runs while "covid19_use_api" is False, the API cell
    # only while it is True, so this flag identifies the failing source.
    api_pull_failed = covid19_use_api
    if not api_pull_failed:
        # The backup itself delivered the faulty data: never fall back to it
        # again, even though its files still exist.
        covid19_use_api_backup = False

    # check if a local pull of the API exists otherwise use the polished data
    if api_pull_failed and os.path.isfile("unpolished_data/covid19/dates.txt"):
        covid19_use_api_backup = True
    elif os.path.isfile("polished_data/german_covid19.txt"):
        covid19_use_polished_data = True
    # neither local backup nor polished data found
    if not(covid19_use_api_backup) and not(covid19_use_polished_data):
        raise Exception("No usable data found!")

### Pull from API
If "covid19_use_api" is set to "True", the program pulls from the ["COVID-19 Datenhub"](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/6d78eb3b86ad4466a8e264aa2e32a2e4_0). The data of each county must be pulled separately because the API only allows for 1,000 datapoints at a time and all counties times the number of days is well over 100,000. The identifiers of the counties originate from the keys of the dictionary "counties_geography".<br/><br/>
First, the received data is checked: If any county has fewer timestamps than the dates stored in "non_county_specific_data['unixtime']", all data gets deleted to prevent the use of faulty data and an alternative data source is chosen.<br/><br/>
If the unpolished data passes this rudimentary test, it is stored as it is in a ".txt-file" with its AdmUnitID as its name in the folder "covid19" inside the folder "unpolished_data". If any of the folders or any of the files do not yet exist, they are created.<br/>
This file can be used in further executions as local backup of the API-pull.
<br/><br/>
At the end of this chapter the polished version of the data is stored in the dictionary "covid19".

In [10]:
# Pull the dates once and the accumulated case numbers of every county from the
# API, store the raw answers as local backup and keep the case lists in "covid19".
if covid19_use_api:
    print("Pulling from API...")
    covid19 = dict()
    non_county_specific_data = dict()
    # check if the needed directory is available - otherwise create it
    if not(os.path.isdir("unpolished_data/covid19")): os.makedirs("unpolished_data/covid19")
    # -1 marks "dates not pulled yet"; replaced by the number of dates of the first county
    number_of_timestamps = -1

    for AdmUnitID in list(counties_geography.keys()):
        # get dates of first county (only done in the first iteration)
        if number_of_timestamps == -1:
            raw_dates = readfromurl(url_county(AdmUnitID, True))
            # fewer than 200 dates are treated as a faulty answer
            if len(raw_dates['features']) < 200:
                print("The dates of {} sends to little timestamps ({}) - check the url"
                      .format(AdmUnitID, len(raw_dates['features'])))
                # the helper must run before the flag is reset; afterwards leave the loop
                find_alternative_source_of_data_and_activate_it()
                covid19_use_api = False
                break
            number_of_timestamps = len(raw_dates['features'])
            non_county_specific_data['unixtime'] = [e['attributes']['Datum'] for e in raw_dates['features']]
            # save raw data (written before any county has been validated)
            with open("unpolished_data/covid19/dates.txt", "w") as file:
                file.write(json.dumps(raw_dates))

        # get the county's covid19 data
        raw_covid19_data = readfromurl(url_county(AdmUnitID))
        # every county must deliver exactly as many entries as there are dates
        if number_of_timestamps != len(raw_covid19_data['features']):
            print("The provided data from the API does not have the same number of timestamps of " +
                  "{}, it has {}.".format(number_of_timestamps, len(raw_covid19_data['features'])))
            find_alternative_source_of_data_and_activate_it()
            covid19_use_api = False
            break
        # save the raw answer as backup; the file is named after the AdmUnitID
        with open("unpolished_data/covid19/" + AdmUnitID + ".txt", "w") as file:
            file.write(json.dumps(raw_covid19_data))
        covid19[AdmUnitID] = dict()
        covid19[AdmUnitID]['cases'] = [e['attributes']['KumFall'] for e in raw_covid19_data['features']]

    # still True only if no county failed: make the API the only active source
    if covid19_use_api:
        covid19_use_polished_data = False
        covid19_use_api_backup = False
        print("Covid19 Data directly from API is ready to go!")

### "Pull" from Local API Backup
If the use of the data from a local backup of the API-pull is requested and possible, the data is read from the files in the folder "covid19" inside the folder "unpolished_data". The name of the files should represent the "AdmUnitID" of the county.<br/>
The data is polished and stored in the dictionary "covid19" during the reading progress.
<br/><br/>
The received data is checked: If any county has fewer timestamps than the dates stored in "non_county_specific_data['unixtime']", all data gets deleted to prevent the use of faulty data and an alternative data source is chosen.

In [11]:
# Read the local backup of an old API pull: the dates from "dates.txt" and the
# accumulated cases of every county from the file named after its AdmUnitID.
# As soon as the backup turns out to be faulty, the fallback helper is called
# exactly once and "covid19_use_api_backup" is switched off.
if not(covid19_use_api) and covid19_use_api_backup:
    print("Reading backup of old API pull...")
    covid19 = dict()
    non_county_specific_data = dict()
    # counties that still need a backup file; every file found removes one entry
    list_of_countys = list(counties_geography.keys())
    # get the dates
    with open("unpolished_data/covid19/dates.txt", "r") as file:
        raw_dates = json.loads(file.read())
    non_county_specific_data['unixtime'] = [e['attributes']['Datum'] for e in raw_dates['features']]
    number_of_timestamps = len(non_county_specific_data['unixtime'])

    # too few dates - something is wrong. Checked before any county file is read.
    if number_of_timestamps < 200:
        print("There are only {} dates - check your backup or make a new pull from the api."
              .format(number_of_timestamps))
        find_alternative_source_of_data_and_activate_it()
        # reset the backup flag (not the API flag), so the checks below are skipped
        covid19_use_api_backup = False

    if covid19_use_api_backup:
        for root, dirs, files in os.walk('unpolished_data/covid19'):
            for filename in files:
                AdmUnitID = filename[:-4]
                if AdmUnitID == 'dates':    # already done
                    continue
                if AdmUnitID not in list_of_countys:
                    # stray file or unknown/duplicate county: ignore it instead of crashing
                    print("Ignoring unexpected file {} in the backup.".format(filename))
                    continue

                list_of_countys.remove(AdmUnitID)
                covid19[AdmUnitID] = dict()
                with open(os.path.join(root, filename), "r") as file:
                    covid19[AdmUnitID]['cases'] = [e['attributes']['KumFall'] for e in
                                                   json.loads(file.read())['features']]

                if number_of_timestamps != len(covid19[AdmUnitID]['cases']):
                    # report the number of cases, not the number of keys of the county dict
                    print("The data from file {} does not have {} timestamps, it has {}."
                          .format(filename, number_of_timestamps,
                                  len(covid19[AdmUnitID]['cases'])))
                    find_alternative_source_of_data_and_activate_it()
                    covid19_use_api_backup = False
                    break

            # "covid19" no longer exists after a failure: stop walking the directories too
            if not(covid19_use_api_backup):
                break

    if len(list_of_countys) > 0 and covid19_use_api_backup:
        print("No backup found for {}".format(list_of_countys))
        find_alternative_source_of_data_and_activate_it()
        covid19_use_api_backup = False

    if covid19_use_api_backup:
        covid19_use_polished_data = False
        covid19_use_api = False
        print("Covid19 Data from (maybe old) API-pull-backup is ready to go!")

### Calculate the Seven Days Incidence and Get the Cases, Incidences and Inhabitants of Germany
The calculation of the incidence needs the number of cases seven days prior (set to zero if not defined), the cases of the current day (both from "covid19[AdmUnitID]['cases']") and the number of inhabitants of the county ("counties_geography[AdmUnitID]['population']").
<br/><br/>
To get all new cases in that county within the last seven days the program subtracts the accumulated cases seven days earlier from the accumulated cases of the current day. Afterwards this number of cases is divided by the county's population. In order to scale it to 100,000 inhabitants, the result is multiplied by 100,000.<br/>
This is done for every case number of every county.
<br/><br/>
The highest and lowest seven days incidence and the highest and lowest case number are stored in the dictionary "non_county_specific_data" as a reference.
<br/><br/><br/>
The number of inhabitants of Germany is calculated by adding the number of inhabitants of the counties. The same applies to accumulated number of COVID-19 cases for every day. The seven days incidence is calculated as described above.

In [12]:
# Derive everything that is calculated from the raw case numbers: Germany's
# population and accumulated cases, the seven days incidences of every county
# and of Germany, and the extreme values over all counties and days.
if not covid19_use_polished_data:
    ncsd = non_county_specific_data    # short alias for the same dictionary

    # Germany's population is the sum of the populations of its counties.
    population_total = 0
    for county in counties_geography.values():
        population_total += county['population']
    ncsd['population_germany'] = population_total

    ncsd['cases_germany'] = [0] * len(ncsd['unixtime'])

    # Start values that the first real number replaces.
    ncsd['highest_case_number'] = 0
    ncsd['lowest_case_number'] = 100000000000000
    ncsd['highest_incidence'] = 0
    ncsd['lowest_incidence'] = 100000000000000

    for AdmUnitID, county_data in covid19.items():
        county_cases = county_data['cases']
        county_incidences = county_data['incidences'] = []
        for timestamp, cases_on_day in enumerate(county_cases):
            # every county adds its cases to the German total of that day
            ncsd['cases_germany'][timestamp] += cases_on_day

            # new cases of the last seven days, scaled to 100,000 inhabitants;
            # in the first seven days the earlier value counts as zero
            cases_7_days_prior = county_cases[timestamp - 7] if timestamp >= 7 else 0
            incidence = ((cases_on_day - cases_7_days_prior) * 100000
                         / counties_geography[AdmUnitID]['population'])
            county_incidences.append(incidence)

            # keep track of the extreme values as a reference
            ncsd['highest_case_number'] = max(ncsd['highest_case_number'], cases_on_day)
            ncsd['lowest_case_number'] = min(ncsd['lowest_case_number'], cases_on_day)
            ncsd['highest_incidence'] = max(ncsd['highest_incidence'], incidence)
            ncsd['lowest_incidence'] = min(ncsd['lowest_incidence'], incidence)

    # The same incidence calculation for the whole of Germany.
    germany_cases = ncsd['cases_germany']
    ncsd['incidences_germany'] = [
        (cases_on_day - (germany_cases[timestamp - 7] if timestamp >= 7 else 0)) * 100000
        / ncsd['population_germany']
        for timestamp, cases_on_day in enumerate(germany_cases)]

## Get the Names of the German Federal States
This data is hardcoded because it is unlikely to change. Even if the names of the federal states become outdated and do not fit the current official name, the functionality of this project will not be affected.
The names are taken from the ["COVID-19 Datenhub"](https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/rki_admunit_hubv/FeatureServer/0/query?where=AdmUnitId%3C20&resultType=none&outFields=*&f=pjson).

In [13]:
if not covid19_use_polished_data:
    # Names of the federal states in the order of their identifiers "1" to "16".
    state_names = (
        "Schleswig-Holstein",
        "Hamburg",
        "Niedersachsen",
        "Bremen",
        "Nordrhein-Westfalen",
        "Hessen",
        "Rheinland-Pfalz",
        "Baden-Württemberg",
        "Bayern",
        "Saarland",
        "Berlin",
        "Brandenburg",
        "Mecklenburg-Vorpommern",
        "Sachsen",
        "Sachsen-Anhalt",
        "Thüringen",
    )
    # The identifiers are stored as strings.
    non_county_specific_data['states'] = {
        str(state_id): state_name
        for state_id, state_name in enumerate(state_names, start=1)}

## Check and Save the Polished Covid19 Data
Before the COVID-19 cases are saved in the file "german_covid19.txt" inside the folder "polished_data", they are checked once again to ensure that during the polishing nothing gets lost or is changed.
<br/>
It is checked if there are fewer or more counties than defined in the variable number_of_counties and if every list of cases is as long as the dedicated dates.

In [14]:
# Final plausibility check before saving: the number of counties must match
# "number_of_counties" and every county needs exactly one case number per date.
if not(covid19_use_polished_data):
    covid19_data_seems_to_be_flawless = True    # Assume everything is correct
    if len(covid19) != number_of_counties:
        print("covid19 has not the right amount of counties: {} instead of {}."
                .format(len(covid19), number_of_counties))
        covid19_data_seems_to_be_flawless = False
    expected_number_of_dates = len(non_county_specific_data['unixtime'])
    for AdmUnitID, county_data in covid19.items():
        if len(county_data['cases']) != expected_number_of_dates:
            # name the affected county by its identifier (not by a variable
            # left over from an earlier cell)
            print("The county {} has not the right amount of dates: {} instead of {}."
                    .format(AdmUnitID, len(county_data['cases']),
                            expected_number_of_dates))
            covid19_data_seems_to_be_flawless = False

In [15]:
# Save freshly polished data only; the flawless flag exists only when the
# polished file was not the source, so it is looked at second.
if not covid19_use_polished_data:
    if covid19_data_seems_to_be_flawless:
        # create the needed directory if it is not available yet
        os.makedirs("polished_data", exist_ok=True)
        with open("polished_data/german_covid19.txt", "w") as polished_file:
            json.dump((covid19, non_county_specific_data), polished_file)
        print("Saved seemingly flawless covid19 data.")

## Get the Polished Data
If the pull from the API and/or the "pull" from the local backup failed or the user chose to use the polished data, the file "german_covid19.txt" inside the folder "polished_data" is opened and the data is stored in the variables "covid19" and "non_county_specific_data".

In [16]:
if covid19_use_polished_data:
    # The polished file is the only active source from here on.
    covid19_use_api = covid19_use_api_backup = False
    # The file holds a pair: the county data and the non county specific data.
    with open("polished_data/german_covid19.txt", "r") as polished_file:
        polished_content = json.load(polished_file)
    covid19, non_county_specific_data = polished_content
    print("Polished covid19 data from file is ready to go!")

Polished covid19 data from file is ready to go!


##  Add UTC Time and Additional Dates
Humans are generally not used to the Unix time; this is why the more accessible kind of time format UTC is chosen. The exact hour in Germany and the time shift are not taken into account because the data is only compared to other data with the same time shift.
<br/><br/>
The UTC time is added after saving the data because the UTC time format cannot be saved in json format. Therefore it must always be generated anew. Calculating it inside the file "get_data.ipynb" keeps the plotting of the data strictly separated from the pulling and polishing of the data.

In [17]:
# Convert the Unix times (milliseconds) into dates of the form "dd.mm.yyyy".
# The timestamps are interpreted explicitly in UTC, so the resulting dates do
# not depend on the time zone of the machine that runs the notebook.
# NOTE(review): assumes the API delivers midnight-UTC timestamps - confirm.
non_county_specific_data['UTC'] = [
    datetime.datetime.fromtimestamp(date // 1000, tz=datetime.timezone.utc).strftime('%d.%m.%Y')
    for date in non_county_specific_data['unixtime']]

In [18]:
# 'UTC+7days' holds the seven days before the first known date followed by all
# dates of 'UTC'. The earlier days are derived from the first entry of 'UTC'
# with calendar arithmetic, so they always continue that list seamlessly,
# independent of the machine's time zone or daylight saving time.
first_listed_day = datetime.datetime.strptime(
    non_county_specific_data['UTC'][0], '%d.%m.%Y').date()
non_county_specific_data['UTC+7days'] = (
    [(first_listed_day - datetime.timedelta(days=days_back)).strftime('%d.%m.%Y')
     for days_back in range(7, 0, -1)]
    + non_county_specific_data['UTC'])

In [19]:
def create_url_reports(AdmUnitID):
    """Build the query URL for the single reports of one county.

    AdmUnitID: identifier of the county; it is converted with str(), so
        strings and integers are both accepted (as in url_county).

    Returns the URL of the "RKI_COVID19" service as a string. The reports are
    filtered by "IdLandkreis", ordered by "Meldedatum" and requested in the
    "pjson" format with all fields.
    """
    # "outFields=*" requests every field. A narrower selection would be:
    # "Altersgruppe%2C+Geschlecht%2C+AnzahlFall%2C+AnzahlTodesfall%2C+AnzahlGenesen%2C+Meldedatum%2C+Refdatum"
    return ("https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/"
            "query?where=IdLandkreis%3D" + str(AdmUnitID) +
            "&orderByFields=Meldedatum&f=pjson&outFields=*")

In [20]:
# Add up the single reports of every county per day: deaths and recovered
# persons by their "Meldedatum", cases by their "Refdatum".
# NOTE(review): only one request per county is made; this notebook states that
# the API delivers a limited number of datapoints per request - verify that no
# reports are cut off.

# Position of every known date, built once instead of searching the list for
# every report. setdefault keeps the first position of a date, like list.index.
index_of_known_date = dict()
for position, known_date in enumerate(non_county_specific_data["unixtime"]):
    index_of_known_date.setdefault(known_date, position)


def _date_index_of_report(report_date):
    """Return the index of the day a report is counted on.

    Returns None for dates after the last known date (the report is skipped)
    and 0 for dates before the first known date (counted on the first day).
    Raises ValueError for a date inside the known range that is not listed.
    """
    if report_date > non_county_specific_data["unixtime"][-1]:
        return None
    if report_date < non_county_specific_data["unixtime"][0]:
        return 0
    if report_date not in index_of_known_date:
        raise ValueError("{} is not in list".format(report_date))
    return index_of_known_date[report_date]


for AdmUnitID, county in covid19.items():
    print(AdmUnitID)    # progress output, one line per county
    reports = readfromurl(create_url_reports(AdmUnitID))['features']

    number_of_dates = len(county["cases"])
    county["deaths_on_day"] = number_of_dates*[0]
    county["cases_on_day_from_reports"] = number_of_dates*[0]
    county["recovered_on_day"] = number_of_dates*[0]

    for report in reports:
        attributes = report['attributes']

        date_index = _date_index_of_report(attributes['Meldedatum'])
        if date_index is not None:
            county["deaths_on_day"][date_index] += attributes['AnzahlTodesfall']
            county["recovered_on_day"][date_index] += attributes['AnzahlGenesen']

        date_index = _date_index_of_report(attributes['Refdatum'])
        if date_index is not None:
            county["cases_on_day_from_reports"][date_index] += attributes['AnzahlFall']

1001
1002
1003
1004
1051
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
2000
3101
3102
3103
3151
3153
3154
3155
3157
3158
3159
3241
3251
3252
3254
3255
3256
3257
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3401
3402
3403
3404
3405
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
4011
4012
5111
5112
5113
5114
5116
5117
5119
5120
5122
5124
5154
5158
5162
5166
5170
5314
5315
5316
5334
5358
5362
5366
5370
5374
5378
5382
5512
5513
5515
5554
5558
5562
5566
5570
5711
5754
5758
5762
5766
5770
5774
5911
5913
5914
5915
5916
5954
5958
5962
5966
5970
5974
5978
6411
6412
6413
6414
6431
6432
6433
6434
6435
6436
6437
6438
6439
6440
6531
6532
6533
6534
6535
6611
6631
6632
6633
6634
6635
6636
7111
7131
7132
7133
7134
7135
7137
7138
7140
7141
7143
7211
7231
7232
7233
7235
7311
7312
7313
7314
7315
7316
7317
7318
7319
7320
7331
7332
7333
7334
7335
7336
7337
7338
7339
7340
8111
8115
8116
8117
8118
8119
8121
8125
8126
8127
8128
8135
8136
8211
8212
8215
8216
8221
8222
8225
8226
8231


In [21]:
def _running_total(values_per_day, number_of_dates):
    """Return the accumulated sums of "values_per_day" as a list.

    The result has "number_of_dates" entries; entry i is the sum of all daily
    values up to and including day i. Daily values beyond "number_of_dates" are
    ignored; if there are fewer daily values, the last total is repeated.
    """
    totals = []
    total_so_far = 0
    for date_index in range(number_of_dates):
        if date_index < len(values_per_day):
            total_so_far += values_per_day[date_index]
        totals.append(total_so_far)
    return totals


# Turn the numbers per day into accumulated numbers. Each list is walked
# through once instead of adding every day to all of its following days.
for county in covid19.values():
    number_of_dates = len(county["cases"])
    county["deaths"] = _running_total(county["deaths_on_day"], number_of_dates)
    county["cases_from_reports"] = _running_total(county["cases_on_day_from_reports"],
                                                  number_of_dates)
    county["recovered"] = _running_total(county["recovered_on_day"], number_of_dates)

In [22]:
# Keep an unchecked copy of the county data, including the numbers from the reports.
with open("dirty_backup.txt", "w") as backup_file:
    json.dump(covid19, backup_file)

In [28]:
# Print every day on which the cases accumulated from the reports differ by
# more than 100 from the accumulated cases:
# AdmUnitID, index of the day, cases from the reports, accumulated cases
for AdmUnitID, county in covid19.items():
    for date, cases_from_reports in enumerate(county["cases_from_reports"]):
        accumulated_cases = county["cases"][date]
        if abs(cases_from_reports - accumulated_cases) > 100:
            print("   ".join((AdmUnitID, str(date),
                              str(cases_from_reports), str(accumulated_cases))))

1002   267   1205   1100
1002   278   1511   1402
1002   279   1552   1435
1002   280   1575   1440
1002   281   1608   1474
1002   282   1645   1521
1002   283   1682   1565
1002   284   1719   1614
1002   285   1763   1660
1002   286   1807   1691
1002   287   1846   1705
1002   288   1891   1723
1002   289   1922   1785
1002   290   1962   1850
1002   291   1992   1878
1002   292   2036   1917
1002   293   2064   1945
1002   294   2089   1967
1002   295   2116   1993
1002   308   2442   2340
1002   309   2485   2370
1002   428   4956   4855
1003   238   575   467
1003   239   608   488
1003   240   631   498
1003   241   668   537
1003   242   695   575
1003   243   722   596
1003   244   739   602
1003   245   772   655
1003   246   794   667
1003   247   817   685
1003   248   840   698
1003   249   869   747
1003   250   894   763
1003   251   911   779
1003   252   928   797
1003   254   972   863
1003   275   1374   1254
1003   276   1406   1279
1003   277   1435   1297
1003   

5112   370   19827   19595
5112   371   19875   19635
5112   372   19968   19668
5112   373   20061   19771
5112   374   20142   19888
5112   375   20225   19983
5112   376   20305   20073
5112   377   20412   20161
5112   378   20503   20244
5112   379   20606   20282
5112   380   20718   20382
5112   381   20839   20515
5112   382   20942   20656
5112   383   21063   20755
5112   384   21176   20893
5112   385   21263   20968
5112   386   21369   21031
5112   387   21470   21169
5112   388   21564   21300
5112   389   21709   21451
5112   390   21830   21578
5112   391   21967   21714
5112   392   22098   21819
5112   393   22239   21922
5112   394   22383   22038
5112   395   22515   22202
5112   396   22647   22356
5112   397   22779   22480
5112   398   22904   22590
5112   399   22999   22670
5112   400   23135   22786
5112   401   23278   22882
5112   402   23414   23006
5112   403   23533   23166
5112   404   23696   23330
5112   405   23857   23507
5112   406   24004   23624
5

5315   214   5387   5184
5315   215   5459   5242
5315   216   5556   5304
5315   217   5642   5344
5315   218   5752   5404
5315   219   5839   5484
5315   220   5952   5635
5315   221   6048   5774
5315   222   6190   5896
5315   223   6315   5993
5315   224   6459   6088
5315   225   6603   6171
5315   226   6775   6302
5315   227   6954   6482
5315   228   7176   6685
5315   229   7340   6843
5315   230   7528   7007
5315   231   7733   7171
5315   232   7928   7320
5315   233   8205   7569
5315   234   8577   7888
5315   235   9007   8254
5315   236   9427   8633
5315   237   9752   8936
5315   238   10081   9207
5315   239   10404   9419
5315   240   10762   9855
5315   241   11114   10273
5315   242   11470   10720
5315   243   11781   11111
5315   244   12054   11390
5315   245   12367   11643
5315   246   12672   11867
5315   247   13009   12200
5315   248   13344   12597
5315   249   13676   12977
5315   250   14012   13348
5315   251   14310   13656
5315   252   14577   1391

5562   17   239   72
5562   18   268   87
5562   19   296   97
5562   20   315   98
5562   21   331   103
5562   22   366   136
5562   23   380   150
5562   24   399   155
5562   25   422   183
5562   26   450   234
5562   27   461   252
5562   28   473   265
5562   29   505   295
5562   30   529   329
5562   31   556   363
5562   32   576   387
5562   33   598   420
5562   34   629   460
5562   35   650   474
5562   36   665   490
5562   37   687   509
5562   38   714   539
5562   39   732   585
5562   40   745   610
5562   41   764   624
5562   42   785   633
5562   43   796   635
5562   44   807   651
5562   45   837   714
5562   46   844   732
5562   47   860   759
5562   214   2388   2287
5562   215   2424   2320
5562   216   2473   2355
5562   217   2492   2356
5562   218   2547   2382
5562   219   2583   2405
5562   220   2642   2460
5562   221   2733   2541
5562   222   2834   2648
5562   223   2907   2702
5562   224   2961   2738
5562   225   3016   2752
5562   226   3091   28

5915   429   9702   9574
5915   430   9754   9610
5915   431   9782   9661
5915   432   9821   9709
5915   433   9861   9752
5915   435   9942   9830
5954   16   129   25
5954   17   148   30
5954   18   156   44
5954   19   165   59
5954   20   179   61
5954   21   188   66
5954   22   207   82
5954   23   231   97
5954   24   244   108
5954   25   251   124
5954   26   258   143
5954   27   263   158
5954   225   1116   1012
5954   226   1158   1042
5954   227   1183   1076
5954   228   1215   1097
5954   229   1252   1130
5954   230   1301   1159
5954   231   1338   1187
5954   232   1395   1219
5954   233   1457   1257
5954   234   1538   1315
5954   235   1611   1380
5954   236   1693   1443
5954   237   1761   1473
5954   238   1837   1550
5954   239   1902   1602
5954   240   1985   1682
5954   241   2064   1730
5954   242   2160   1842
5954   243   2229   1906
5954   244   2294   1977
5954   245   2359   2048
5954   246   2440   2075
5954   247   2510   2128
5954   248   2582  

6436   420   8060   7947
6436   421   8098   7973
6436   428   8375   8261
6436   429   8421   8305
6436   430   8464   8361
6436   431   8504   8401
6437   280   1576   1474
6437   281   1602   1480
6437   282   1638   1511
6437   283   1690   1555
6437   284   1731   1604
6437   285   1775   1657
6437   286   1810   1694
6437   287   1848   1718
6437   288   1896   1752
6437   289   1928   1791
6437   290   1968   1847
6437   291   2029   1912
6437   295   2182   2075
6438   226   1658   1555
6438   231   1846   1733
6438   232   1903   1768
6438   233   1962   1818
6438   234   2034   1877
6438   235   2090   1923
6438   236   2172   2005
6438   237   2242   2047
6438   238   2323   2121
6438   239   2454   2216
6438   240   2545   2316
6438   241   2668   2433
6438   242   2796   2572
6438   243   2917   2690
6438   244   3032   2793
6438   245   3124   2877
6438   246   3228   2962
6438   247   3333   3091
6438   248   3441   3205
6438   249   3562   3348
6438   250   3676   3476


8118   246   5941   5687
8118   247   6127   5892
8118   248   6322   6111
8118   249   6477   6288
8118   250   6634   6475
8118   251   6745   6581
8118   252   6822   6622
8118   253   6960   6740
8118   254   7115   6917
8118   255   7260   7094
8118   256   7394   7254
8118   257   7558   7419
8118   258   7673   7514
8118   259   7747   7547
8118   260   7879   7640
8118   261   8027   7823
8118   262   8182   8035
8118   266   8560   8452
8118   267   8656   8525
8118   268   8788   8660
8118   269   8927   8794
8118   270   9053   8941
8118   271   9173   9067
8118   272   9272   9144
8118   273   9342   9149
8118   274   9479   9269
8118   275   9636   9436
8118   276   9816   9634
8118   277   9975   9789
8118   278   10154   9974
8118   279   10292   10080
8118   280   10384   10103
8118   281   10558   10211
8118   282   10742   10393
8118   283   10940   10597
8118   284   11133   10809
8118   285   11307   10962
8118   286   11481   11138
8118   287   11587   11169
8118  

8325   254   1569   1441
8325   255   1590   1471
8325   256   1629   1520
8325   257   1648   1546
8325   259   1712   1593
8325   260   1741   1600
8325   261   1772   1638
8325   272   2077   1967
8325   273   2117   2011
8325   274   2155   2022
8325   275   2186   2047
8325   276   2232   2110
8325   277   2281   2169
8325   280   2428   2294
8325   281   2502   2320
8325   282   2569   2381
8325   283   2635   2463
8325   284   2713   2545
8325   285   2768   2603
8325   286   2841   2666
8325   287   2882   2696
8325   288   2961   2729
8325   289   3059   2820
8325   290   3158   2922
8325   291   3257   3048
8325   292   3332   3145
8325   293   3408   3213
8325   294   3456   3267
8325   295   3526   3303
8325   296   3590   3401
8325   297   3652   3497
8325   300   3770   3655
8325   301   3796   3657
8325   302   3828   3682
8325   303   3871   3761
8325   307   3993   3890
8325   308   4016   3898
8325   309   4058   3918
8325   310   4112   3976
8325   311   4162   4023


9162   384   57116   56959
9162   385   57274   57095
9162   386   57422   57196
9162   387   57639   57435
9162   388   57900   57713
9162   389   58185   57978
9162   390   58465   58274
9162   391   58676   58456
9162   392   58817   58566
9162   393   59012   58735
9162   394   59313   59025
9162   395   59603   59337
9162   396   59849   59592
9162   397   60037   59774
9162   398   60205   59897
9162   399   60367   60015
9162   400   60545   60103
9162   401   60832   60368
9162   402   61209   60794
9162   403   61534   61142
9162   404   61850   61446
9162   405   62163   61728
9162   406   62414   61973
9162   407   62737   62265
9162   408   63157   62739
9162   409   63657   63262
9162   410   64016   63665
9162   411   64409   64055
9162   412   64759   64324
9162   413   64991   64508
9162   414   65302   64756
9162   415   65768   65292
9162   416   66158   65713
9162   417   66498   66082
9162   418   66830   66427
9162   419   67058   66591
9162   420   67255   66782
9

9775   422   6562   6436
9775   428   6807   6691
9775   429   6839   6735
9777   25   198   97
9777   29   260   153
9777   239   1107   1003
9777   240   1145   1030
9777   241   1181   1074
9777   242   1207   1091
9777   247   1409   1303
9777   281   2401   2296
9777   287   2706   2597
9777   288   2748   2634
9778   309   3010   2907
9779   386   4056   3945
9779   387   4079   3974
9779   399   4424   4323
9779   400   4449   4328
9779   401   4476   4332
9779   402   4514   4374
9779   403   4552   4431
9779   404   4612   4489
9779   405   4640   4494
9779   406   4687   4542
9779   407   4737   4589
9779   408   4777   4630
9779   409   4846   4728
9779   410   4895   4780
9779   411   4938   4829
9779   414   5040   4920
9779   415   5072   4957
10041   15   143   36
10041   16   162   37
10041   17   191   60
10041   18   231   85
10041   19   259   103
10041   20   284   113
10041   21   304   122
10041   22   336   149
10041   23   347   161
10041   24   368   179
10041 

13076   414   6005   5831
13076   415   6069   5909
13076   416   6124   5992
13076   417   6180   6076
13076   419   6315   6200
13076   420   6341   6202
13076   421   6395   6208
13076   422   6519   6413
13076   427   6731   6624
13076   428   6759   6647
14511   269   2986   2885
14511   270   3105   3004
14511   275   3671   3560
14511   276   3780   3668
14511   277   3932   3809
14511   278   4029   3901
14511   279   4183   4070
14511   280   4239   4105
14511   281   4377   4224
14511   282   4487   4343
14511   283   4650   4479
14511   284   4806   4631
14511   285   4934   4748
14511   286   5078   4907
14511   287   5218   5057
14511   288   5390   5226
14511   289   5508   5326
14511   290   5672   5489
14511   291   5813   5621
14511   292   6007   5839
14511   293   6183   6024
14511   294   6275   6114
14511   295   6566   6405
14511   296   6816   6688
14511   297   6970   6840
14511   298   7106   6978
14511   299   7188   7058
14511   300   7361   7242
14511   301 

15082   421   6298   6156
15083   256   627   526
15083   267   864   761
15083   273   1024   917
15083   274   1054   919
15083   275   1068   935
15083   276   1109   979
15083   277   1142   1002
15083   278   1177   1066
15083   279   1201   1066
15083   280   1222   1066
15083   281   1253   1120
15083   282   1284   1131
15083   283   1327   1166
15083   284   1364   1236
15083   285   1411   1279
15083   286   1429   1283
15083   287   1454   1283
15083   288   1530   1381
15083   289   1560   1425
15083   290   1599   1459
15083   291   1627   1491
15083   292   1731   1539
15083   293   1768   1617
15083   294   1788   1639
15083   295   1829   1654
15083   296   1848   1705
15083   297   1864   1741
15083   298   1892   1742
15083   301   1963   1845
15083   302   1991   1864
15083   303   2027   1873
15083   304   2106   1947
15083   305   2133   1984
15083   306   2160   2003
15083   307   2220   2031
15083   308   2246   2049
15083   309   2289   2082
15083   310   2353  

11003   244   2953   2443
11003   245   3018   2443
11003   246   3144   2641
11003   247   3245   2769
11003   248   3328   2845
11003   249   3423   2975
11003   250   3515   3096
11003   251   3564   3096
11003   252   3614   3096
11003   253   3743   3311
11003   254   3831   3418
11003   255   3910   3518
11003   256   3998   3610
11003   257   4127   3730
11003   258   4201   3730
11003   259   4255   3730
11003   260   4358   3913
11003   261   4450   4030
11003   262   4534   4153
11003   263   4615   4225
11003   264   4709   4336
11003   265   4758   4336
11003   266   4810   4336
11003   267   4927   4518
11003   268   5034   4648
11003   269   5132   4773
11003   270   5200   4875
11003   271   5324   5022
11003   272   5389   5022
11003   273   5436   5022
11003   274   5531   5180
11003   275   5610   5250
11003   276   5711   5355
11003   277   5796   5467
11003   278   5918   5597
11003   279   5968   5597
11003   280   6023   5597
11003   281   6147   5763
11003   282 

In [None]:
# NOTE(review): `g` is an empty list, so `g[1]` always raises IndexError.
# Presumably a deliberate stopper that halts "Run All" before the scratch
# cells below are executed — confirm the intent, or delete this cell.
g=list()
g[1]

In [None]:
# Consistency check for one county: report every index at which the case
# count differs from the sum of the recovered count and the death count.
AdmUnitID = "8311"
# Look the county up through AdmUnitID (instead of repeating the literal key),
# so that changing AdmUnitID above really switches the county being checked.
county = covid19[AdmUnitID]
for i, case_count in enumerate(county['cases']):
    removed_count = county["recovered"][i] + county["deaths"][i]
    if case_count != removed_count:
        print(i)
        # cases, recovered and deaths side by side
        print(str(case_count)+"  "+str(county["recovered"][i])+"  "+str(county["deaths"][i]))
        # cases next to the sum it is compared against
        print(str(case_count)+"  "+str(removed_count))

In [None]:
#%matplotlib notebook
# Plot three curves over time for a single county; the comments below name
# them susceptible, infectious and removed.
AdmUnitID = "8311"
# Look the county up through AdmUnitID (instead of repeating the literal key),
# so that the curves, the population and the title always refer to the same county.
county = covid19[AdmUnitID]
plt.figure(figsize=(5, 5))    # enlarge plot
# label every 14th data point on the x-axis with its UTC string
plt.xticks(non_county_specific_data["unixtime"][::14],
           non_county_specific_data['UTC'][::14], rotation='vertical')
# susceptible: (population - cases) / population
plt.plot(non_county_specific_data["unixtime"],
         [(counties_geography[AdmUnitID]["population"] - county["cases"][i])/
          counties_geography[AdmUnitID]["population"]
          for i in range(len(county['deaths']))],
         label="susceptible")
# infectious: difference between 'cases_from_reports' and 'cases'
# NOTE(review): the divisor 200 is an unexplained scaling constant — confirm
plt.plot(non_county_specific_data["unixtime"],
         [(county['cases_from_reports'][i] - county['cases'][i])/200 for i in range(len(county['cases']))],
         label="infectious")
# removed: (deaths + recovered), scaled by the last 'recovered' value
plt.plot(non_county_specific_data["unixtime"],
         [(county['deaths'][i] + county['recovered'][i])/
          county['recovered'][-1] for i in range(len(county['deaths']))],
         label="removed")
plt.legend()    # the three curves are otherwise indistinguishable
# NOTE(review): the title mentions only deaths although three different curves
# are plotted — presumably left over from an earlier plot; confirm the wording.
plt.title("Deaths due to COVID-19 in " + AdmUnitID + " " + counties_geography[AdmUnitID]['name'])
plt.show()