In [1]:
from datetime import datetime
import pandas as pd
import requests
import zipfile
import sqlalchemy
import src.config as config

# Location of the historical data archive, taken from the project config.
# It is fetched with requests.get further down and saved locally as a zip file.
data_path = config.PL_HISTORICAL_DATA

In [2]:
# Single SQLAlchemy engine reused for every read and write in this notebook.
db = sqlalchemy.create_engine(config.DATABASE_ENGINE)

# Load the `district` and `province` tables in full; they are joined onto the
# raw report rows later to resolve names into database ids.
district_df, province_df = (
    pd.read_sql(table_name, db) for table_name in ('district', 'province')
)

In [3]:
def parse_data(filename, zip_file, encodings=('utf-8', 'cp1250')):
    """Read one semicolon-separated CSV member of an open zip archive.

    The member is decoded by trying each codec in ``encodings`` in order and
    returning the first result that decodes without error.

    Strict UTF-8 is tried first on purpose: text in a legacy single-byte code
    page such as cp1250 almost never forms valid UTF-8, whereas UTF-8 bytes
    usually *do* decode under cp1250 without raising and silently produce
    mojibake (e.g. 'Cały kraj' -> 'CaĹ‚y kraj'). Trying cp1250 first would
    therefore corrupt UTF-8 files instead of falling back.

    Parameters
    ----------
    filename : str
        Name of the member inside the archive.
    zip_file : zipfile.ZipFile
        Archive opened for reading.
    encodings : sequence of str, optional
        Codec names to try, in priority order.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV member.

    Raises
    ------
    ValueError
        If ``encodings`` is empty.
    UnicodeDecodeError
        If the member cannot be decoded with any of the given codecs (the
        error from the last attempt is raised).
    """
    if not encodings:
        raise ValueError('encodings must contain at least one codec name')

    last_error = None
    for encoding in encodings:
        try:
            # Re-open the member on every attempt: a failed read leaves the
            # previous stream partially consumed.
            with zip_file.open(filename) as my_file:
                return pd.read_csv(my_file, encoding=encoding, sep=';')
        except UnicodeDecodeError as error:
            last_error = error
    raise last_error

In [4]:
# --- Download -------------------------------------------------------------
# Stream the archive to disk in small chunks so the whole zip never has to be
# held in memory. The response is used as a context manager so the underlying
# connection is released even if writing fails.
file_name = 'data_files.zip'
with requests.get(data_path, stream=True, timeout=60) as get_response:
    # Fail fast on HTTP errors instead of saving an error page as the "zip".
    get_response.raise_for_status()
    with open(file_name, 'wb') as f:
        for chunk in get_response.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)

# --- Ingest ---------------------------------------------------------------
# maps: archive member name -> number of rows appended to the `cases` table.
# NOTE: rows are appended (if_exists='append'), so re-running this cell will
# insert the same days again unless the table itself rejects duplicates.
maps = {}
with zipfile.ZipFile(file_name) as zip_archive:
    # Every member except the readme is treated as a daily report; filtering
    # (rather than list.remove) also works when the archive has no readme.
    files = [name for name in zip_archive.namelist() if name != 'readme.txt']

    for filename in files:
        print(filename)
        raw_df = parse_data(filename, zip_archive)

        # Drop the nationwide summary row; only per-district rows are stored.
        df = raw_df[raw_df['powiat_miasto'] != 'Cały kraj']

        # Resolve province and district names to database ids. Left joins keep
        # rows with no match; their ids end up as <NA> thanks to the nullable
        # Int64 dtype below.
        df = pd.merge(
            df, province_df, how='left',
            left_on=['wojewodztwo'], right_on=['province_name'],
        )
        df = pd.merge(
            df, district_df, how='left',
            left_on=['id_province', 'powiat_miasto'],
            right_on=['id_province', 'district_name'],
        )

        df['id_district'] = df['id_district'].astype('Int64')
        df['id_province'] = df['id_province'].astype('Int64')
        # NOTE(review): 136 is presumably the country id for Poland - confirm.
        df['id_cntry'] = 136
        # The first 8 characters of the member name are the report date (YYYYMMDD).
        df['day'] = datetime.strptime(filename[:8], '%Y%m%d').strftime('%Y-%m-%d')

        # Some reports have no recoveries column; add it empty so every file
        # yields the same set of output columns.
        if 'liczba_ozdrowiencow' not in df.columns:
            df['liczba_ozdrowiencow'] = None

        df = df.rename(columns={
            "liczba_przypadkow": "cases",
            "zgony": "deaths",
            "liczba_ozdrowiencow": "recovered",
        })
        df = df[['day', 'id_cntry', 'id_province', 'id_district', 'cases', 'deaths', 'recovered']]

        maps[filename] = len(df.index)
        df.to_sql('cases', db, if_exists='append', index=False)

# Total number of rows written across all report files (displayed as cell output).
sum(maps.values())



20201124060000_rap_rcb_pow_eksport.csv
20201125060000_rap_rcb_pow_eksport.csv
20201126060000_rap_rcb_pow_eksport.csv
20201127060000_rap_rcb_pow_eksport.csv
20201128060000_rap_rcb_pow_eksport.csv
20201129060000_rap_rcb_pow_eksport.csv
20201130060000_rap_rcb_pow_eksport.csv
20201201060000_rap_rcb_pow_eksport.csv
20201202060000_rap_rcb_pow_eksport.csv
20201203060000_rap_rcb_pow_eksport.csv
20201204060000_rap_rcb_pow_eksport.csv
20201205060000_rap_rcb_pow_eksport.csv
20201206060000_rap_rcb_pow_eksport.csv
20201207060000_rap_rcb_pow_eksport.csv
20201208060000_rap_rcb_pow_eksport.csv
20201209060000_rap_rcb_pow_eksport.csv
20201210060000_rap_rcb_pow_eksport.csv
20201211060000_rap_rcb_pow_eksport.csv
20201212060000_rap_rcb_pow_eksport.csv
20201213060000_rap_rcb_pow_eksport.csv
20201214060000_rap_rcb_pow_eksport.csv
20201215060000_rap_rcb_pow_eksport.csv
20201216060000_rap_rcb_pow_eksport.csv
20201217060000_rap_rcb_pow_eksport.csv
20201218060000_rap_rcb_pow_eksport.csv
20201219060000_rap_rcb_po

157700