In [1]:
import requests

import os
import os.path as pth

from multiprocessing import Pool
from functools import partial

from tqdm.notebook import tqdm

import zipfile

In [2]:
def download_file_from_google_drive(id_, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()
    response = session.get(URL, params = { 'id' : id_ }, stream = True)
    token = get_confirm_token(response)
    if token:
        params = { 'id' : id_, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)
        
    basename = response.headers['Content-Disposition'].split(';')[1].split('filename=')[1].replace('\"', '')
    full_dst_filenname = pth.join(destination, basename)
    save_response_content(response, full_dst_filenname)
    return full_dst_filenname

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

In [3]:
file_id_list = [
    '1emS3CMUyd4EGLL6BGVzIAzjIYy6EO0QJ',
]

In [4]:
destination = 'data' ### YOUR_DOWNLOAD_PATH
os.makedirs(destination, exist_ok=True)

filename_list = []

# ### Use single process
# for file_id in file_id_list:
#     filename = download_file_from_google_drive(id_=file_id, destination=destination)
#     print('{} is done!'.format(filename))
#     filename_list.append(filename)

### If you want to download more faster
download_func = partial(download_file_from_google_drive, destination=destination)
with Pool(4) as pool:
    for i, filename in tqdm(enumerate(pool.imap_unordered(download_func, file_id_list)), total=len(file_id_list)):
        print('{} is done!'.format(filename))
        filename_list.append(filename)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

data/KT_data_20200717.zip is done!



In [5]:
zip_filename_list = [filename for filename in filename_list if filename.endswith('.zip')]
    
for zip_filename in tqdm(zip_filename_list):
    with zipfile.ZipFile(zip_filename) as target_zip:
        dest_path = pth.splitext(zip_filename)[0]
        os.makedirs(dest_path, exist_ok=True)
        target_zip.extractall(dest_path)
        print('{} is done!'.format(dest_path))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

data/KT_data_20200717 is done!



In [6]:
os.listdir('data/')

['flight-data',
 'KT_data_20200717.zip',
 'retail-data',
 'activity-data',
 'KT_data_20200717']

In [7]:
os.listdir('data/KT_data_20200717')

['card_20200717.csv',
 'COVID_19',
 'adstrd_master.csv',
 'fpopl.csv',
 'delivery.csv',
 'COVID_eng_kor_table.xlsx',
 'index.csv']

In [8]:
os.listdir('data/KT_data_20200717/COVID_19')

['PatientInfo_20200717.csv',
 'TimeAge.csv',
 'TimeProvince.csv',
 'Region.csv',
 'TimeGender.csv',
 'Policy.csv',
 'Case.csv',
 'Time.csv']