# Step 0: Set up directory

In [8]:
# ------------------------------------------------------------
# Download ~10GB (unzipped; ~3GB compressed, see TARGET_GB) of RAW GDELT GKG files from MAY 2018
# Store masterfile + data in separate folders for GitHub structure
# ------------------------------------------------------------

import os
import requests
from tqdm import tqdm
import gzip
import shutil
import zipfile
import shutil

# ------------------------------------------------------------
# Project data locations (absolute Windows path)
# NOTE(review): BASE_DIR is hard-coded to one user's machine;
# edit it before running this notebook anywhere else.
# ------------------------------------------------------------
BASE_DIR = r"C:\Users\jonat\Documents\UMBC\FALL_2025\Final_Project_UMBC_BIG_DATA\DATA"

# The master file list and the raw downloads live in separate sub-folders.
MASTER_DIR = os.path.join(BASE_DIR, "master")
RAW_DIR = os.path.join(BASE_DIR, "raw")

# Local path of the cached master file list.
MASTERFILE = os.path.join(MASTER_DIR, "masterfilelist.txt")
# Download budget in GB, measured on the compressed .zip files; per the
# author's estimate the data is roughly 3-4 times larger once unzipped.
TARGET_GB = 3

# Ensure folders exist
os.makedirs(MASTER_DIR, exist_ok=True)
os.makedirs(RAW_DIR, exist_ok=True)

# Remote URL the master file list is downloaded from.
MASTERLIST_URL = "http://data.gdeltproject.org/gdeltv2/masterfilelist.txt"


# Step 1: Download masterfilelist.txt → stored in DATA/master

In [9]:
def download_masterfilelist():
    """Download the GDELT master file list once and return its local path.

    If ``MASTERFILE`` already exists it is reused and nothing is downloaded.
    Otherwise the list is streamed from ``MASTERLIST_URL`` into a temporary
    ``.part`` file and renamed to ``MASTERFILE`` only after the transfer
    finished, so an interrupted or failed download never leaves behind a
    file that a later run would mistake for a complete list.

    Returns:
        str: ``MASTERFILE``, the path of the local master file list.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (previously the error page would have been saved as the list).
        requests.RequestException: on connection problems or timeouts.
    """
    if os.path.exists(MASTERFILE):
        print("masterfilelist.txt already exists.")
        return MASTERFILE

    print("Downloading masterfilelist.txt …")
    tmp_path = MASTERFILE + ".part"
    try:
        # Stream instead of buffering the whole list in memory via r.content.
        with requests.get(MASTERLIST_URL, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        # Atomic rename: MASTERFILE only ever appears fully written.
        os.replace(tmp_path, MASTERFILE)
    finally:
        # After a successful rename the temp file is gone; anything still
        # here is an incomplete download and must not be kept.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return MASTERFILE


# Fetch the master file list (or reuse the copy already on disk); the bare
# name on the last line shows the resolved path as the cell output.
master_path = download_masterfilelist()
master_path


masterfilelist.txt already exists.


'C:\\Users\\jonat\\Documents\\UMBC\\FALL_2025\\Final_Project_UMBC_BIG_DATA\\DATA\\master\\masterfilelist.txt'

# Step 2: Collect all May 2018 GKG URLs

In [10]:
def get_may_2018_gkg_urls(path, prefix="201805"):
    """Collect the GKG archive URLs whose file name starts with ``prefix``.

    The file at ``path`` is read line by line. Only lines containing
    ``"gkg.csv.zip"`` are considered; for those, the last
    whitespace-separated field is taken as the URL and its final path
    component as the file name.

    Args:
        path: Path of the local master file list.
        prefix: Required start of the archive file name. The default
            ``"201805"`` keeps the original behaviour (May 2018); pass e.g.
            ``"201806"`` or ``"20180501"`` to select another period.

    Returns:
        list[tuple[str, str]]: ``(filename, url)`` pairs in the order they
        appear in the file. Empty if nothing matches.
    """
    urls = []

    # errors="ignore" drops undecodable bytes instead of aborting the scan.
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            if "gkg.csv.zip" not in line:
                continue

            url = line.split()[-1]
            fname = url.rsplit("/", 1)[-1]

            if fname.startswith(prefix):
                urls.append((fname, url))

    return urls


# (filename, url) pairs for every May 2018 GKG archive found in the master
# file list.
may_urls = get_may_2018_gkg_urls(master_path)

# Report how many archives matched.
print(f"Found {len(may_urls)} GKG files for May 2018.")


Found 2853 GKG files for May 2018.


# Step 3: Download raw files until ~3GB compressed (TARGET_GB; roughly 10GB unzipped), saved in DATA/raw

In [11]:
def download_until_10gb(url_list):
    """Download archives into ``RAW_DIR`` until the size budget is reached.

    Despite the function name, the budget is ``TARGET_GB`` (counted as
    1024**3 bytes per GB) and is measured on the files as stored on disk,
    i.e. the compressed archives.

    Files already present in ``RAW_DIR`` are not downloaded again; their
    size is added to the running total, which makes the function resumable.
    Each new file is streamed into ``<name>.part`` and renamed to its final
    name only after the transfer completed, so an interrupted run or a
    network error can never leave a truncated archive that a later run
    would skip as "already downloaded".

    Failures are best-effort: a non-200 response or any exception for one
    file is printed and the loop moves on to the next file.

    Args:
        url_list: Iterable of ``(filename, url)`` pairs.

    Returns:
        None. Progress and the final total are printed.
    """
    total_bytes = 0
    target_bytes = TARGET_GB * 1024 * 1024 * 1024

    for fname, url in tqdm(url_list):
        if total_bytes >= target_bytes:
            print(f"\nReached ~{TARGET_GB}GB. Stopping downloads.")
            break

        out_path = os.path.join(RAW_DIR, fname)

        # Skip existing (resume support)
        if os.path.exists(out_path):
            total_bytes += os.path.getsize(out_path)
            continue

        part_path = out_path + ".part"
        try:
            # The context manager releases the connection on every path,
            # including the early `continue` for a non-200 response.
            with requests.get(url, stream=True, timeout=60) as r:
                if r.status_code != 200:
                    print("Failed:", url)
                    continue

                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)

            # Publish the file under its real name only once it is complete.
            os.replace(part_path, out_path)

            file_size = os.path.getsize(out_path)
            total_bytes += file_size

            print(f"Downloaded {fname} ({file_size/1024/1024:.2f} MB)")
            print(f"TOTAL: {total_bytes/1024/1024/1024:.2f} GB")

        except Exception as e:
            print("ERROR:", e)
            continue

        finally:
            # Runs on errors and on KeyboardInterrupt too: whatever is still
            # at part_path is an incomplete download and must be discarded.
            if os.path.exists(part_path):
                try:
                    os.remove(part_path)
                except OSError:
                    # Cleanup is best-effort; a leftover .part file is never
                    # mistaken for a finished archive.
                    pass

    print("\nFINAL TOTAL:", total_bytes/1024/1024/1024, "GB")


# Download the May 2018 archives into RAW_DIR until the TARGET_GB budget is
# reached; files that already exist on disk are skipped and counted.
download_until_10gb(may_urls)


  0%|          | 1/2853 [00:00<24:43,  1.92it/s]

Downloaded 20180501000000.gkg.csv.zip (12.08 MB)
TOTAL: 0.01 GB


  0%|          | 2/2853 [00:01<24:03,  1.97it/s]

Downloaded 20180501001500.gkg.csv.zip (10.73 MB)
TOTAL: 0.02 GB


  0%|          | 3/2853 [00:01<22:04,  2.15it/s]

Downloaded 20180501003000.gkg.csv.zip (10.64 MB)
TOTAL: 0.03 GB


  0%|          | 4/2853 [00:01<21:09,  2.24it/s]

Downloaded 20180501004500.gkg.csv.zip (9.26 MB)
TOTAL: 0.04 GB


  0%|          | 5/2853 [00:02<25:59,  1.83it/s]

Downloaded 20180501010000.gkg.csv.zip (9.88 MB)
TOTAL: 0.05 GB


  0%|          | 6/2853 [00:03<24:12,  1.96it/s]

Downloaded 20180501011500.gkg.csv.zip (8.74 MB)
TOTAL: 0.06 GB


  0%|          | 7/2853 [00:03<23:15,  2.04it/s]

Downloaded 20180501013000.gkg.csv.zip (9.85 MB)
TOTAL: 0.07 GB


  0%|          | 8/2853 [00:03<22:43,  2.09it/s]

Downloaded 20180501014500.gkg.csv.zip (8.41 MB)
TOTAL: 0.08 GB


  0%|          | 9/2853 [00:04<22:13,  2.13it/s]

Downloaded 20180501020000.gkg.csv.zip (9.04 MB)
TOTAL: 0.09 GB


  0%|          | 10/2853 [00:04<23:16,  2.04it/s]

Downloaded 20180501021500.gkg.csv.zip (10.97 MB)
TOTAL: 0.10 GB


  0%|          | 11/2853 [00:05<21:16,  2.23it/s]

Downloaded 20180501023000.gkg.csv.zip (8.77 MB)
TOTAL: 0.11 GB


  0%|          | 12/2853 [00:05<20:29,  2.31it/s]

Downloaded 20180501024500.gkg.csv.zip (8.43 MB)
TOTAL: 0.11 GB


  0%|          | 13/2853 [00:06<20:09,  2.35it/s]

Downloaded 20180501030000.gkg.csv.zip (7.83 MB)
TOTAL: 0.12 GB


  0%|          | 14/2853 [00:06<20:00,  2.36it/s]

Downloaded 20180501031500.gkg.csv.zip (9.69 MB)
TOTAL: 0.13 GB


  1%|          | 15/2853 [00:06<19:48,  2.39it/s]

Downloaded 20180501033000.gkg.csv.zip (8.74 MB)
TOTAL: 0.14 GB


  1%|          | 16/2853 [00:07<23:21,  2.02it/s]

Downloaded 20180501034500.gkg.csv.zip (8.73 MB)
TOTAL: 0.15 GB


  1%|          | 17/2853 [00:07<21:09,  2.23it/s]

Downloaded 20180501040000.gkg.csv.zip (7.34 MB)
TOTAL: 0.16 GB


  1%|          | 18/2853 [00:08<21:11,  2.23it/s]

Downloaded 20180501041500.gkg.csv.zip (8.08 MB)
TOTAL: 0.16 GB


  1%|          | 19/2853 [00:08<20:35,  2.29it/s]

Downloaded 20180501050000.gkg.csv.zip (8.32 MB)
TOTAL: 0.17 GB


  1%|          | 20/2853 [00:09<19:49,  2.38it/s]

Downloaded 20180501051500.gkg.csv.zip (8.75 MB)
TOTAL: 0.18 GB


  1%|          | 21/2853 [00:09<20:05,  2.35it/s]

Downloaded 20180501053000.gkg.csv.zip (7.72 MB)
TOTAL: 0.19 GB


  1%|          | 22/2853 [00:10<23:08,  2.04it/s]

Downloaded 20180501054500.gkg.csv.zip (8.44 MB)
TOTAL: 0.20 GB


  1%|          | 23/2853 [00:10<22:23,  2.11it/s]

Downloaded 20180501060000.gkg.csv.zip (8.16 MB)
TOTAL: 0.20 GB


  1%|          | 24/2853 [00:11<20:50,  2.26it/s]

Downloaded 20180501061500.gkg.csv.zip (7.25 MB)
TOTAL: 0.21 GB


  1%|          | 25/2853 [00:11<20:43,  2.28it/s]

Downloaded 20180501063000.gkg.csv.zip (8.30 MB)
TOTAL: 0.22 GB


  1%|          | 26/2853 [00:11<20:17,  2.32it/s]

Downloaded 20180501064500.gkg.csv.zip (9.13 MB)
TOTAL: 0.23 GB


  1%|          | 27/2853 [00:12<22:09,  2.13it/s]

Downloaded 20180501070000.gkg.csv.zip (9.02 MB)
TOTAL: 0.24 GB


  1%|          | 28/2853 [00:13<25:17,  1.86it/s]

Downloaded 20180501071500.gkg.csv.zip (7.73 MB)
TOTAL: 0.24 GB


  1%|          | 29/2853 [00:13<23:12,  2.03it/s]

Downloaded 20180501073000.gkg.csv.zip (8.21 MB)
TOTAL: 0.25 GB


  1%|          | 30/2853 [00:14<23:35,  1.99it/s]

Downloaded 20180501074500.gkg.csv.zip (6.87 MB)
TOTAL: 0.26 GB


  1%|          | 31/2853 [00:14<21:41,  2.17it/s]

Downloaded 20180501080000.gkg.csv.zip (8.24 MB)
TOTAL: 0.27 GB


  1%|          | 32/2853 [00:14<20:08,  2.33it/s]

Downloaded 20180501083000.gkg.csv.zip (8.34 MB)
TOTAL: 0.28 GB


  1%|          | 33/2853 [00:15<19:54,  2.36it/s]

Downloaded 20180501084500.gkg.csv.zip (8.34 MB)
TOTAL: 0.28 GB


  1%|          | 34/2853 [00:15<19:02,  2.47it/s]

Downloaded 20180501090000.gkg.csv.zip (7.32 MB)
TOTAL: 0.29 GB


  1%|          | 35/2853 [00:15<19:57,  2.35it/s]

Downloaded 20180501093000.gkg.csv.zip (8.70 MB)
TOTAL: 0.30 GB


  1%|▏         | 36/2853 [00:16<18:47,  2.50it/s]

Downloaded 20180501094500.gkg.csv.zip (8.44 MB)
TOTAL: 0.31 GB


  1%|▏         | 37/2853 [00:16<18:26,  2.55it/s]

Downloaded 20180501100000.gkg.csv.zip (8.75 MB)
TOTAL: 0.32 GB


  1%|▏         | 38/2853 [00:17<18:29,  2.54it/s]

Downloaded 20180501101500.gkg.csv.zip (7.93 MB)
TOTAL: 0.32 GB


  1%|▏         | 39/2853 [00:17<18:50,  2.49it/s]

Downloaded 20180501103000.gkg.csv.zip (9.66 MB)
TOTAL: 0.33 GB


  1%|▏         | 40/2853 [00:18<19:48,  2.37it/s]

Downloaded 20180501104500.gkg.csv.zip (8.79 MB)
TOTAL: 0.34 GB


  1%|▏         | 41/2853 [00:18<19:54,  2.35it/s]

Downloaded 20180501110000.gkg.csv.zip (8.95 MB)
TOTAL: 0.35 GB


  1%|▏         | 42/2853 [00:18<20:50,  2.25it/s]

Downloaded 20180501111500.gkg.csv.zip (9.80 MB)
TOTAL: 0.36 GB


  2%|▏         | 43/2853 [00:19<20:14,  2.31it/s]

Downloaded 20180501113000.gkg.csv.zip (9.91 MB)
TOTAL: 0.37 GB


  2%|▏         | 44/2853 [00:19<19:58,  2.34it/s]

Downloaded 20180501114500.gkg.csv.zip (10.15 MB)
TOTAL: 0.38 GB


  2%|▏         | 45/2853 [00:20<19:33,  2.39it/s]

Downloaded 20180501120000.gkg.csv.zip (8.67 MB)
TOTAL: 0.39 GB


  2%|▏         | 46/2853 [00:20<19:28,  2.40it/s]

Downloaded 20180501121500.gkg.csv.zip (9.89 MB)
TOTAL: 0.40 GB


  2%|▏         | 47/2853 [00:21<21:15,  2.20it/s]

Downloaded 20180501123000.gkg.csv.zip (11.65 MB)
TOTAL: 0.41 GB


  2%|▏         | 48/2853 [00:21<21:01,  2.22it/s]

Downloaded 20180501124500.gkg.csv.zip (11.22 MB)
TOTAL: 0.42 GB


  2%|▏         | 49/2853 [00:21<20:53,  2.24it/s]

Downloaded 20180501130000.gkg.csv.zip (11.07 MB)
TOTAL: 0.43 GB


  2%|▏         | 50/2853 [00:22<21:17,  2.19it/s]

Downloaded 20180501131500.gkg.csv.zip (10.42 MB)
TOTAL: 0.44 GB


  2%|▏         | 51/2853 [00:22<21:48,  2.14it/s]

Downloaded 20180501133000.gkg.csv.zip (13.56 MB)
TOTAL: 0.45 GB


  2%|▏         | 52/2853 [00:23<21:24,  2.18it/s]

Downloaded 20180501134500.gkg.csv.zip (10.69 MB)
TOTAL: 0.46 GB


  2%|▏         | 53/2853 [00:23<21:41,  2.15it/s]

Downloaded 20180501140000.gkg.csv.zip (11.80 MB)
TOTAL: 0.48 GB


  2%|▏         | 54/2853 [00:24<22:14,  2.10it/s]

Downloaded 20180501141500.gkg.csv.zip (11.95 MB)
TOTAL: 0.49 GB


  2%|▏         | 55/2853 [00:24<22:16,  2.09it/s]

Downloaded 20180501143000.gkg.csv.zip (12.03 MB)
TOTAL: 0.50 GB


  2%|▏         | 56/2853 [00:25<21:39,  2.15it/s]

Downloaded 20180501144500.gkg.csv.zip (11.90 MB)
TOTAL: 0.51 GB


  2%|▏         | 57/2853 [00:25<21:47,  2.14it/s]

Downloaded 20180501150000.gkg.csv.zip (12.42 MB)
TOTAL: 0.52 GB


  2%|▏         | 58/2853 [00:26<21:02,  2.21it/s]

Downloaded 20180501151500.gkg.csv.zip (11.71 MB)
TOTAL: 0.53 GB


  2%|▏         | 59/2853 [00:26<20:59,  2.22it/s]

Downloaded 20180501153000.gkg.csv.zip (13.06 MB)
TOTAL: 0.55 GB


  2%|▏         | 60/2853 [00:27<22:34,  2.06it/s]

Downloaded 20180501154500.gkg.csv.zip (15.81 MB)
TOTAL: 0.56 GB


  2%|▏         | 61/2853 [00:27<22:31,  2.07it/s]

Downloaded 20180501160000.gkg.csv.zip (14.19 MB)
TOTAL: 0.58 GB


  2%|▏         | 62/2853 [00:28<22:19,  2.08it/s]

Downloaded 20180501161500.gkg.csv.zip (12.30 MB)
TOTAL: 0.59 GB


  2%|▏         | 63/2853 [00:28<22:57,  2.02it/s]

Downloaded 20180501163000.gkg.csv.zip (15.69 MB)
TOTAL: 0.60 GB


  2%|▏         | 64/2853 [00:29<24:04,  1.93it/s]

Downloaded 20180501164500.gkg.csv.zip (14.68 MB)
TOTAL: 0.62 GB


  2%|▏         | 65/2853 [00:29<24:13,  1.92it/s]

Downloaded 20180501170000.gkg.csv.zip (14.03 MB)
TOTAL: 0.63 GB


  2%|▏         | 66/2853 [00:30<26:11,  1.77it/s]

Downloaded 20180501171500.gkg.csv.zip (15.15 MB)
TOTAL: 0.65 GB


  2%|▏         | 67/2853 [00:30<24:42,  1.88it/s]

Downloaded 20180501173000.gkg.csv.zip (13.31 MB)
TOTAL: 0.66 GB


  2%|▏         | 68/2853 [00:31<25:01,  1.85it/s]

Downloaded 20180501174500.gkg.csv.zip (14.34 MB)
TOTAL: 0.67 GB


  2%|▏         | 69/2853 [00:32<25:56,  1.79it/s]

Downloaded 20180501180000.gkg.csv.zip (14.14 MB)
TOTAL: 0.69 GB


  2%|▏         | 70/2853 [00:32<24:44,  1.87it/s]

Downloaded 20180501181500.gkg.csv.zip (13.84 MB)
TOTAL: 0.70 GB


  2%|▏         | 71/2853 [00:33<27:33,  1.68it/s]

Downloaded 20180501183000.gkg.csv.zip (14.72 MB)
TOTAL: 0.72 GB


  3%|▎         | 72/2853 [00:33<27:00,  1.72it/s]

Downloaded 20180501184500.gkg.csv.zip (15.33 MB)
TOTAL: 0.73 GB


  3%|▎         | 73/2853 [00:34<25:23,  1.82it/s]

Downloaded 20180501190000.gkg.csv.zip (12.17 MB)
TOTAL: 0.74 GB


  3%|▎         | 74/2853 [00:34<25:53,  1.79it/s]

Downloaded 20180501191500.gkg.csv.zip (13.67 MB)
TOTAL: 0.76 GB


  3%|▎         | 75/2853 [00:35<28:42,  1.61it/s]

Downloaded 20180501193000.gkg.csv.zip (15.10 MB)
TOTAL: 0.77 GB


  3%|▎         | 76/2853 [00:36<27:02,  1.71it/s]

Downloaded 20180501194500.gkg.csv.zip (12.77 MB)
TOTAL: 0.78 GB


  3%|▎         | 77/2853 [00:36<26:33,  1.74it/s]

Downloaded 20180501200000.gkg.csv.zip (13.45 MB)
TOTAL: 0.80 GB


  3%|▎         | 78/2853 [00:37<25:25,  1.82it/s]

Downloaded 20180501201500.gkg.csv.zip (13.41 MB)
TOTAL: 0.81 GB


  3%|▎         | 79/2853 [00:37<25:35,  1.81it/s]

Downloaded 20180501203000.gkg.csv.zip (13.32 MB)
TOTAL: 0.82 GB


  3%|▎         | 80/2853 [00:38<25:00,  1.85it/s]

Downloaded 20180501204500.gkg.csv.zip (13.65 MB)
TOTAL: 0.84 GB


  3%|▎         | 81/2853 [00:38<25:19,  1.82it/s]

Downloaded 20180501210000.gkg.csv.zip (15.22 MB)
TOTAL: 0.85 GB


  3%|▎         | 82/2853 [00:39<25:50,  1.79it/s]

Downloaded 20180501211500.gkg.csv.zip (12.52 MB)
TOTAL: 0.86 GB


  3%|▎         | 83/2853 [00:40<26:44,  1.73it/s]

Downloaded 20180501213000.gkg.csv.zip (13.59 MB)
TOTAL: 0.88 GB


  3%|▎         | 84/2853 [00:40<26:17,  1.76it/s]

Downloaded 20180501214500.gkg.csv.zip (13.73 MB)
TOTAL: 0.89 GB


  3%|▎         | 85/2853 [00:41<25:40,  1.80it/s]

Downloaded 20180501220000.gkg.csv.zip (12.94 MB)
TOTAL: 0.90 GB


  3%|▎         | 86/2853 [00:41<23:53,  1.93it/s]

Downloaded 20180501221500.gkg.csv.zip (11.52 MB)
TOTAL: 0.91 GB


  3%|▎         | 87/2853 [00:41<22:51,  2.02it/s]

Downloaded 20180501223000.gkg.csv.zip (12.20 MB)
TOTAL: 0.93 GB


  3%|▎         | 88/2853 [00:42<27:27,  1.68it/s]

Downloaded 20180501224500.gkg.csv.zip (11.93 MB)
TOTAL: 0.94 GB


  3%|▎         | 89/2853 [00:43<25:25,  1.81it/s]

Downloaded 20180501230000.gkg.csv.zip (11.57 MB)
TOTAL: 0.95 GB


  3%|▎         | 90/2853 [00:43<24:47,  1.86it/s]

Downloaded 20180501231500.gkg.csv.zip (12.99 MB)
TOTAL: 0.96 GB


  3%|▎         | 91/2853 [00:44<24:08,  1.91it/s]

Downloaded 20180501233000.gkg.csv.zip (11.90 MB)
TOTAL: 0.97 GB


  3%|▎         | 92/2853 [00:44<23:21,  1.97it/s]

Downloaded 20180501234500.gkg.csv.zip (11.33 MB)
TOTAL: 0.98 GB


  3%|▎         | 93/2853 [00:45<22:19,  2.06it/s]

Downloaded 20180502000000.gkg.csv.zip (10.61 MB)
TOTAL: 0.99 GB


  3%|▎         | 94/2853 [00:45<21:25,  2.15it/s]

Downloaded 20180502001500.gkg.csv.zip (10.00 MB)
TOTAL: 1.00 GB


  3%|▎         | 95/2853 [00:46<21:10,  2.17it/s]

Downloaded 20180502003000.gkg.csv.zip (10.69 MB)
TOTAL: 1.01 GB


  3%|▎         | 96/2853 [00:46<22:17,  2.06it/s]

Downloaded 20180502004500.gkg.csv.zip (10.63 MB)
TOTAL: 1.02 GB


  3%|▎         | 97/2853 [00:47<21:50,  2.10it/s]

Downloaded 20180502010000.gkg.csv.zip (10.10 MB)
TOTAL: 1.03 GB


  3%|▎         | 98/2853 [00:47<21:43,  2.11it/s]

Downloaded 20180502011500.gkg.csv.zip (10.20 MB)
TOTAL: 1.04 GB


  3%|▎         | 99/2853 [00:47<20:30,  2.24it/s]

Downloaded 20180502013000.gkg.csv.zip (9.93 MB)
TOTAL: 1.05 GB


  4%|▎         | 100/2853 [00:48<20:13,  2.27it/s]

Downloaded 20180502014500.gkg.csv.zip (9.99 MB)
TOTAL: 1.06 GB


  4%|▎         | 101/2853 [00:48<19:54,  2.30it/s]

Downloaded 20180502020000.gkg.csv.zip (8.87 MB)
TOTAL: 1.07 GB


  4%|▎         | 102/2853 [00:49<20:24,  2.25it/s]

Downloaded 20180502021500.gkg.csv.zip (10.79 MB)
TOTAL: 1.08 GB


  4%|▎         | 103/2853 [00:49<19:02,  2.41it/s]

Downloaded 20180502023000.gkg.csv.zip (8.14 MB)
TOTAL: 1.09 GB


  4%|▎         | 104/2853 [00:49<19:17,  2.38it/s]

Downloaded 20180502024500.gkg.csv.zip (9.01 MB)
TOTAL: 1.10 GB


  4%|▎         | 105/2853 [00:50<18:34,  2.47it/s]

Downloaded 20180502030000.gkg.csv.zip (8.55 MB)
TOTAL: 1.11 GB


  4%|▎         | 106/2853 [00:50<17:47,  2.57it/s]

Downloaded 20180502031500.gkg.csv.zip (8.41 MB)
TOTAL: 1.12 GB


  4%|▍         | 107/2853 [00:51<18:13,  2.51it/s]

Downloaded 20180502033000.gkg.csv.zip (9.56 MB)
TOTAL: 1.13 GB


  4%|▍         | 108/2853 [00:51<17:59,  2.54it/s]

Downloaded 20180502034500.gkg.csv.zip (7.92 MB)
TOTAL: 1.13 GB


  4%|▍         | 109/2853 [00:51<18:05,  2.53it/s]

Downloaded 20180502040000.gkg.csv.zip (8.04 MB)
TOTAL: 1.14 GB


  4%|▍         | 110/2853 [00:52<18:10,  2.52it/s]

Downloaded 20180502041500.gkg.csv.zip (7.58 MB)
TOTAL: 1.15 GB


  4%|▍         | 111/2853 [00:52<18:19,  2.49it/s]

Downloaded 20180502043000.gkg.csv.zip (9.73 MB)
TOTAL: 1.16 GB


  4%|▍         | 112/2853 [00:53<18:47,  2.43it/s]

Downloaded 20180502044500.gkg.csv.zip (7.45 MB)
TOTAL: 1.17 GB


  4%|▍         | 113/2853 [00:53<18:34,  2.46it/s]

Downloaded 20180502050000.gkg.csv.zip (8.97 MB)
TOTAL: 1.17 GB


  4%|▍         | 114/2853 [00:53<19:03,  2.40it/s]

Downloaded 20180502051500.gkg.csv.zip (9.05 MB)
TOTAL: 1.18 GB


  4%|▍         | 115/2853 [00:54<18:41,  2.44it/s]

Downloaded 20180502053000.gkg.csv.zip (8.02 MB)
TOTAL: 1.19 GB


  4%|▍         | 116/2853 [00:54<18:28,  2.47it/s]

Downloaded 20180502054500.gkg.csv.zip (9.06 MB)
TOTAL: 1.20 GB


  4%|▍         | 117/2853 [00:55<18:11,  2.51it/s]

Downloaded 20180502060000.gkg.csv.zip (8.58 MB)
TOTAL: 1.21 GB


  4%|▍         | 118/2853 [00:55<18:05,  2.52it/s]

Downloaded 20180502061500.gkg.csv.zip (8.86 MB)
TOTAL: 1.22 GB


  4%|▍         | 119/2853 [00:55<18:15,  2.50it/s]

Downloaded 20180502063000.gkg.csv.zip (7.63 MB)
TOTAL: 1.22 GB


  4%|▍         | 120/2853 [00:56<18:36,  2.45it/s]

Downloaded 20180502064500.gkg.csv.zip (9.28 MB)
TOTAL: 1.23 GB


  4%|▍         | 121/2853 [00:56<18:43,  2.43it/s]

Downloaded 20180502070000.gkg.csv.zip (8.32 MB)
TOTAL: 1.24 GB


  4%|▍         | 122/2853 [00:57<18:10,  2.51it/s]

Downloaded 20180502071500.gkg.csv.zip (8.03 MB)
TOTAL: 1.25 GB


  4%|▍         | 123/2853 [00:57<18:52,  2.41it/s]

Downloaded 20180502073000.gkg.csv.zip (9.44 MB)
TOTAL: 1.26 GB


  4%|▍         | 124/2853 [00:58<19:29,  2.33it/s]

Downloaded 20180502074500.gkg.csv.zip (9.43 MB)
TOTAL: 1.27 GB


  4%|▍         | 125/2853 [00:58<19:04,  2.38it/s]

Downloaded 20180502080000.gkg.csv.zip (8.11 MB)
TOTAL: 1.28 GB


  4%|▍         | 126/2853 [00:58<18:57,  2.40it/s]

Downloaded 20180502081500.gkg.csv.zip (8.94 MB)
TOTAL: 1.28 GB


  4%|▍         | 127/2853 [00:59<18:10,  2.50it/s]

Downloaded 20180502084500.gkg.csv.zip (8.10 MB)
TOTAL: 1.29 GB


  4%|▍         | 128/2853 [00:59<17:34,  2.58it/s]

Downloaded 20180502090000.gkg.csv.zip (8.39 MB)
TOTAL: 1.30 GB


  5%|▍         | 129/2853 [01:00<18:05,  2.51it/s]

Downloaded 20180502091500.gkg.csv.zip (8.84 MB)
TOTAL: 1.31 GB


  5%|▍         | 130/2853 [01:00<18:05,  2.51it/s]

Downloaded 20180502093000.gkg.csv.zip (9.21 MB)
TOTAL: 1.32 GB


  5%|▍         | 131/2853 [01:00<18:08,  2.50it/s]

Downloaded 20180502094500.gkg.csv.zip (9.82 MB)
TOTAL: 1.33 GB


  5%|▍         | 132/2853 [01:01<16:51,  2.69it/s]

Downloaded 20180502100000.gkg.csv.zip (7.55 MB)
TOTAL: 1.33 GB


  5%|▍         | 133/2853 [01:01<17:03,  2.66it/s]

Downloaded 20180502101500.gkg.csv.zip (9.28 MB)
TOTAL: 1.34 GB


  5%|▍         | 134/2853 [01:01<18:21,  2.47it/s]

Downloaded 20180502104500.gkg.csv.zip (10.92 MB)
TOTAL: 1.35 GB


  5%|▍         | 135/2853 [01:02<18:05,  2.50it/s]

Downloaded 20180502110000.gkg.csv.zip (9.31 MB)
TOTAL: 1.36 GB


  5%|▍         | 136/2853 [01:02<18:22,  2.47it/s]

Downloaded 20180502111500.gkg.csv.zip (10.27 MB)
TOTAL: 1.37 GB


  5%|▍         | 137/2853 [01:03<18:33,  2.44it/s]

Downloaded 20180502113000.gkg.csv.zip (10.28 MB)
TOTAL: 1.38 GB


  5%|▍         | 138/2853 [01:03<19:52,  2.28it/s]

Downloaded 20180502114500.gkg.csv.zip (11.11 MB)
TOTAL: 1.39 GB


  5%|▍         | 139/2853 [01:04<25:00,  1.81it/s]

Downloaded 20180502120000.gkg.csv.zip (11.20 MB)
TOTAL: 1.41 GB


  5%|▍         | 140/2853 [01:04<23:32,  1.92it/s]

Downloaded 20180502121500.gkg.csv.zip (10.19 MB)
TOTAL: 1.42 GB


  5%|▍         | 141/2853 [01:05<22:55,  1.97it/s]

Downloaded 20180502123000.gkg.csv.zip (12.04 MB)
TOTAL: 1.43 GB


  5%|▍         | 142/2853 [01:05<22:05,  2.05it/s]

Downloaded 20180502124500.gkg.csv.zip (11.76 MB)
TOTAL: 1.44 GB


  5%|▌         | 143/2853 [01:06<22:37,  2.00it/s]

Downloaded 20180502130000.gkg.csv.zip (11.02 MB)
TOTAL: 1.45 GB


  5%|▌         | 144/2853 [01:06<21:55,  2.06it/s]

Downloaded 20180502131500.gkg.csv.zip (12.81 MB)
TOTAL: 1.46 GB


  5%|▌         | 145/2853 [01:07<22:41,  1.99it/s]

Downloaded 20180502133000.gkg.csv.zip (13.40 MB)
TOTAL: 1.47 GB


  5%|▌         | 146/2853 [01:07<22:17,  2.02it/s]

Downloaded 20180502134500.gkg.csv.zip (12.53 MB)
TOTAL: 1.49 GB


  5%|▌         | 147/2853 [01:08<22:47,  1.98it/s]

Downloaded 20180502140000.gkg.csv.zip (12.31 MB)
TOTAL: 1.50 GB


  5%|▌         | 148/2853 [01:08<22:50,  1.97it/s]

Downloaded 20180502141500.gkg.csv.zip (13.78 MB)
TOTAL: 1.51 GB


  5%|▌         | 149/2853 [01:09<22:56,  1.96it/s]

Downloaded 20180502143000.gkg.csv.zip (13.19 MB)
TOTAL: 1.53 GB


  5%|▌         | 150/2853 [01:09<23:01,  1.96it/s]

Downloaded 20180502144500.gkg.csv.zip (13.59 MB)
TOTAL: 1.54 GB


  5%|▌         | 151/2853 [01:10<23:00,  1.96it/s]

Downloaded 20180502151500.gkg.csv.zip (15.37 MB)
TOTAL: 1.55 GB


  5%|▌         | 152/2853 [01:11<27:06,  1.66it/s]

Downloaded 20180502153000.gkg.csv.zip (15.32 MB)
TOTAL: 1.57 GB


  5%|▌         | 153/2853 [01:12<29:31,  1.52it/s]

Downloaded 20180502154500.gkg.csv.zip (15.37 MB)
TOTAL: 1.58 GB


  5%|▌         | 154/2853 [01:12<29:25,  1.53it/s]

Downloaded 20180502160000.gkg.csv.zip (15.39 MB)
TOTAL: 1.60 GB


  5%|▌         | 155/2853 [01:13<27:34,  1.63it/s]

Downloaded 20180502161500.gkg.csv.zip (14.68 MB)
TOTAL: 1.61 GB


  5%|▌         | 156/2853 [01:13<27:12,  1.65it/s]

Downloaded 20180502163000.gkg.csv.zip (15.17 MB)
TOTAL: 1.63 GB


  6%|▌         | 157/2853 [01:14<25:47,  1.74it/s]

Downloaded 20180502164500.gkg.csv.zip (14.59 MB)
TOTAL: 1.64 GB


  6%|▌         | 158/2853 [01:14<25:19,  1.77it/s]

Downloaded 20180502170000.gkg.csv.zip (15.11 MB)
TOTAL: 1.66 GB


  6%|▌         | 159/2853 [01:15<25:11,  1.78it/s]

Downloaded 20180502171500.gkg.csv.zip (16.47 MB)
TOTAL: 1.67 GB


  6%|▌         | 160/2853 [01:16<25:47,  1.74it/s]

Downloaded 20180502174500.gkg.csv.zip (16.15 MB)
TOTAL: 1.69 GB


  6%|▌         | 161/2853 [01:16<24:38,  1.82it/s]

Downloaded 20180502180000.gkg.csv.zip (13.43 MB)
TOTAL: 1.70 GB


  6%|▌         | 162/2853 [01:17<25:23,  1.77it/s]

Downloaded 20180502173000.gkg.csv.zip (15.73 MB)
TOTAL: 1.72 GB


  6%|▌         | 163/2853 [01:17<24:35,  1.82it/s]

Downloaded 20180502181500.gkg.csv.zip (14.16 MB)
TOTAL: 1.73 GB


  6%|▌         | 164/2853 [01:18<24:52,  1.80it/s]

Downloaded 20180502184500.gkg.csv.zip (16.04 MB)
TOTAL: 1.75 GB


  6%|▌         | 165/2853 [01:18<25:31,  1.75it/s]

Downloaded 20180502191500.gkg.csv.zip (14.44 MB)
TOTAL: 1.76 GB


  6%|▌         | 166/2853 [01:19<26:12,  1.71it/s]

Downloaded 20180502194500.gkg.csv.zip (16.23 MB)
TOTAL: 1.78 GB


  6%|▌         | 167/2853 [01:20<25:59,  1.72it/s]

Downloaded 20180502200000.gkg.csv.zip (16.18 MB)
TOTAL: 1.79 GB


  6%|▌         | 168/2853 [01:20<25:44,  1.74it/s]

Downloaded 20180502201500.gkg.csv.zip (14.07 MB)
TOTAL: 1.81 GB


  6%|▌         | 169/2853 [01:21<25:52,  1.73it/s]

Downloaded 20180502203000.gkg.csv.zip (14.90 MB)
TOTAL: 1.82 GB


  6%|▌         | 170/2853 [01:21<26:19,  1.70it/s]

Downloaded 20180502204500.gkg.csv.zip (14.48 MB)
TOTAL: 1.83 GB


  6%|▌         | 171/2853 [01:22<25:49,  1.73it/s]

Downloaded 20180502210000.gkg.csv.zip (14.91 MB)
TOTAL: 1.85 GB


  6%|▌         | 172/2853 [01:22<22:36,  1.98it/s]

Downloaded 20180502211500.gkg.csv.zip (6.30 MB)
TOTAL: 1.86 GB


  6%|▌         | 173/2853 [01:23<25:12,  1.77it/s]

Downloaded 20180502213000.gkg.csv.zip (18.78 MB)
TOTAL: 1.87 GB


  6%|▌         | 174/2853 [01:23<25:43,  1.74it/s]

Downloaded 20180502214500.gkg.csv.zip (14.58 MB)
TOTAL: 1.89 GB


  6%|▌         | 175/2853 [01:24<24:29,  1.82it/s]

Downloaded 20180502220000.gkg.csv.zip (12.64 MB)
TOTAL: 1.90 GB


  6%|▌         | 176/2853 [01:24<23:58,  1.86it/s]

Downloaded 20180502221500.gkg.csv.zip (13.14 MB)
TOTAL: 1.91 GB


  6%|▌         | 177/2853 [01:25<25:15,  1.77it/s]

Downloaded 20180502223000.gkg.csv.zip (13.30 MB)
TOTAL: 1.93 GB


  6%|▌         | 178/2853 [01:26<25:31,  1.75it/s]

Downloaded 20180502224500.gkg.csv.zip (12.39 MB)
TOTAL: 1.94 GB


  6%|▋         | 179/2853 [01:26<24:54,  1.79it/s]

Downloaded 20180502230000.gkg.csv.zip (12.92 MB)
TOTAL: 1.95 GB


  6%|▋         | 180/2853 [01:27<23:45,  1.87it/s]

Downloaded 20180502231500.gkg.csv.zip (11.68 MB)
TOTAL: 1.96 GB


  6%|▋         | 181/2853 [01:27<22:39,  1.97it/s]

Downloaded 20180502233000.gkg.csv.zip (11.57 MB)
TOTAL: 1.97 GB


  6%|▋         | 182/2853 [01:28<22:41,  1.96it/s]

Downloaded 20180502234500.gkg.csv.zip (12.21 MB)
TOTAL: 1.99 GB


  6%|▋         | 183/2853 [01:28<21:32,  2.07it/s]

Downloaded 20180503000000.gkg.csv.zip (11.33 MB)
TOTAL: 2.00 GB


  6%|▋         | 184/2853 [01:29<21:30,  2.07it/s]

Downloaded 20180503001500.gkg.csv.zip (11.16 MB)
TOTAL: 2.01 GB


  6%|▋         | 185/2853 [01:29<22:15,  2.00it/s]

Downloaded 20180503003000.gkg.csv.zip (12.29 MB)
TOTAL: 2.02 GB


  7%|▋         | 186/2853 [01:30<21:28,  2.07it/s]

Downloaded 20180503004500.gkg.csv.zip (10.54 MB)
TOTAL: 2.03 GB


  7%|▋         | 187/2853 [01:30<21:13,  2.09it/s]

Downloaded 20180503010000.gkg.csv.zip (10.07 MB)
TOTAL: 2.04 GB


  7%|▋         | 188/2853 [01:31<21:41,  2.05it/s]

Downloaded 20180503011500.gkg.csv.zip (10.44 MB)
TOTAL: 2.05 GB


  7%|▋         | 189/2853 [01:31<21:31,  2.06it/s]

Downloaded 20180503013000.gkg.csv.zip (11.45 MB)
TOTAL: 2.06 GB


  7%|▋         | 190/2853 [01:31<21:40,  2.05it/s]

Downloaded 20180503014500.gkg.csv.zip (10.22 MB)
TOTAL: 2.07 GB


  7%|▋         | 191/2853 [01:32<20:17,  2.19it/s]

Downloaded 20180503020000.gkg.csv.zip (9.47 MB)
TOTAL: 2.08 GB


  7%|▋         | 192/2853 [01:32<20:05,  2.21it/s]

Downloaded 20180503021500.gkg.csv.zip (9.78 MB)
TOTAL: 2.09 GB


  7%|▋         | 193/2853 [01:33<19:18,  2.30it/s]

Downloaded 20180503023000.gkg.csv.zip (10.16 MB)
TOTAL: 2.10 GB


  7%|▋         | 194/2853 [01:33<19:00,  2.33it/s]

Downloaded 20180503024500.gkg.csv.zip (9.90 MB)
TOTAL: 2.11 GB


  7%|▋         | 195/2853 [01:34<18:59,  2.33it/s]

Downloaded 20180503030000.gkg.csv.zip (8.47 MB)
TOTAL: 2.12 GB


  7%|▋         | 196/2853 [01:34<20:46,  2.13it/s]

Downloaded 20180503031500.gkg.csv.zip (8.58 MB)
TOTAL: 2.13 GB


  7%|▋         | 197/2853 [01:35<20:47,  2.13it/s]

Downloaded 20180503033000.gkg.csv.zip (9.55 MB)
TOTAL: 2.14 GB


  7%|▋         | 198/2853 [01:35<19:41,  2.25it/s]

Downloaded 20180503034500.gkg.csv.zip (7.19 MB)
TOTAL: 2.14 GB


  7%|▋         | 199/2853 [01:35<18:58,  2.33it/s]

Downloaded 20180503040000.gkg.csv.zip (7.33 MB)
TOTAL: 2.15 GB


  7%|▋         | 200/2853 [01:36<18:43,  2.36it/s]

Downloaded 20180503041500.gkg.csv.zip (9.00 MB)
TOTAL: 2.16 GB


  7%|▋         | 201/2853 [01:36<19:01,  2.32it/s]

Downloaded 20180503043000.gkg.csv.zip (9.80 MB)
TOTAL: 2.17 GB


  7%|▋         | 202/2853 [01:37<18:08,  2.43it/s]

Downloaded 20180503044500.gkg.csv.zip (8.44 MB)
TOTAL: 2.18 GB


  7%|▋         | 203/2853 [01:37<18:28,  2.39it/s]

Downloaded 20180503050000.gkg.csv.zip (10.35 MB)
TOTAL: 2.19 GB


  7%|▋         | 204/2853 [01:37<17:57,  2.46it/s]

Downloaded 20180503051500.gkg.csv.zip (8.62 MB)
TOTAL: 2.19 GB


  7%|▋         | 205/2853 [01:38<18:54,  2.33it/s]

Downloaded 20180503053000.gkg.csv.zip (9.32 MB)
TOTAL: 2.20 GB


  7%|▋         | 206/2853 [01:38<18:37,  2.37it/s]

Downloaded 20180503054500.gkg.csv.zip (10.04 MB)
TOTAL: 2.21 GB


  7%|▋         | 207/2853 [01:39<18:30,  2.38it/s]

Downloaded 20180503060000.gkg.csv.zip (8.87 MB)
TOTAL: 2.22 GB


  7%|▋         | 208/2853 [01:39<19:01,  2.32it/s]

Downloaded 20180503061500.gkg.csv.zip (8.40 MB)
TOTAL: 2.23 GB


  7%|▋         | 209/2853 [01:40<18:43,  2.35it/s]

Downloaded 20180503063000.gkg.csv.zip (9.06 MB)
TOTAL: 2.24 GB


  7%|▋         | 210/2853 [01:40<17:52,  2.47it/s]

Downloaded 20180503064500.gkg.csv.zip (8.75 MB)
TOTAL: 2.25 GB


  7%|▋         | 211/2853 [01:40<16:54,  2.60it/s]

Downloaded 20180503070000.gkg.csv.zip (8.47 MB)
TOTAL: 2.26 GB


  7%|▋         | 212/2853 [01:41<16:42,  2.64it/s]

Downloaded 20180503071500.gkg.csv.zip (8.94 MB)
TOTAL: 2.27 GB


  7%|▋         | 213/2853 [01:41<16:47,  2.62it/s]

Downloaded 20180503073000.gkg.csv.zip (9.33 MB)
TOTAL: 2.27 GB


  8%|▊         | 214/2853 [01:42<18:34,  2.37it/s]

Downloaded 20180503074500.gkg.csv.zip (8.46 MB)
TOTAL: 2.28 GB


  8%|▊         | 215/2853 [01:42<18:38,  2.36it/s]

Downloaded 20180503080000.gkg.csv.zip (10.52 MB)
TOTAL: 2.29 GB


  8%|▊         | 216/2853 [01:42<17:48,  2.47it/s]

Downloaded 20180503081500.gkg.csv.zip (8.89 MB)
TOTAL: 2.30 GB


  8%|▊         | 217/2853 [01:43<18:34,  2.37it/s]

Downloaded 20180503083000.gkg.csv.zip (8.81 MB)
TOTAL: 2.31 GB


  8%|▊         | 218/2853 [01:43<19:18,  2.27it/s]

Downloaded 20180503090000.gkg.csv.zip (8.60 MB)
TOTAL: 2.32 GB


  8%|▊         | 219/2853 [01:44<20:07,  2.18it/s]

Downloaded 20180503091500.gkg.csv.zip (8.69 MB)
TOTAL: 2.33 GB


  8%|▊         | 220/2853 [01:44<19:33,  2.24it/s]

Downloaded 20180503093000.gkg.csv.zip (9.75 MB)
TOTAL: 2.34 GB


  8%|▊         | 221/2853 [01:45<19:53,  2.21it/s]

Downloaded 20180503100000.gkg.csv.zip (9.17 MB)
TOTAL: 2.35 GB


  8%|▊         | 222/2853 [01:45<19:18,  2.27it/s]

Downloaded 20180503101500.gkg.csv.zip (9.12 MB)
TOTAL: 2.35 GB


  8%|▊         | 223/2853 [01:46<19:34,  2.24it/s]

Downloaded 20180503104500.gkg.csv.zip (9.41 MB)
TOTAL: 2.36 GB


  8%|▊         | 224/2853 [01:46<19:36,  2.23it/s]

Downloaded 20180503110000.gkg.csv.zip (10.70 MB)
TOTAL: 2.37 GB


  8%|▊         | 225/2853 [01:47<23:20,  1.88it/s]

Downloaded 20180503111500.gkg.csv.zip (11.24 MB)
TOTAL: 2.38 GB


  8%|▊         | 226/2853 [01:47<24:58,  1.75it/s]

Downloaded 20180503113000.gkg.csv.zip (10.45 MB)
TOTAL: 2.40 GB


  8%|▊         | 227/2853 [01:48<23:07,  1.89it/s]

Downloaded 20180503114500.gkg.csv.zip (10.98 MB)
TOTAL: 2.41 GB


  8%|▊         | 228/2853 [01:48<22:36,  1.94it/s]

Downloaded 20180503120000.gkg.csv.zip (12.78 MB)
TOTAL: 2.42 GB


  8%|▊         | 229/2853 [01:49<22:00,  1.99it/s]

Downloaded 20180503121500.gkg.csv.zip (10.87 MB)
TOTAL: 2.43 GB


  8%|▊         | 230/2853 [01:49<22:16,  1.96it/s]

Downloaded 20180503123000.gkg.csv.zip (12.19 MB)
TOTAL: 2.44 GB


  8%|▊         | 231/2853 [01:50<22:29,  1.94it/s]

Downloaded 20180503124500.gkg.csv.zip (11.76 MB)
TOTAL: 2.45 GB


  8%|▊         | 232/2853 [01:50<21:13,  2.06it/s]

Downloaded 20180503130000.gkg.csv.zip (10.83 MB)
TOTAL: 2.46 GB


  8%|▊         | 233/2853 [01:51<23:41,  1.84it/s]

Downloaded 20180503131500.gkg.csv.zip (12.85 MB)
TOTAL: 2.48 GB


  8%|▊         | 234/2853 [01:52<28:20,  1.54it/s]

Downloaded 20180503133000.gkg.csv.zip (12.56 MB)
TOTAL: 2.49 GB


  8%|▊         | 235/2853 [01:52<26:25,  1.65it/s]

Downloaded 20180503134500.gkg.csv.zip (12.33 MB)
TOTAL: 2.50 GB


  8%|▊         | 236/2853 [01:53<26:05,  1.67it/s]

Downloaded 20180503140000.gkg.csv.zip (14.75 MB)
TOTAL: 2.51 GB


  8%|▊         | 237/2853 [01:54<26:31,  1.64it/s]

Downloaded 20180503141500.gkg.csv.zip (12.77 MB)
TOTAL: 2.53 GB


  8%|▊         | 238/2853 [01:54<26:06,  1.67it/s]

Downloaded 20180503143000.gkg.csv.zip (13.78 MB)
TOTAL: 2.54 GB


  8%|▊         | 239/2853 [01:55<24:52,  1.75it/s]

Downloaded 20180503144500.gkg.csv.zip (13.49 MB)
TOTAL: 2.55 GB


  8%|▊         | 240/2853 [01:55<24:08,  1.80it/s]

Downloaded 20180503150000.gkg.csv.zip (14.39 MB)
TOTAL: 2.57 GB


  8%|▊         | 241/2853 [01:56<23:59,  1.81it/s]

Downloaded 20180503151500.gkg.csv.zip (14.59 MB)
TOTAL: 2.58 GB


  8%|▊         | 242/2853 [01:56<25:18,  1.72it/s]

Downloaded 20180503153000.gkg.csv.zip (15.86 MB)
TOTAL: 2.60 GB


  9%|▊         | 243/2853 [01:57<25:06,  1.73it/s]

Downloaded 20180503154500.gkg.csv.zip (13.64 MB)
TOTAL: 2.61 GB


  9%|▊         | 244/2853 [01:57<24:27,  1.78it/s]

Downloaded 20180503160000.gkg.csv.zip (14.54 MB)
TOTAL: 2.62 GB


  9%|▊         | 245/2853 [01:58<24:39,  1.76it/s]

Downloaded 20180503161500.gkg.csv.zip (15.08 MB)
TOTAL: 2.64 GB


  9%|▊         | 246/2853 [01:59<23:58,  1.81it/s]

Downloaded 20180503163000.gkg.csv.zip (14.95 MB)
TOTAL: 2.65 GB


  9%|▊         | 247/2853 [01:59<24:53,  1.75it/s]

Downloaded 20180503164500.gkg.csv.zip (15.05 MB)
TOTAL: 2.67 GB


  9%|▊         | 248/2853 [02:00<24:52,  1.74it/s]

Downloaded 20180503170000.gkg.csv.zip (15.58 MB)
TOTAL: 2.68 GB


  9%|▊         | 249/2853 [02:00<24:27,  1.77it/s]

Downloaded 20180503171500.gkg.csv.zip (14.51 MB)
TOTAL: 2.70 GB


  9%|▉         | 250/2853 [02:01<25:25,  1.71it/s]

Downloaded 20180503173000.gkg.csv.zip (12.96 MB)
TOTAL: 2.71 GB


  9%|▉         | 251/2853 [02:02<26:28,  1.64it/s]

Downloaded 20180503180000.gkg.csv.zip (14.29 MB)
TOTAL: 2.72 GB


  9%|▉         | 252/2853 [02:02<26:24,  1.64it/s]

Downloaded 20180503183000.gkg.csv.zip (16.11 MB)
TOTAL: 2.74 GB


  9%|▉         | 253/2853 [02:03<26:10,  1.66it/s]

Downloaded 20180503184500.gkg.csv.zip (13.98 MB)
TOTAL: 2.75 GB


  9%|▉         | 254/2853 [02:03<25:13,  1.72it/s]

Downloaded 20180503190000.gkg.csv.zip (14.50 MB)
TOTAL: 2.77 GB


  9%|▉         | 255/2853 [02:04<25:07,  1.72it/s]

Downloaded 20180503191500.gkg.csv.zip (15.74 MB)
TOTAL: 2.78 GB


  9%|▉         | 256/2853 [02:04<25:21,  1.71it/s]

Downloaded 20180503193000.gkg.csv.zip (14.12 MB)
TOTAL: 2.80 GB


  9%|▉         | 257/2853 [02:05<24:06,  1.79it/s]

Downloaded 20180503194500.gkg.csv.zip (14.63 MB)
TOTAL: 2.81 GB


  9%|▉         | 258/2853 [02:06<28:59,  1.49it/s]

Downloaded 20180503200000.gkg.csv.zip (13.99 MB)
TOTAL: 2.83 GB


  9%|▉         | 259/2853 [02:07<29:09,  1.48it/s]

Downloaded 20180503201500.gkg.csv.zip (13.25 MB)
TOTAL: 2.84 GB


  9%|▉         | 260/2853 [02:07<28:20,  1.53it/s]

Downloaded 20180503203000.gkg.csv.zip (12.98 MB)
TOTAL: 2.85 GB


  9%|▉         | 261/2853 [02:08<27:07,  1.59it/s]

Downloaded 20180503204500.gkg.csv.zip (12.86 MB)
TOTAL: 2.86 GB


  9%|▉         | 262/2853 [02:08<26:09,  1.65it/s]

Downloaded 20180503210000.gkg.csv.zip (13.34 MB)
TOTAL: 2.88 GB


  9%|▉         | 263/2853 [02:09<24:41,  1.75it/s]

Downloaded 20180503211500.gkg.csv.zip (12.58 MB)
TOTAL: 2.89 GB


  9%|▉         | 264/2853 [02:09<24:17,  1.78it/s]

Downloaded 20180503213000.gkg.csv.zip (15.10 MB)
TOTAL: 2.90 GB


  9%|▉         | 265/2853 [02:10<25:39,  1.68it/s]

Downloaded 20180503214500.gkg.csv.zip (12.46 MB)
TOTAL: 2.92 GB


  9%|▉         | 266/2853 [02:11<25:04,  1.72it/s]

Downloaded 20180503220000.gkg.csv.zip (13.82 MB)
TOTAL: 2.93 GB


  9%|▉         | 267/2853 [02:11<23:34,  1.83it/s]

Downloaded 20180503221500.gkg.csv.zip (12.21 MB)
TOTAL: 2.94 GB


  9%|▉         | 268/2853 [02:11<22:22,  1.93it/s]

Downloaded 20180503223000.gkg.csv.zip (12.63 MB)
TOTAL: 2.95 GB


  9%|▉         | 269/2853 [02:12<22:37,  1.90it/s]

Downloaded 20180503224500.gkg.csv.zip (12.67 MB)
TOTAL: 2.97 GB


  9%|▉         | 270/2853 [02:12<21:27,  2.01it/s]

Downloaded 20180503230000.gkg.csv.zip (11.20 MB)
TOTAL: 2.98 GB


  9%|▉         | 271/2853 [02:13<20:18,  2.12it/s]

Downloaded 20180503231500.gkg.csv.zip (11.11 MB)
TOTAL: 2.99 GB


 10%|▉         | 272/2853 [02:13<20:41,  2.08it/s]

Downloaded 20180503233000.gkg.csv.zip (11.06 MB)
TOTAL: 3.00 GB


 10%|▉         | 273/2853 [02:14<21:09,  2.03it/s]

Downloaded 20180503234500.gkg.csv.zip (11.68 MB)
TOTAL: 3.01 GB

Reached ~3GB. Stopping downloads.

FINAL TOTAL: 3.0097222616896033 GB





In [12]:


# ------------------------------------------------------------
# Decompress every downloaded *.gkg.csv.zip in RAW_DIR into STREAM_IN_DIR
# as a plain .gkg.csv file. Re-running the cell is safe: outputs that
# already exist are skipped, and a failed file never leaves a truncated
# CSV behind under its final name.
# ------------------------------------------------------------
RAW_DIR = r"C:\Users\jonat\Documents\UMBC\FALL_2025\Final_Project_UMBC_BIG_DATA\DATA\raw"
STREAM_IN_DIR = r"C:\Users\jonat\Documents\UMBC\FALL_2025\Final_Project_UMBC_BIG_DATA\DATA\stream_in"

os.makedirs(STREAM_IN_DIR, exist_ok=True)


def _decompress_to(zip_path, dest_path):
    """Decompress one downloaded archive into ``dest_path``.

    The container format is detected from the first two bytes of the file
    rather than from its extension, so both gzip streams and real zip
    archives are handled.

    Parameters
    ----------
    zip_path : str
        Path of the compressed file to read.
    dest_path : str
        Path the decompressed bytes are written to (overwritten if present).

    Returns
    -------
    bool
        True if the file was decompressed; False if the magic bytes match
        neither gzip nor zip (a message is printed and nothing is written).

    Raises
    ------
    ValueError
        If a zip archive contains no members.
    Exception
        Any I/O or decompression error from ``gzip`` / ``zipfile`` propagates
        to the caller.
    """
    # Peek at first two bytes to detect format
    with open(zip_path, "rb") as f:
        magic = f.read(2)

    if magic == b"\x1f\x8b":  # gzip signature
        with gzip.open(zip_path, "rb") as f_in, open(dest_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        return True

    if magic == b"PK":  # zip file signature
        with zipfile.ZipFile(zip_path, "r") as zf:
            members = zf.namelist()
            if not members:
                raise ValueError("zip archive contains no members")
            # Only the first member is extracted (same as before); the code
            # assumes one CSV per archive — TODO confirm for all files.
            with zf.open(members[0]) as f_in, open(dest_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
        return True

    print(f"Unknown compression format for {os.path.basename(zip_path)}, magic bytes={magic!r}")
    return False


failed = []

# Sorted so the processing order (and the log) is deterministic;
# os.listdir() makes no ordering guarantee.
for fname in sorted(os.listdir(RAW_DIR)):
    if not fname.endswith(".gkg.csv.zip"):
        continue

    zip_path = os.path.join(RAW_DIR, fname)
    out_name = fname[:-len(".zip")]  # e.g. 20180501000000.gkg.csv
    out_path = os.path.join(STREAM_IN_DIR, out_name)

    if os.path.exists(out_path):
        print(f"Already decompressed, skipping: {out_name}")
        continue

    print(f"Decompressing {fname} -> {out_name}")

    # Write to a temporary name and rename only on success. Otherwise a
    # failure halfway through would leave a truncated CSV at out_path, and
    # the exists() check above would treat it as finished on the next run.
    tmp_path = out_path + ".part"

    try:
        if _decompress_to(zip_path, tmp_path):
            os.replace(tmp_path, out_path)
        else:
            failed.append(fname)

    except Exception as e:
        # Deliberately best-effort: one bad archive must not stop the batch.
        print(f"Error decompressing {fname}: {e}")
        failed.append(fname)

    finally:
        # Drop any partial output (no-op after a successful rename).
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

print("Done decompressing.")

if failed:
    # Surface failures here so they are not lost in the per-file log above.
    print(f"{len(failed)} file(s) could not be decompressed: {', '.join(failed)}")


Decompressing 20180501000000.gkg.csv.zip -> 20180501000000.gkg.csv
Decompressing 20180501001500.gkg.csv.zip -> 20180501001500.gkg.csv
Decompressing 20180501003000.gkg.csv.zip -> 20180501003000.gkg.csv
Decompressing 20180501004500.gkg.csv.zip -> 20180501004500.gkg.csv
Decompressing 20180501010000.gkg.csv.zip -> 20180501010000.gkg.csv
Decompressing 20180501011500.gkg.csv.zip -> 20180501011500.gkg.csv
Decompressing 20180501013000.gkg.csv.zip -> 20180501013000.gkg.csv
Decompressing 20180501014500.gkg.csv.zip -> 20180501014500.gkg.csv
Decompressing 20180501020000.gkg.csv.zip -> 20180501020000.gkg.csv
Decompressing 20180501021500.gkg.csv.zip -> 20180501021500.gkg.csv
Decompressing 20180501023000.gkg.csv.zip -> 20180501023000.gkg.csv
Decompressing 20180501024500.gkg.csv.zip -> 20180501024500.gkg.csv
Decompressing 20180501030000.gkg.csv.zip -> 20180501030000.gkg.csv
Decompressing 20180501031500.gkg.csv.zip -> 20180501031500.gkg.csv
Decompressing 20180501033000.gkg.csv.zip -> 20180501033000.gkg