In [1]:
from new_downloader import Downloader
from calendar import monthrange
from datetime import datetime
import json

In [2]:
# Zero-padded ELY codes that the notebook iterates over when building downloads.
ely_codes = ['01', '02', '03', '04', '08', '09', '10', '12', '14']

# Load the ELY -> LAM id mapping. Because it is parsed from a JSON object,
# its keys are always strings.
with open('../elyjenmappaus/ely_lamid_mapping.json', 'r') as mapping_file:
    lam_id_mapping = json.load(mapping_file)

In [3]:
def create_file_name(lam_id, year, day_number):
    """Build the raw-data CSV file name for one LAM id, year and day number.

    The year is reduced to ``year % 100`` and inserted without zero padding,
    so e.g. 2005 contributes ``5`` and 2017 contributes ``17``.

    Returns:
        A string of the form ``lamraw_<lam_id>_<short year>_<day_number>.csv``.
    """
    two_digit_year = year % 100
    return "lamraw_{}_{}_{}.csv".format(lam_id, two_digit_year, day_number)

def first_and_last_day_number(month, year):
    """Return the day-of-year numbers of a month's first and last day.

    Args:
        month: Month number, 1-12.
        year: Four-digit calendar year (leap years are taken into account).

    Returns:
        A ``(first, last)`` tuple of 1-based day-of-year numbers.
    """
    month_start = datetime(year, month, 1)
    days_in_month = monthrange(year, month)[1]

    start_day_number = month_start.timetuple().tm_yday
    # Days of a month are consecutive within the year, so the last day's
    # number follows directly from the first day's number and the month length.
    end_day_number = start_day_number + days_in_month - 1

    return (start_day_number, end_day_number)

def create_download_parameters_for_months(lam_id, year, months, ely_code, path):
    """Build one download-parameter tuple per day of the given months.

    Args:
        lam_id: LAM id used in the generated file names.
        year: Calendar year the months belong to.
        months: Iterable of month numbers (1-12) to cover.
        ely_code: ELY code stored as-is in every tuple.
        path: Target path stored as-is in every tuple.

    Returns:
        A list of ``(year, ely_code, file_name, path)`` tuples, in month
        order and then day order.
    """
    entries = []
    for month in months:
        start_day, end_day = first_and_last_day_number(month, year)
        entries.extend(
            (year, ely_code, create_file_name(lam_id, year, day), path)
            for day in range(start_day, end_day + 1)
        )
    return entries

In [4]:
def find_ely(lam_id):
    """Look up the ``lam_id_mapping`` key whose value contains ``lam_id``.

    ``lam_id`` is converted with ``int()`` before the membership test, so it
    may be passed as an int or a numeric string.

    Returns:
        The first mapping key whose value contains the id, or the integer
        ``-1`` when no entry contains it.
    """
    matching_keys = (
        ely_key
        for ely_key, lam_ids in lam_id_mapping.items()
        if int(lam_id) in lam_ids
    )
    return next(matching_keys, -1)

def append_to_key_value(dictionary, key, value):
    """Append ``value`` to the list stored under ``key``, creating it if absent.

    The dictionary is modified in place; nothing is returned.
    """
    dictionary.setdefault(key, []).append(value)
        
def add_missing_dict(dictionary, key):
    """Ensure ``dictionary[key]`` exists, inserting an empty dict if it does not.

    An existing value under ``key`` is left untouched. The dictionary is
    modified in place; nothing is returned.
    """
    dictionary.setdefault(key, {})

def create_errored_json(errored):
    """Group failed downloads by ELY key and LAM id.

    The last space-separated token of each message is taken as a file name
    shaped like ``lamraw_<lam_id>_<year>_<day>.csv``; the LAM id and day
    number are parsed out of it.

    Args:
        errored: Iterable of error message strings.

    Returns:
        A nested dict ``{ely: {lam_id: [day, ...]}}`` in which every key and
        every day value is a string. LAM ids that ``find_ely`` cannot resolve
        are grouped under the key ``"-1"``.
    """
    errored_dict = {}
    for msg in errored:
        filename = msg.split(" ")[-1]
        name_parts = filename.split("_")
        lam_id = name_parts[1]
        day = name_parts[-1].split(".")[0]
        # find_ely returns the int -1 for unknown ids but a string mapping key
        # otherwise. Mixing int and str keys makes json.dump(..., sort_keys=True)
        # raise TypeError, so the key is normalised to str here. JSON would
        # serialise -1 as "-1" anyway.
        ely = str(find_ely(lam_id))
        add_missing_dict(errored_dict, ely)
        append_to_key_value(errored_dict[ely], lam_id, day)

    return errored_dict

def save_json(json_data, filename):
    """Serialise ``json_data`` as JSON into ``filename`` with keys sorted.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(filename, mode='w') as out_file:
        json.dump(obj=json_data, fp=out_file, sort_keys=True)


In [5]:
parameters = []
selected_months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
selected_years = [2016, 2017, 2020, 2021]
external_drive_path = "YOUR_PATH\\data\\raw_data"

# Create download parameters for each ELY code and its LAM ids.
for ely in ely_codes:
    # The mapping is looked up with the zero padding removed ('01' -> '1').
    # lstrip is required here: strip("0") would also remove trailing zeros and
    # turn '10' into '1', fetching the wrong ELY's LAM list.
    lams = lam_id_mapping[ely.lstrip("0")]
    for selected_year in selected_years:
        for lam in lams:
            # extend() grows the list in place instead of copying it on every
            # iteration, as `parameters = parameters + ...` did.
            parameters.extend(
                create_download_parameters_for_months(
                    lam, selected_year, selected_months, ely, external_drive_path
                )
            )

In [6]:
# Downloader is the project-local class imported from new_downloader.
# NOTE(review): the argument meanings below are presumed from the path names
# (error log, progress log, already-existing-files log); confirm them and the
# meaning of the trailing False against Downloader's signature.
downloader = Downloader(
    external_drive_path,
    f"{external_drive_path}\\error_log\\error_log",
    f"{external_drive_path}\\progress_files\\progress_log",
    f"{external_drive_path}\\existed_files\\existed_log",
    False,
)

In [7]:
# Start the download for every parameter tuple collected in `parameters`.
# NOTE(review): the second argument is presumably a name/label for this run;
# confirm against Downloader.download.
downloader.download(parameters, "full_4yr_download")

Creating FileWritterThread
Starting FileWritterThread
Waiting for items to write
Starting downloaders
Queued rows (0, 366, 0)
Waiting for items to write
Queued rows (0, 0, 0)
Waiting for items to write
Queued rows (0, 0, 0)
Waiting for items to write
Queued rows (0, 103, 408)
Waiting for items to write
Queued rows (0, 0, 367)
Done writing 1001 rows
Saved files and reopened them
Waiting for items to write
Queued rows (0, 69, 356)
Waiting for items to write
Queued rows (0, 73, 340)
Done writing 2001 rows
Saved files and reopened them
Waiting for items to write
Queued rows (0, 0, 268)
Waiting for items to write
Queued rows (0, 0, 377)
Waiting for items to write
Queued rows (0, 0, 373)
Done writing 3001 rows
Saved files and reopened them
Waiting for items to write
Queued rows (0, 0, 353)
Waiting for items to write
Queued rows (0, 0, 354)
Waiting for items to write
Queued rows (0, 0, 360)
Done writing 4001 rows
Saved files and reopened them
Waiting for items to write
Queued rows (0, 0, 357)