# ECS8052 Assessment Data Loader

This file contains the code necessary to download the data needed for the module assessment. You may use this code freely as part of your assessment.

In [None]:
import os
# Directory (relative to the current working directory) that receives the
# downloaded files.
datadir = 'primekg'
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race; it still raises if the path exists but is not a directory, instead
# of silently continuing and failing later when files are written.
os.makedirs(datadir, exist_ok=True)

In [None]:
# Each entry of `filelist` is a tuple of
#   (local file name, download URL, expected MD5 hex digest).
# All URLs share the same dataverse.harvard.edu file-access prefix followed by
# a numeric datafile id, so they are assembled from that prefix here.
_dataverse_prefix = 'https://dataverse.harvard.edu/api/access/datafile/'
filelist = [
    (name, _dataverse_prefix + datafile_id, md5_hex)
    for name, datafile_id, md5_hex in (
        ('kg.csv', '6180620', 'aac8191d4fbc5bf09cdf8c3c78b4e75f'),
        ('disease_features.tab', '6180618', 'f8d120497eb69848dc7d971ae30e3cd6'),
        ('drug_features.tab', '6180619', 'e8c67d20e815b0d26d9d91be79adfff8'),

        ('nodes.tab', '6180617', '4924de04fb3deefa1e0a8dada424538e'),  # MD5 differs on website
        ('edges.csv', '6180616', '5d4d211a22e88544b78fde2735e797bc'),

        ('README.txt', '6191270', '608e37d4808bb97643186a1b6dc8f307'),
    )
]

In [None]:
import requests
import hashlib

# Download every file listed in `filelist` into `datadir`. A file is written
# to disk only when the MD5 of the downloaded bytes matches the expected
# digest; failures are reported and the loop moves on to the next file.
for filename, url, expected_md5 in filelist:
    print(f'INITIATING: download of {filename} from {url}')
    # The full response body is held in memory (response.content) so it can
    # be hashed before anything is written.
    response = requests.get(url)
    if response.status_code != 200:
        # f-string so the actual file name and URL appear in the message.
        print(f'Failed to download file {filename} from {url}')
        continue

    actual_md5 = hashlib.md5(response.content).hexdigest()
    if actual_md5 != expected_md5:
        # Nothing is saved when the checksum does not match.
        print(f'ERROR: File {filename} downloaded from {url} with incorrect checksum {actual_md5} (should be {expected_md5})')
        continue

    print(f'SUCCESS: File {filename} downloaded from {url} with correct checksum {expected_md5}')
    with open(os.path.join(datadir, filename), 'wb') as file:
        file.write(response.content)
