In [1]:
# Mount Google Drive inside the Colab VM at /content/gdrive (asks for an authorization code).
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import os
# Make the Drive root the working directory, so relative paths resolve inside the mounted Drive.
os.chdir('/content/gdrive/My Drive/')

In [0]:
import requests
import json
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
import pickle
import os

In [0]:
# Shared HTTP session used by the request helpers; stays None until initializeRequestSession() runs.
requestSession = None

def initializeRequestSession():
    """Create the shared HTTP session and give it retrying adapters.

    Rebinds the global ``requestSession``. The retry policy allows 5 total retries
    with a 0.1 backoff factor and forces a retry on HTTP 500, 502, 503 and 504.
    """
    global requestSession
    requestSession = requests.Session()
    retryPolicy = Retry(total=5,
                        backoff_factor=0.1,
                        status_forcelist=[500, 502, 503, 504])
    # Each scheme gets its own adapter instance; both share the same retry policy.
    for scheme in ('http://', 'https://'):
        requestSession.mount(scheme, HTTPAdapter(max_retries=retryPolicy))

def getPostRequest(reqUrl,payLoad={}):
    """POST ``payLoad`` as the request data to ``reqUrl`` through the shared session.

    Returns the session's response object without parsing it.
    """
    response = requestSession.post(reqUrl, data=payLoad)
    return response

def getGetRequest(reqUrl,payLoad={}):
    """GET ``reqUrl`` through the shared session, passing ``payLoad`` as the ``data`` argument.

    Returns the session's response object without parsing it.
    """
    response = requestSession.get(reqUrl, data=payLoad)
    return response

def getTheUrl(reqUrl,payLoad={}):
    """GET ``reqUrl`` and return the response body decoded from JSON.

    Any exception is printed and then re-raised to the caller.
    """
    try:
        return parseRequest(getGetRequest(reqUrl,payLoad))
    except Exception as error:
        print(str(error))
        raise error

def postTheUrl(reqUrl,payLoad={}):
    """POST ``payLoad`` to ``reqUrl`` and return the response body decoded from JSON.

    Any exception is printed and then re-raised to the caller.
    """
    try:
        return parseRequest(getPostRequest(reqUrl,payLoad))
    except Exception as error:
        print(str(error))
        raise error

def parseRequest(req):
    """Decode the JSON document held in ``req.text`` and return the resulting object.

    Decoding errors propagate to the caller.
    """
    return json.loads(req.text)

def getImageContent(imageDownloadUrl):
    """Download ``imageDownloadUrl`` through the shared session and return the body bytes.

    Raises:
        Whatever ``raise_for_status()`` raises when the server answers with an HTTP
        error status, so an error page is never handed back as image data.
    """
    response = requestSession.get(imageDownloadUrl)
    # Without this check the body of a 4xx/5xx answer would be returned as if it were the image.
    response.raise_for_status()
    return response.content

In [0]:
def createDirectory(dirName):
    """Create ``dirName`` (and any missing parents); do nothing if the path already exists."""
    try:
        os.makedirs(dirName)
    except FileExistsError:
        # Already present: same outcome as an exists() pre-check, but without the
        # window between checking and creating.
        pass

def fileExists(filePath):
    """Tell whether ``filePath`` names an existing regular file."""
    isRegularFile = os.path.isfile(filePath)
    return isRegularFile

def saveImage(imageContent,destinationPath):
    """Write the bytes ``imageContent`` to ``destinationPath``, replacing any existing file."""
    # The context manager closes the handle even when write() raises.
    with open(destinationPath, 'wb') as imageFile:
        imageFile.write(imageContent)

In [0]:
# ISIC Archive REST endpoints (API v1).
# NOTE(review): limit=0 presumably means "no limit" so one call returns every record — confirm against the API docs.
dataSetUrl = "https://isic-archive.com:443/api/v1/dataset?limit=0&offset=0&sort=name&sortdir=1"
imageSetBaseUrl = "https://isic-archive.com:443/api/v1/image?limit=0&offset=0&sort=name&sortdir=1"
# Both image bases hold the same prefix: one is extended with "<imageId>/download",
# the other with "<imageId>" to read the image details.
imageDownloadBaseUrl = "https://isic-archive.com:443/api/v1/image/"
imageDetailsDownloadBaseUrl = "https://isic-archive.com:443/api/v1/image/"

# Pickle files (relative to the working directory) that cache the two lookup maps.
imageIdClassMapPkl = "imageIdClassMap.pkl"
datasetImageIdMapPkl = "dataSetImageIdMap.pkl"

In [0]:
# Maps image id -> class label.
imageIdClassMap = {}
# Maps dataset id -> list of the image ids that belong to that dataset.
datasetImageIdMap = {}

def extractImageIdsFromUrl(imageSetUrl):
    """Fetch the JSON listing at ``imageSetUrl`` and return the ``_id`` of every entry, in order."""
    return [entry["_id"] for entry in getTheUrl(imageSetUrl)]

def extractDatasetList():
    """Fetch the dataset listing from ``dataSetUrl`` and return the ``_id`` of every dataset, in order."""
    return [entry["_id"] for entry in getTheUrl(dataSetUrl)]

def extractImageIdsOfAllDatasets():
    """Build the dataset id -> image id list map from the API and pickle it.

    Returns:
        dict mapping every dataset id to the list of image ids fetched for it. The
        same dict is written to the file named by ``datasetImageIdMapPkl``.
    """
    print("Extracting DataSet List...")
    dataSetIds = extractDatasetList()
    print("DataSet Ids..."+str(dataSetIds))
    datasetImageMap = {}
    for dataSetId in dataSetIds:
        imageSetFullUrl = imageSetBaseUrl+"&datasetId="+dataSetId
        datasetImageMap[dataSetId] = extractImageIdsFromUrl(imageSetFullUrl)
    for dataSetId,imageIds in datasetImageMap.items():
        print("DataSet Id:Number of Images "+str(dataSetId)+":"+str(len(imageIds)))
    print("Pickling the dataset image map...")
    # Opening with 'wb' already truncates the file; the with-block closes the handle
    # even when pickle.dump raises.
    with open(datasetImageIdMapPkl, 'wb') as dataSetImageIdMapFileHandle:
        pickle.dump(datasetImageMap, dataSetImageIdMapFileHandle)
    print("Pickling Done...")
    return datasetImageMap

def getImageClass(imageId):
    """Return the class label of ``imageId``.

    A label already stored in ``imageIdClassMap`` is returned directly. Otherwise the
    image details are fetched and ``meta -> clinical -> benign_malignant`` is returned,
    with "_Null_Class_" standing in for a null value and "_Fetch_Error_" for any
    failure while fetching or reading the details.
    """
    if imageId in imageIdClassMap:
        return imageIdClassMap[imageId]

    detailsUrl = imageDetailsDownloadBaseUrl+imageId
    try:
        details = getTheUrl(detailsUrl)
        label = details["meta"]["clinical"]["benign_malignant"]
        return "_Null_Class_" if label is None else label
    except Exception as error:
        print("ERROR: while extracting the class for an image"+str(error))
        return "_Fetch_Error_"

def fetchAndPickleClassesForImage(imageIds):
    """Look up the class of every id in ``imageIds``, log each result, and pickle the class map.

    Each label is stored in the global ``imageIdClassMap``; one line per image is
    written to "classes_fetching_logs.txt"; finally the whole map is pickled to the
    file named by ``imageIdClassMapPkl``.
    """
    print("Fetching Classes For ImageIds...")
    totalCount = len(imageIds)
    # Opening with 'w' already truncates; the with-block closes the log even if a lookup raises.
    with open("classes_fetching_logs.txt", 'w') as classFetchesLogFile:
        for count, imageId in enumerate(imageIds, start=1):
            print(str(count)+"/"+str(totalCount))
            imageClass = getImageClass(imageId)
            imageIdClassMap[imageId] = imageClass
            classFetchesLogFile.write("Fetched Class:"+imageClass+" for ImageId:"+imageId)
            classFetchesLogFile.write("\n")
    print("Pickling Classes For ImageIds...")
    # Close the pickle explicitly so the data is flushed before the function returns.
    with open(imageIdClassMapPkl, 'wb') as imageIdClassMapFileHandle:
        pickle.dump(imageIdClassMap, imageIdClassMapFileHandle)
    print("Pickled Classes  For ImageIds...")

def fetchImagesMetadata():
    """Load the dataset map, then fetch and pickle the class of every image id it lists."""
    initializeDataSetImageIdMap()
    # Flatten the per-dataset id lists into one list, keeping the map's iteration order.
    allImageIds = [imageId
                   for imageIds in datasetImageIdMap.values()
                   for imageId in imageIds]
    fetchAndPickleClassesForImage(allImageIds)

def getImageDestinationPath(dataSetId,imageClass,imageId):
    """Build the relative path ``<dataSetId>/<imageClass>/<imageId>.jpg`` for an image."""
    fileName = imageId + ".jpg"
    return "/".join((dataSetId, imageClass, fileName))

def imageAlreadyDownloaded(dataSetId,imageId):
    """Return True (and print a notice) when the image's destination file already exists."""
    targetPath = getImageDestinationPath(dataSetId, getImageClass(imageId), imageId)
    if not fileExists(targetPath):
        return False
    print("Image already present, so not downloading it again:" + imageId)
    return True

def downloadImage(dataSetId,imageId):
    """Download one image into ``<dataSetId>/<class>/<imageId>.jpg`` unless it is already there.

    Returns:
        True when the file already existed or was downloaded and saved; False when
        the download produced no data or any step raised an exception.
    """
    if(imageAlreadyDownloaded(dataSetId,imageId)):
        return True

    print("Downloading Image:"+imageId)
    try:
        imageClass = getImageClass(imageId)
        createDirectory(dataSetId+"/"+imageClass)
        destinationPath = getImageDestinationPath(dataSetId,imageClass,imageId)
        imageDownloadUrl = imageDownloadBaseUrl + imageId + "/download"
        imageContent = getImageContent(imageDownloadUrl)
        # An empty body counts as a failure as well as None: saving it would leave a
        # zero-byte .jpg that the already-downloaded check then skips on later runs.
        if not imageContent:
            print("Download of Image:"+imageId+" failed...")
            return False
        saveImage(imageContent,destinationPath)
        print("Downloaded Image:"+imageId)
        return True
    except Exception as e:
        print("Download of Image:"+imageId+" failed..."+str(e))
        return False

def initializeDataSetImageIdMap():
    """Load the global ``datasetImageIdMap`` from its pickle file when the map is still empty.

    Does nothing when the map already has entries. Raises whatever ``open`` or
    ``pickle.load`` raise, e.g. when the pickle file does not exist.
    """
    global datasetImageIdMap
    if(len(datasetImageIdMap)==0):
        # NOTE: pickle.load can execute arbitrary code; only load a file this notebook wrote itself.
        with open(datasetImageIdMapPkl, 'rb') as dataSetImageIdMapFileHandle:
            datasetImageIdMap = pickle.load(dataSetImageIdMapFileHandle)

def initializeImageIdClassMap():
    """Load the global ``imageIdClassMap`` from its pickle file when the map is still empty.

    Does nothing when the map already has entries. Raises whatever ``open`` or
    ``pickle.load`` raise, e.g. when the pickle file does not exist.
    """
    global imageIdClassMap
    if(len(imageIdClassMap)==0):
        # NOTE: pickle.load can execute arbitrary code; only load a file this notebook wrote itself.
        with open(imageIdClassMapPkl,'rb') as imageIdClassMapFileHandle:
            imageIdClassMap = pickle.load(imageIdClassMapFileHandle)

def downloadImages():
    """Download every image of every dataset in ``datasetImageIdMap``.

    For each dataset a directory named after the dataset id is created, and a
    "<dataSetId>_failed.txt" file inside it receives one line per image whose download
    failed, followed by a summary line. Progress and the summary are also printed.
    """
    initializeDataSetImageIdMap()
    initializeImageIdClassMap()

    for dataSetId,imageIds in datasetImageIdMap.items():
        print("Downloading Images for Dataset:"+dataSetId)
        totalImagesInDataSet = len(imageIds)
        imagesDownloadedSuccessFully = 0
        createDirectory(dataSetId)
        # Opening with 'w' already truncates; the with-block closes the failure log even
        # when the loop is interrupted part-way through.
        with open(dataSetId+"/"+dataSetId+"_failed.txt", 'w') as failedDownloads:
            for imageIndxProcessed, imageId in enumerate(imageIds, start=1):
                if(downloadImage(dataSetId,imageId)):
                    imagesDownloadedSuccessFully = imagesDownloadedSuccessFully +1
                else:
                    failedDownloads.write(imageId)
                    failedDownloads.write("\n")
                print("Processing Image:"+str(imageIndxProcessed)+"/"+str(totalImagesInDataSet))
            summary = str(imagesDownloadedSuccessFully)+" images downloaded out of "+str(totalImagesInDataSet) +" for the dataset "+str(dataSetId)
            failedDownloads.write(summary)
        print(summary)
        print("Downloaded the images for the dataset:"+dataSetId)


In [8]:
# Entry point: create the retrying HTTP session, then download the images of every dataset.
initializeRequestSession()
downloadImages()

Downloading Images for Dataset:5aa2baff1165972a9e961672
Downloading Image:5436e3abbae478396759f0cf
Downloaded Image:5436e3abbae478396759f0cf
Processing Image:1/23906
Downloading Image:5436e3acbae478396759f0d1
Downloaded Image:5436e3acbae478396759f0d1
Processing Image:2/23906
Downloading Image:5436e3acbae478396759f0d3
Downloaded Image:5436e3acbae478396759f0d3
Processing Image:3/23906
Downloading Image:5436e3acbae478396759f0d5
Downloaded Image:5436e3acbae478396759f0d5
Processing Image:4/23906
Downloading Image:5436e3acbae478396759f0d7
Downloaded Image:5436e3acbae478396759f0d7
Processing Image:5/23906
Downloading Image:5436e3acbae478396759f0d9
Downloaded Image:5436e3acbae478396759f0d9
Processing Image:6/23906
Downloading Image:5436e3acbae478396759f0db
Downloaded Image:5436e3acbae478396759f0db
Processing Image:7/23906
Downloading Image:5436e3acbae478396759f0dd
Downloaded Image:5436e3acbae478396759f0dd
Processing Image:8/23906
Downloading Image:5436e3acbae478396759f0df
Downloaded Image:5436

KeyboardInterrupt: ignored