This is a fast image downloader that uses the following trick:
https://www.kaggle.com/c/landmark-recognition-challenge/discussion/49703
You can also change the target size to whatever you prefer.

Reference:
https://www.kaggle.com/c/landmark-recognition-challenge/discussion/48895
```
For 256,256 this should be 22 GB
For 224,224 this should be 16.8 GB
For 139,139 this should be 6.5 GB
For 128,128 this should be 5.5 GB
For 96,96 this should be 3.1 GB
For 64,64 this should be 1.4 GB
```

In [1]:
import os
from io import BytesIO
from urllib import request
import urllib3
import pandas as pd
import re
import tqdm
from PIL import Image



In [2]:

# Input CSV and output directory (recognition challenge).
# Alternatives: '../input/index.csv' with '../input/index' (retrieval
# challenge), or '../input/test.csv' with '../input/test' (test data).
OUT_DIR = 'train'
DATA_FRAME = pd.read_csv('train.csv')

# Output preferences.
IMG_QUALITY = 90  # JPEG quality used when the resized image is saved
TARGET_SIZE = 128  # edge length (pixels) of the stored square images

In [3]:
def overwrite_urls(df, reso=None):
    """Rewrite the size segment of every URL in ``df['url']``.

    The second-to-last '/'-separated segment of each URL is replaced with
    ``'s<reso>'`` when it starts with ``s`` followed by digits (for example
    ``s1600``).  URLs whose segment does not match, URLs with fewer than
    two segments (such as the literal ``'None'``) and non-string values
    (such as NaN) are left untouched.

    Args:
        df: DataFrame with a ``url`` column.  It is modified in place.
        reso: Resolution to request.  Defaults to the module-level
            ``TARGET_SIZE`` when omitted.

    Returns:
        The same ``df`` object, with its ``url`` column rewritten.
    """
    if reso is None:
        reso = TARGET_SIZE
    size_segment = 's{}'.format(reso)
    # re.match anchors at the start only, so a segment such as 's1600-c'
    # also counts as a size segment and is replaced as a whole.
    size_pattern = re.compile(r's[0-9]+')

    def rewrite(url):
        if not isinstance(url, str):
            return url
        parts = url.split('/')
        if len(parts) < 2:
            # No second-to-last segment to inspect.
            return url
        if size_pattern.match(parts[-2]) is not None:
            parts[-2] = size_segment
        return '/'.join(parts)

    df['url'] = df['url'].apply(rewrite)
    return df


We found that almost all images are served at a resolution of 1600 pixels.
Downloading such high-resolution images takes a long time, so I recommend changing the "s1600" segment of each URL to "s{TARGET_SIZE}" before downloading, as the `overwrite_urls` function in this notebook does.

In [4]:
def parse_data(df):
    """Return a list holding the first three values of every row of ``df``.

    Each list item is the slice ``row[:3]`` of one row of ``df.values``,
    so rows with fewer than three columns yield shorter items.
    """
    def leading_three(row):
        return row[:3]

    return list(map(leading_three, df.values))


In [5]:
def download_image(key_url):
    """Download one image, resize it and store it as a JPEG file.

    The file is written to ``OUT_DIR/<label>/<key>.jpg``; the label
    directory (and any missing parent directory) is created on demand.
    An image whose target file already exists is not downloaded again.

    Args:
        key_url: Sequence of exactly three items ``(key, url, label)``.

    Returns:
        0 when the image was saved or already existed, 1 when the download,
        decode, RGB conversion, resize or save step failed.  Failures are
        reported with a printed warning rather than a raised exception.
    """
    key, url, label = key_url
    label_dir = os.path.join(OUT_DIR, str(label))
    filename = os.path.join(label_dir, '{}.jpg'.format(key))

    # makedirs with exist_ok=True also creates OUT_DIR itself when it is
    # missing and does not fail if the directory already exists.
    os.makedirs(label_dir, exist_ok=True)
    if os.path.exists(filename):
        print('Image {} already exists. Skipping download.'.format(filename))
        return 0

    # Each step catches Exception instead of using a bare except, so the
    # per-image best-effort behaviour is kept while KeyboardInterrupt and
    # SystemExit can still stop a long download run.
    try:
        # The with-block closes the HTTP response once the body is read.
        with request.urlopen(url) as response:
            image_data = response.read()
    except Exception:
        print('Warning: Could not download image {} from {}'.format(key, url))
        return 1

    try:
        pil_image = Image.open(BytesIO(image_data))
    except Exception:
        print('Warning: Failed to parse image {}'.format(key))
        return 1

    try:
        pil_image_rgb = pil_image.convert('RGB')
    except Exception:
        print('Warning: Failed to convert image {} to RGB'.format(key))
        return 1

    try:
        pil_image_resize = pil_image_rgb.resize((TARGET_SIZE, TARGET_SIZE))
    except Exception:
        print('Warning: Failed to resize image {}'.format(key))
        return 1

    try:
        pil_image_resize.save(filename, format='JPEG', quality=IMG_QUALITY)
    except Exception:
        print('Warning: Failed to save image {}'.format(filename))
        return 1

    return 0

In [6]:
# Rewrite the size segment of every URL (this modifies DATA_FRAME in place),
# then reduce each row to its first three values for download_image.
key_url_list = parse_data(overwrite_urls(DATA_FRAME))

In [None]:
# Download every entry in turn and print the running total after each
# block of 100 processed entries.
count = 0  # stays 0 when key_url_list is empty
for count, url in enumerate(key_url_list, start=1):
    download_image(url)
    if count % 100 == 0:
        print(count)

100
200
300
400
500
600
700
800


In [11]:
# Open a single image URL directly; the response object is kept in `res`
# and is neither read nor closed in the visible part of this notebook.
res=request.urlopen('http://static.panoramio.com/photos/original/70761397.jpg')