In [None]:
import multiprocess
import urllib
import pandas as pd
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split

In [None]:
# Read, parse, and organize the data.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv(..., index_col=0)` is its replacement. The first column becomes
# the index, which `download_image` later uses as the image file name, so it
# is deliberately left as-is (no date parsing).
SPLIT_SEED = 42  # fixed so a re-run puts every image in the same split

df = pd.read_csv('WikiArt-Emotions-All.tsv', sep='\t', index_col=0)

# Keep only the image URL and the columns whose name mentions "ImageOnly".
obsolete_columns = [column for column in df.columns
                    if column != 'Image URL' and
                    'ImageOnly' not in column]
df = df.drop(columns=obsolete_columns)

# Shorten every remaining annotation column to the second space-separated
# token of its name; the URL column keeps its name.
df = df.rename(columns={name: name.split(' ')[1] for name in df.columns if name != 'Image URL'})

# Split the data (80 % train / 20 % validation, reproducibly)
df_train, df_validate = train_test_split(df, train_size=0.8, test_size=0.2,
                                         random_state=SPLIT_SEED)

In [None]:
# Create a downloading function we can run in parallel worker processes
def download_image(iterrow, folder):
    """Download one image and store it as ``<folder><index>.jpg``.

    Parameters
    ----------
    iterrow : tuple
        An ``(index, row)`` pair, e.g. as yielded by ``DataFrame.iterrows()``.
        ``row['Image URL']`` is the address to fetch; ``index`` becomes the
        file name.
    folder : str
        Prefix prepended verbatim to the file name, so a directory must end
        with a path separator (e.g. ``'train/'``). The directory part of the
        resulting path is created if it does not exist yet.

    Raises
    ------
    Any exception raised by ``urlretrieve`` (e.g. on an unreachable URL) or by
    ``os.makedirs`` propagates to the caller.
    """
    # Imported locally so the function carries its own dependencies and works
    # on both Python 3 (`urllib.request`) and Python 2 (`urllib`).
    import os
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    index, row = iterrow
    url = row['Image URL']
    # Downgrade the scheme only. A blanket str.replace would also rewrite any
    # "https" that happens to occur later in the URL.
    # NOTE(review): presumably plain HTTP is used to sidestep TLS problems - confirm.
    if url.startswith('https://'):
        url = 'http://' + url[len('https://'):]

    target = '%s%s.jpg' % (folder, index)
    parent = os.path.dirname(target)
    if parent and not os.path.isdir(parent):
        try:
            os.makedirs(parent)
        except OSError:
            # Another worker may have created the directory in the meantime.
            if not os.path.isdir(parent):
                raise

    urlretrieve(url, target)


# Download the training images, then the validation images, using one shared
# pool of 4 worker processes.
pool = multiprocess.Pool(4)
try:
    for folder, frame in (('train/', df_train), ('validate/', df_validate)):
        # `folder=folder` binds the current value as a default argument, so
        # the lambda does not depend on the loop variable's later values.
        iterator = pool.imap(lambda row, folder=folder: download_image(row, folder),
                             frame.iterrows())
        # `imap` is lazy: consuming it through the progress bar is what waits
        # for the downloads to finish.
        _ = list(tqdm_notebook(iterator, total=frame.shape[0]))
finally:
    # Always release the worker processes. On success nothing is in flight
    # any more; on failure this stops the outstanding downloads promptly.
    # The pool is not usable after this cell.
    pool.terminate()
    pool.join()