In [None]:
import pandas as pd
import os
import requests
from PIL import Image
import io
import random

# Show up to 100 characters per cell so long URLs stay readable in previews.
# Use the full option name: the abbreviated 'max.colwidth' only worked because
# pandas treats the key as a regex, which breaks if another option ever matches.
pd.set_option('display.max_colwidth', 100)

In [None]:
# Read the semicolon-delimited, UTF-8 encoded CSV and preview the first rows.
csv_path = '../../../data/zalando/data.csv'
df = pd.read_csv(csv_path, sep=';', encoding='utf-8')
df.head()

In [None]:
# Keep only the columns needed for downloading. The explicit .copy() makes
# model_urls an independent frame, so the column assignment below does not
# write into a slice of df (which raises pandas' SettingWithCopyWarning).
model_urls = df[['category', 'img_path', 'model_img_urls']].copy()

# Turn each ', '-separated URL string into a list of URLs. Missing values
# become an empty list instead of the bogus one-element list ['nan'].
model_urls['model_img_urls'] = model_urls['model_img_urls'].apply(
    lambda x: str(x).split(', ') if pd.notna(x) else []
)

In [None]:
# Spot-check the first rows of model_urls.
model_urls.head(n=5)

In [None]:
# Collapse to one row per category. Summing within a group adds the rows'
# values together, which for the per-row URL lists amounts to concatenation.
model_urls = (
    model_urls
    .groupby(by='category')
    .sum()
)

In [None]:
# Root folder for the downloaded images, and the width in pixels that every
# saved image is resized to.
data_folder = '../../../data/zalando_models'
img_width = 400

# exist_ok=True keeps this cell safe to re-run and avoids the race between
# checking for the folder and creating it.
os.makedirs(data_folder, exist_ok=True)

In [None]:
def download_image(img_data, img_filepath, img_width):
    """Resize an already-fetched image to a fixed width and save it to disk.

    Despite the name, no network request is made here: the encoded image
    bytes are read from ``img_data.content``.

    Args:
        img_data: Response-like object whose ``content`` attribute holds the
            encoded image bytes.
        img_filepath: Destination path passed to ``Image.save``.
        img_width: Target width in pixels; the height is derived from the
            source image's aspect ratio.

    Returns:
        None. Failures are reported on stdout instead of being raised, so a
        batch download can carry on with the next image.
    """
    try:
        img = Image.open(io.BytesIO(img_data.content))
        img_ratio = img.size[0] / img.size[1]
        # Clamp to at least 1px: a very wide source would otherwise truncate
        # to a zero height, which cannot be resized to.
        new_height = max(1, int(img_width / img_ratio))
        new_size = (img_width, new_height)
        # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
        # Pillow 10+; LANCZOS is the same filter and works on old and new
        # versions alike.
        img = img.resize(new_size, Image.LANCZOS)
        img.save(img_filepath)
    except Exception as exc:
        # Best-effort by design, but include the cause so failures can be
        # diagnosed, and let KeyboardInterrupt/SystemExit propagate.
        print('Problem with downloading image: {} ({!r})'.format(img_filepath, exc))

In [None]:
def get_response(url):
    """Fetch ``url`` with a 10-second timeout.

    Args:
        url: Address to request.

    Returns:
        The response object returned by ``requests.get`` (whatever its status
        code), or ``None`` when the request itself failed, e.g. connection
        error, timeout or malformed URL.
    """
    try:
        return requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # When requests.get raises there is no response object, so there is no
        # status code to report; reading one here used to raise an
        # AttributeError from inside the handler. Report the exception instead.
        print("Problem downloading response content for: {} Error: {}".format(url, exc))
        return None

In [None]:
# Categories to download; each entry is used both as a row label in
# model_urls and as the name of the sub-folder the images are written to.
categories = [
    'pullover-und-strickjacken',
]

In [None]:
# Download every image URL of each selected category into
# <data_folder>/<category>/, skipping files that are already on disk so the
# cell can be re-run to resume an interrupted download.
for category in categories:
    print('-' * 50)
    print('Downloading category {}'.format(category))

    cat_folder = os.path.join(data_folder, category)
    os.makedirs(cat_folder, exist_ok=True)

    img_urls = model_urls.loc[category].model_img_urls
    print('Found {} images'.format(len(img_urls)))

    for idx, img_url in enumerate(img_urls):
        if idx % 1000 == 0:
            print('Downloaded images: ', idx)

        # The file name is the last path segment of the URL.
        img_path = img_url.split('/')[-1]
        if not img_path:
            # A URL ending in '/' would map onto the category folder itself
            # and be misreported as "already exists".
            print('Skipping url without a file name: {}'.format(img_url))
            continue

        img_filepath = os.path.join(cat_folder, img_path)

        if os.path.exists(img_filepath):
            print('image already exists {}'.format(img_filepath))
            continue

        try:
            img_data = get_response(img_url)
            if img_data is None:
                # The request itself failed, so there is no response to read.
                print('Problem downloading image: {}'.format(img_filepath))
            elif img_data.status_code == requests.codes.ok:
                download_image(img_data, img_filepath, img_width)
            else:
                # Previously dropped silently; report it so missing files can
                # be traced back to the server's answer.
                print('Skipping {} (status code {})'.format(img_url, img_data.status_code))
        except Exception as exc:
            # Best-effort: one bad URL must not abort the whole batch, while
            # KeyboardInterrupt can still stop the loop.
            print('Problem downloading image: {} ({!r})'.format(img_filepath, exc))
          